diff options
Diffstat (limited to 'src/mesa/drivers')
197 files changed, 19089 insertions, 20448 deletions
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 6a98c29a3d..3b397fef7d 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -202,16 +202,11 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->GetIntegerv = NULL; driver->GetPointerv = NULL; -#if FEATURE_ARB_vertex_buffer_object - driver->NewBufferObject = _mesa_new_buffer_object; - driver->DeleteBuffer = _mesa_delete_buffer_object; - driver->BindBuffer = NULL; - driver->BufferData = _mesa_buffer_data; - driver->BufferSubData = _mesa_buffer_subdata; - driver->GetBufferSubData = _mesa_buffer_get_subdata; - driver->MapBuffer = _mesa_buffer_map; - driver->UnmapBuffer = _mesa_buffer_unmap; -#endif + /* buffer objects */ + _mesa_init_buffer_object_functions(driver); + + /* query objects */ + _mesa_init_query_object_functions(driver); #if FEATURE_EXT_framebuffer_object driver->NewFramebuffer = _mesa_new_framebuffer; @@ -225,14 +220,6 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->BlitFramebuffer = _swrast_BlitFramebuffer; #endif - /* query objects */ - driver->NewQueryObject = _mesa_new_query_object; - driver->DeleteQuery = _mesa_delete_query; - driver->BeginQuery = _mesa_begin_query; - driver->EndQuery = _mesa_end_query; - driver->WaitQuery = _mesa_wait_query; - driver->CheckQuery = _mesa_check_query; - /* APPLE_vertex_array_object */ driver->NewArrayObject = _mesa_new_array_object; driver->DeleteArrayObject = _mesa_delete_array_object; diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index 5c01d233c1..bd38e3be47 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -92,7 +92,7 @@ clean: install: $(LIBNAME) $(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) - $(INSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) + $(MINSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) -include depend diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 576494940f..2b1493a6de 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -37,6 +37,9 @@ typedef GLboolean ( * PFNGLXGETMSCRATEOMLPROC) (__DRIdrawable *drawable, int32_t *numerator, int32_t *denominator); #endif +static void dri_get_drawable(__DRIdrawable *pdp); +static void dri_put_drawable(__DRIdrawable *pdp); + /** * This is just a token extension used to signal that the driver * supports setting a read drawable. @@ -130,7 +133,7 @@ static int driUnbindContext(__DRIcontext *pcp) return GL_FALSE; } - pdp->refcount--; + dri_put_drawable(pdp); if (prp != pdp) { if (prp->refcount == 0) { @@ -138,7 +141,7 @@ static int driUnbindContext(__DRIcontext *pcp) return GL_FALSE; } - prp->refcount--; + dri_put_drawable(prp); } @@ -174,10 +177,10 @@ static int driBindContext(__DRIcontext *pcp, pcp->driReadablePriv = prp; if (pdp) { pdp->driContextPriv = pcp; - pdp->refcount++; + dri_get_drawable(pdp); } if ( prp && pdp != prp ) { - prp->refcount++; + dri_get_drawable(prp); } } @@ -434,7 +437,7 @@ driCreateNewDrawable(__DRIscreen *psp, const __DRIconfig *config, pdp->loaderPrivate = data; pdp->hHWDrawable = hwDrawable; - pdp->refcount = 0; + pdp->refcount = 1; pdp->pStamp = NULL; pdp->lastStamp = 0; pdp->index = 0; @@ -487,12 +490,19 @@ dri2CreateNewDrawable(__DRIscreen *screen, return pdraw; } - -static void -driDestroyDrawable(__DRIdrawable *pdp) +static void dri_get_drawable(__DRIdrawable *pdp) +{ + pdp->refcount++; +} + +static void dri_put_drawable(__DRIdrawable *pdp) { __DRIscreenPrivate *psp; + pdp->refcount--; + if (pdp->refcount) + return; + if (pdp) { psp = pdp->driScreenPriv; (*psp->DriverAPI.DestroyBuffer)(pdp); @@ -508,6 +518,12 @@ driDestroyDrawable(__DRIdrawable *pdp) } } +static void +driDestroyDrawable(__DRIdrawable *pdp) +{ + dri_put_drawable(pdp); +} + /*@}*/ diff --git a/src/mesa/drivers/dri/common/extension_helper.h b/src/mesa/drivers/dri/common/extension_helper.h index 8dcaaee307..e308fd2831 100644 --- a/src/mesa/drivers/dri/common/extension_helper.h +++ b/src/mesa/drivers/dri/common/extension_helper.h @@ -406,9 +406,10 @@ static const char UniformMatrix4fvARB_names[] = ""; #endif -#if defined(need_GL_APPLE_vertex_array_object) +#if defined(need_GL_ARB_vertex_array_object) || defined(need_GL_APPLE_vertex_array_object) static const char DeleteVertexArraysAPPLE_names[] = "ip\0" /* Parameter signature */ + "glDeleteVertexArrays\0" "glDeleteVertexArraysAPPLE\0" ""; #endif @@ -941,6 +942,13 @@ static const char WeightivARB_names[] = ""; #endif +#if defined(need_GL_SGIX_instruments) +static const char PollInstrumentsSGIX_names[] = + "p\0" /* Parameter signature */ + "glPollInstrumentsSGIX\0" + ""; +#endif + #if defined(need_GL_SUN_global_alpha) static const char GlobalAlphaFactordSUN_names[] = "d\0" /* Parameter signature */ @@ -1763,6 +1771,13 @@ static const char DeleteFencesNV_names[] = ""; #endif +#if defined(need_GL_SGIX_polynomial_ffd) +static const char DeformationMap3dSGIX_names[] = + "iddiiddiiddiip\0" /* Parameter signature */ + "glDeformationMap3dSGIX\0" + ""; +#endif + #if defined(need_GL_VERSION_2_0) static const char IsShader_names[] = "i\0" /* Parameter signature */ @@ -2107,6 +2122,13 @@ static const char Tangent3fvEXT_names[] = ""; #endif +#if defined(need_GL_ARB_vertex_array_object) +static const char GenVertexArrays_names[] = + "ip\0" /* Parameter signature */ + "glGenVertexArrays\0" + ""; +#endif + #if defined(need_GL_ARB_framebuffer_object) || defined(need_GL_EXT_framebuffer_object) static const char BindFramebufferEXT_names[] = "ii\0" /* Parameter signature */ @@ -2333,10 +2355,10 @@ static const char GetCombinerStageParameterfvNV_names[] = ""; #endif -#if defined(need_GL_EXT_coordinate_frame) -static const char Binormal3fEXT_names[] = - "fff\0" /* Parameter signature */ - "glBinormal3fEXT\0" +#if defined(need_GL_ARB_vertex_array_object) +static const char BindVertexArray_names[] = + "i\0" /* Parameter signature */ + "glBindVertexArray\0" ""; #endif @@ -2766,10 +2788,10 @@ static const char Uniform4fARB_names[] = ""; #endif -#if defined(need_GL_IBM_multimode_draw_arrays) -static const char MultiModeDrawArraysIBM_names[] = - "pppii\0" /* Parameter signature */ - "glMultiModeDrawArraysIBM\0" +#if defined(need_GL_ARB_map_buffer_range) +static const char FlushMappedBufferRange_names[] = + "iii\0" /* Parameter signature */ + "glFlushMappedBufferRange\0" ""; #endif @@ -3396,10 +3418,11 @@ static const char GetProgramParameterdvNV_names[] = ""; #endif -#if defined(need_GL_SGIX_instruments) -static const char PollInstrumentsSGIX_names[] = - "p\0" /* Parameter signature */ - "glPollInstrumentsSGIX\0" +#if defined(need_GL_ARB_vertex_array_object) || defined(need_GL_APPLE_vertex_array_object) +static const char IsVertexArrayAPPLE_names[] = + "i\0" /* Parameter signature */ + "glIsVertexArray\0" + "glIsVertexArrayAPPLE\0" ""; #endif @@ -3897,6 +3920,13 @@ static const char VertexAttribs4dvNV_names[] = ""; #endif +#if defined(need_GL_IBM_multimode_draw_arrays) +static const char MultiModeDrawArraysIBM_names[] = + "pppii\0" /* Parameter signature */ + "glMultiModeDrawArraysIBM\0" + ""; +#endif + #if defined(need_GL_VERSION_2_0) || defined(need_GL_ARB_vertex_program) static const char VertexAttrib4dARB_names[] = "idddd\0" /* Parameter signature */ @@ -3926,6 +3956,13 @@ static const char VertexWeightfEXT_names[] = ""; #endif +#if defined(need_GL_EXT_coordinate_frame) +static const char Binormal3fEXT_names[] = + "fff\0" /* Parameter signature */ + "glBinormal3fEXT\0" + ""; +#endif + #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_fog_coord) static const char FogCoordfvEXT_names[] = "p\0" /* Parameter signature */ @@ -4041,10 +4078,10 @@ static const char BlendFuncSeparateEXT_names[] = ""; #endif -#if defined(need_GL_APPLE_vertex_array_object) -static const char IsVertexArrayAPPLE_names[] = - "i\0" /* Parameter signature */ - "glIsVertexArrayAPPLE\0" +#if defined(need_GL_ARB_map_buffer_range) +static const char MapBufferRange_names[] = + "iiii\0" /* Parameter signature */ + "glMapBufferRange\0" ""; #endif @@ -4289,6 +4326,13 @@ static const char SpriteParameterivSGIX_names[] = ""; #endif +#if defined(need_GL_EXT_provoking_vertex) +static const char ProvokingVertexEXT_names[] = + "i\0" /* Parameter signature */ + "glProvokingVertexEXT\0" + ""; +#endif + #if defined(need_GL_VERSION_1_3) static const char MultiTexCoord1fARB_names[] = "if\0" /* Parameter signature */ @@ -4396,6 +4440,13 @@ static const char WindowPos3ivMESA_names[] = ""; #endif +#if defined(need_GL_ARB_copy_buffer) +static const char CopyBufferSubData_names[] = + "iiiii\0" /* Parameter signature */ + "glCopyBufferSubData\0" + ""; +#endif + #if defined(need_GL_VERSION_1_5) || defined(need_GL_ARB_vertex_buffer_object) static const char IsBufferARB_names[] = "i\0" /* Parameter signature */ @@ -4560,13 +4611,6 @@ static const char Minmax_names[] = ""; #endif -#if defined(need_GL_SGIX_polynomial_ffd) -static const char DeformationMap3dSGIX_names[] = - "iddiiddiiddiip\0" /* Parameter signature */ - "glDeformationMap3dSGIX\0" - ""; -#endif - #if defined(need_GL_VERSION_1_4) || defined(need_GL_EXT_fog_coord) static const char FogCoorddvEXT_names[] = "p\0" /* Parameter signature */ @@ -4933,8 +4977,15 @@ static const struct dri_extension_function GL_3DFX_tbuffer_functions[] = { static const struct dri_extension_function GL_APPLE_vertex_array_object_functions[] = { { DeleteVertexArraysAPPLE_names, DeleteVertexArraysAPPLE_remap_index, -1 }, { GenVertexArraysAPPLE_names, GenVertexArraysAPPLE_remap_index, -1 }, - { BindVertexArrayAPPLE_names, BindVertexArrayAPPLE_remap_index, -1 }, { IsVertexArrayAPPLE_names, IsVertexArrayAPPLE_remap_index, -1 }, + { BindVertexArrayAPPLE_names, BindVertexArrayAPPLE_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + +#if defined(need_GL_ARB_copy_buffer) +static const struct dri_extension_function GL_ARB_copy_buffer_functions[] = { + { CopyBufferSubData_names, CopyBufferSubData_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -4972,6 +5023,14 @@ static const struct dri_extension_function GL_ARB_framebuffer_object_functions[] }; #endif +#if defined(need_GL_ARB_map_buffer_range) +static const struct dri_extension_function GL_ARB_map_buffer_range_functions[] = { + { FlushMappedBufferRange_names, FlushMappedBufferRange_remap_index, -1 }, + { MapBufferRange_names, MapBufferRange_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + #if defined(need_GL_ARB_matrix_palette) static const struct dri_extension_function GL_ARB_matrix_palette_functions[] = { { MatrixIndexusvARB_names, MatrixIndexusvARB_remap_index, -1 }, @@ -5080,6 +5139,16 @@ static const struct dri_extension_function GL_ARB_transpose_matrix_functions[] = }; #endif +#if defined(need_GL_ARB_vertex_array_object) +static const struct dri_extension_function GL_ARB_vertex_array_object_functions[] = { + { DeleteVertexArraysAPPLE_names, DeleteVertexArraysAPPLE_remap_index, -1 }, + { GenVertexArrays_names, GenVertexArrays_remap_index, -1 }, + { BindVertexArray_names, BindVertexArray_remap_index, -1 }, + { IsVertexArrayAPPLE_names, IsVertexArrayAPPLE_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + #if defined(need_GL_ARB_vertex_blend) static const struct dri_extension_function GL_ARB_vertex_blend_functions[] = { { WeightubvARB_names, WeightubvARB_remap_index, -1 }, @@ -5333,7 +5402,6 @@ static const struct dri_extension_function GL_EXT_coordinate_frame_functions[] = { Binormal3ivEXT_names, Binormal3ivEXT_remap_index, -1 }, { Tangent3sEXT_names, Tangent3sEXT_remap_index, -1 }, { Tangent3fvEXT_names, Tangent3fvEXT_remap_index, -1 }, - { Binormal3fEXT_names, Binormal3fEXT_remap_index, -1 }, { Tangent3dvEXT_names, Tangent3dvEXT_remap_index, -1 }, { Binormal3bvEXT_names, Binormal3bvEXT_remap_index, -1 }, { Binormal3dEXT_names, Binormal3dEXT_remap_index, -1 }, @@ -5342,6 +5410,7 @@ static const struct dri_extension_function GL_EXT_coordinate_frame_functions[] = { Tangent3ivEXT_names, Tangent3ivEXT_remap_index, -1 }, { Tangent3dEXT_names, Tangent3dEXT_remap_index, -1 }, { Binormal3svEXT_names, Binormal3svEXT_remap_index, -1 }, + { Binormal3fEXT_names, Binormal3fEXT_remap_index, -1 }, { Binormal3dvEXT_names, Binormal3dvEXT_remap_index, -1 }, { Tangent3iEXT_names, Tangent3iEXT_remap_index, -1 }, { Tangent3bvEXT_names, Tangent3bvEXT_remap_index, -1 }, @@ -5527,6 +5596,13 @@ static const struct dri_extension_function GL_EXT_polygon_offset_functions[] = { }; #endif +#if defined(need_GL_EXT_provoking_vertex) +static const struct dri_extension_function GL_EXT_provoking_vertex_functions[] = { + { ProvokingVertexEXT_names, ProvokingVertexEXT_remap_index, -1 }, + { NULL, 0, 0 } +}; +#endif + #if defined(need_GL_EXT_secondary_color) static const struct dri_extension_function GL_EXT_secondary_color_functions[] = { { SecondaryColor3iEXT_names, SecondaryColor3iEXT_remap_index, -1 }, @@ -6024,11 +6100,11 @@ static const struct dri_extension_function GL_SGIX_igloo_interface_functions[] = #if defined(need_GL_SGIX_instruments) static const struct dri_extension_function GL_SGIX_instruments_functions[] = { { ReadInstrumentsSGIX_names, ReadInstrumentsSGIX_remap_index, -1 }, + { PollInstrumentsSGIX_names, PollInstrumentsSGIX_remap_index, -1 }, { GetInstrumentsSGIX_names, GetInstrumentsSGIX_remap_index, -1 }, { StartInstrumentsSGIX_names, StartInstrumentsSGIX_remap_index, -1 }, { StopInstrumentsSGIX_names, StopInstrumentsSGIX_remap_index, -1 }, { InstrumentsBufferSGIX_names, InstrumentsBufferSGIX_remap_index, -1 }, - { PollInstrumentsSGIX_names, PollInstrumentsSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif @@ -6055,9 +6131,9 @@ static const struct dri_extension_function GL_SGIX_pixel_texture_functions[] = { #if defined(need_GL_SGIX_polynomial_ffd) static const struct dri_extension_function GL_SGIX_polynomial_ffd_functions[] = { { LoadIdentityDeformationMapSGIX_names, LoadIdentityDeformationMapSGIX_remap_index, -1 }, + { DeformationMap3dSGIX_names, DeformationMap3dSGIX_remap_index, -1 }, { DeformSGIX_names, DeformSGIX_remap_index, -1 }, { DeformationMap3fSGIX_names, DeformationMap3fSGIX_remap_index, -1 }, - { DeformationMap3dSGIX_names, DeformationMap3dSGIX_remap_index, -1 }, { NULL, 0, 0 } }; #endif diff --git a/src/mesa/drivers/dri/common/xmlpool.h b/src/mesa/drivers/dri/common/xmlpool.h index 7fbc6e800d..587517ea10 100644 --- a/src/mesa/drivers/dri/common/xmlpool.h +++ b/src/mesa/drivers/dri/common/xmlpool.h @@ -60,6 +60,10 @@ #define DRI_CONF_OPT_BEGIN(name,type,def) \ "<option name=\""#name"\" type=\""#type"\" default=\""#def"\">\n" +/** \brief Begin an option definition with qouted default value */ +#define DRI_CONF_OPT_BEGIN_Q(name,type,def) \ +"<option name=\""#name"\" type=\""#type"\" default="#def">\n" + /** \brief Begin an option definition with restrictions on valid values */ #define DRI_CONF_OPT_BEGIN_V(name,type,def,valid) \ "<option name=\""#name"\" type=\""#type"\" default=\""#def"\" valid=\""valid"\">\n" diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 9f4bd1699f..beaf9a4b12 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -19,6 +19,7 @@ DRIVER_SOURCES = \ intel_batchbuffer.c \ intel_clear.c \ intel_extensions.c \ + intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_tex_layout.c \ intel_tex_image.c \ diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h index d210c2d08e..db16871001 100644 --- a/src/mesa/drivers/dri/i915/i830_reg.h +++ b/src/mesa/drivers/dri/i915/i830_reg.h @@ -48,19 +48,6 @@ #define AA_LINE_ENABLE ((1<<1) | 1) #define AA_LINE_DISABLE (1<<1) -#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) -/* Dword 1 */ -#define BUF_3D_ID_COLOR_BACK (0x3<<24) -#define BUF_3D_ID_DEPTH (0x7<<24) -#define BUF_3D_USE_FENCE (1<<23) -#define BUF_3D_TILED_SURFACE (1<<22) -#define BUF_3D_TILE_WALK_X 0 -#define BUF_3D_TILE_WALK_Y (1<<21) -#define BUF_3D_PITCH(x) (((x)/4)<<2) -/* Dword 2 */ -#define BUF_3D_ADDR(x) ((x) & ~0x3) - - #define _3DSTATE_COLOR_FACTOR_CMD (CMD_3D | (0x1d<<24) | (0x1<<16)) #define _3DSTATE_COLOR_FACTOR_N_CMD(stage) (CMD_3D | (0x1d<<24) | \ diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index 753c25b57e..6f998fa6f7 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -174,14 +174,16 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I830_TEXREG_TM0LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_2 | (LOAD_TEXTURE_MAP0 << unit) | 4); -/* state[I830_TEXREG_TM0S0] = (TM0S0_USE_FENCE | */ -/* t->intel.TextureOffset); */ - - state[I830_TEXREG_TM0S1] = (((firstImage->Height - 1) << TM0S1_HEIGHT_SHIFT) | ((firstImage->Width - 1) << TM0S1_WIDTH_SHIFT) | format); + if (intelObj->mt->region->tiling != I915_TILING_NONE) { + state[I830_TEXREG_TM0S1] |= TM0S1_TILED_SURFACE; + if (intelObj->mt->region->tiling == I915_TILING_Y) + state[I830_TEXREG_TM0S1] |= TM0S1_TILE_WALK; + } + state[I830_TEXREG_TM0S2] = ((((pitch / 4) - 1) << TM0S2_PITCH_SHIFT) | TM0S2_CUBE_FACE_ENA_MASK); diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index 3bf02de61f..cbee9f9efe 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -552,7 +552,7 @@ i830_emit_state(struct intel_context *intel) if (state->tex_buffer[i]) { OUT_RELOC(state->tex_buffer[i], I915_GEM_DOMAIN_SAMPLER, 0, - state->tex_offset[i] | TM0S0_USE_FENCE); + state->tex_offset[i]); } else if (state == &i830->meta) { assert(i == 0); @@ -634,21 +634,11 @@ i830_state_draw_region(struct intel_context *intel, /* * Set stride/cpp values */ - if (color_region) { - state->Buffer[I830_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I830_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(color_region->pitch * color_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_CBUFADDR0], + color_region, BUF_3D_ID_COLOR_BACK); - if (depth_region) { - state->Buffer[I830_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I830_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | - BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I830_DESTREG_DBUFADDR0], + depth_region, BUF_3D_ID_DEPTH); /* * Compute/set I830_DESTREG_DV1 value @@ -718,26 +708,6 @@ i830_set_draw_region(struct intel_context *intel, i830_state_draw_region(intel, &i830->state, color_regions[0], depth_region); } -#if 0 -static void -i830_update_color_z_regions(intelContextPtr intel, - const intelRegion * colorRegion, - const intelRegion * depthRegion) -{ - i830ContextPtr i830 = I830_CONTEXT(intel); - - i830->state.Buffer[I830_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(colorRegion->pitch) | - BUF_3D_USE_FENCE); - i830->state.Buffer[I830_DESTREG_CBUFADDR2] = colorRegion->offset; - - i830->state.Buffer[I830_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | BUF_3D_PITCH(depthRegion->pitch) | BUF_3D_USE_FENCE); - i830->state.Buffer[I830_DESTREG_DBUFADDR2] = depthRegion->offset; -} -#endif - - /* This isn't really handled at the moment. */ static void diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index 1f9f363df9..367d2a3b64 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -73,7 +73,7 @@ i915InvalidateState(GLcontext * ctx, GLuint new_state) p->params_uptodate = 0; } - if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM)) + if (new_state & (_NEW_FOG | _NEW_HINT | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) i915_update_fog(ctx); } diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h index 8891e11c6f..84db58ea95 100644 --- a/src/mesa/drivers/dri/i915/i915_reg.h +++ b/src/mesa/drivers/dri/i915/i915_reg.h @@ -93,20 +93,6 @@ /* 3DSTATE_BIN_CONTROL p141 */ -/* p143 */ -#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) -/* Dword 1 */ -#define BUF_3D_ID_COLOR_BACK (0x3<<24) -#define BUF_3D_ID_DEPTH (0x7<<24) -#define BUF_3D_USE_FENCE (1<<23) -#define BUF_3D_TILED_SURFACE (1<<22) -#define BUF_3D_TILE_WALK_X 0 -#define BUF_3D_TILE_WALK_Y (1<<21) -#define BUF_3D_PITCH(x) (((x)/4)<<2) -/* Dword 2 */ -#define BUF_3D_ADDR(x) ((x) & ~0x3) - - /* 3DSTATE_CHROMA_KEY */ /* 3DSTATE_CLEAR_PARAMETERS, p150 */ @@ -155,6 +141,7 @@ /* p161 */ #define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) /* Dword 1 */ +#define CLASSIC_EARLY_DEPTH (1<<31) #define TEX_DEFAULT_COLOR_OGL (0<<30) #define TEX_DEFAULT_COLOR_D3D (1<<30) #define ZR_EARLY_DEPTH (1<<29) diff --git a/src/mesa/drivers/dri/i915/i915_tex_layout.c b/src/mesa/drivers/dri/i915/i915_tex_layout.c index 7cc1c096e4..d9588e5b56 100644 --- a/src/mesa/drivers/dri/i915/i915_tex_layout.c +++ b/src/mesa/drivers/dri/i915/i915_tex_layout.c @@ -55,6 +55,17 @@ static GLint step_offsets[6][2] = { [FACE_NEG_Z] = {-1, 1}, }; + +static GLint bottom_offsets[6] = { + [FACE_POS_X] = 16 + 0 * 8, + [FACE_POS_Y] = 16 + 1 * 8, + [FACE_POS_Z] = 16 + 2 * 8, + [FACE_NEG_X] = 16 + 3 * 8, + [FACE_NEG_Y] = 16 + 4 * 8, + [FACE_NEG_Z] = 16 + 5 * 8, +}; + + /** * Cube texture map layout for i830M-GM915. * @@ -101,7 +112,8 @@ static GLint step_offsets[6][2] = { */ static void i915_miptree_layout_cube(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { const GLuint dim = mt->width0; GLuint face; @@ -111,7 +123,7 @@ i915_miptree_layout_cube(struct intel_context *intel, assert(lvlWidth == lvlHeight); /* cubemap images are square */ /* double pitch for cube layouts */ - mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, dim * 2); mt->total_height = dim * 4; for (level = mt->first_level; level <= mt->last_level; level++) { @@ -145,7 +157,8 @@ i915_miptree_layout_cube(struct intel_context *intel, static void i915_miptree_layout_3d(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { GLuint width = mt->width0; GLuint height = mt->height0; @@ -154,7 +167,7 @@ i915_miptree_layout_3d(struct intel_context *intel, GLint level; /* Calculate the size of a single slice. */ - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); /* XXX: hardware expects/requires 9 levels at minimum. */ for (level = mt->first_level; level <= MAX2(8, mt->last_level); level++) { @@ -189,14 +202,15 @@ i915_miptree_layout_3d(struct intel_context *intel, static void i915_miptree_layout_2d(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { GLuint width = mt->width0; GLuint height = mt->height0; GLuint img_height; GLint level; - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); mt->total_height = 0; for (level = mt->first_level; level <= mt->last_level; level++) { @@ -217,19 +231,20 @@ i915_miptree_layout_2d(struct intel_context *intel, } GLboolean -i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) +i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt, + uint32_t tiling) { switch (mt->target) { case GL_TEXTURE_CUBE_MAP: - i915_miptree_layout_cube(intel, mt); + i915_miptree_layout_cube(intel, mt, tiling); break; case GL_TEXTURE_3D: - i915_miptree_layout_3d(intel, mt); + i915_miptree_layout_3d(intel, mt, tiling); break; case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_RECTANGLE_ARB: - i915_miptree_layout_2d(intel, mt); + i915_miptree_layout_2d(intel, mt, tiling); break; default: _mesa_problem(NULL, "Unexpected tex target in i915_miptree_layout()"); @@ -297,7 +312,7 @@ i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) * +---+ +---+ +---+ +---+ +---+ +---+ * * The bottom row continues with the remaining 2x2 then the 1x1 mip contents - * in order, with each of them aligned to a 4x4 block boundary. Thus, for + * in order, with each of them aligned to a 8x8 block boundary. Thus, for * 32x32 cube maps and smaller, the bottom row layout is going to dictate the * pitch of the tree. For a tree with 4x4 images, the pitch is at least * 14 * 8 = 112 texels, for 2x2 it is at least 12 * 8 texels, and for 1x1 @@ -306,7 +321,8 @@ i915_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) static void i945_miptree_layout_cube(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { const GLuint dim = mt->width0; GLuint face; @@ -320,9 +336,9 @@ i945_miptree_layout_cube(struct intel_context *intel, * or the final row of 4x4, 2x2 and 1x1 faces below this. */ if (dim > 32) - mt->pitch = intel_miptree_pitch_align (intel, mt, dim * 2); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, dim * 2); else - mt->pitch = intel_miptree_pitch_align (intel, mt, 14 * 8); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, 14 * 8); if (dim >= 4) mt->total_height = dim * 4 + 4; @@ -375,10 +391,11 @@ i945_miptree_layout_cube(struct intel_context *intel, x = (face - 4) * 8; break; } + break; case 2: y = mt->total_height - 4; - x = 16 + face * 8; + x = bottom_offsets[face]; break; case 1: @@ -396,7 +413,8 @@ i945_miptree_layout_cube(struct intel_context *intel, static void i945_miptree_layout_3d(struct intel_context *intel, - struct intel_mipmap_tree * mt) + struct intel_mipmap_tree * mt, + uint32_t tiling) { GLuint width = mt->width0; GLuint height = mt->height0; @@ -405,7 +423,7 @@ i945_miptree_layout_3d(struct intel_context *intel, GLuint pack_y_pitch; GLuint level; - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); mt->total_height = 0; pack_y_pitch = MAX2(mt->height0, 2); @@ -450,22 +468,23 @@ i945_miptree_layout_3d(struct intel_context *intel, } GLboolean -i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt) +i945_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree * mt, + uint32_t tiling) { switch (mt->target) { case GL_TEXTURE_CUBE_MAP: if (mt->compressed) - i945_miptree_layout_cube(intel, mt); + i945_miptree_layout_cube(intel, mt, tiling); else - i915_miptree_layout_cube(intel, mt); + i915_miptree_layout_cube(intel, mt, tiling); break; case GL_TEXTURE_3D: - i945_miptree_layout_3d(intel, mt); + i945_miptree_layout_3d(intel, mt, tiling); break; case GL_TEXTURE_1D: case GL_TEXTURE_2D: case GL_TEXTURE_RECTANGLE_ARB: - i945_miptree_layout_2d(intel, mt); + i945_miptree_layout_2d(intel, mt, tiling); break; default: _mesa_problem(NULL, "Unexpected tex target in i945_miptree_layout()"); diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index a37dd7f4fb..32d4b30cf9 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -185,8 +185,13 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I915_TEXREG_MS3] = (((firstImage->Height - 1) << MS3_HEIGHT_SHIFT) | - ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format | - MS3_USE_FENCE_REGS); + ((firstImage->Width - 1) << MS3_WIDTH_SHIFT) | format); + + if (intelObj->mt->region->tiling != I915_TILING_NONE) { + state[I915_TEXREG_MS3] |= MS3_TILED_SURFACE; + if (intelObj->mt->region->tiling == I915_TILING_Y) + state[I915_TEXREG_MS3] |= MS3_TILE_WALK; + } state[I915_TEXREG_MS4] = ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) | MS4_CUBE_FACE_ENA_MASK | diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 115004616f..2fca247af1 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -42,6 +42,7 @@ #include "intel_regions.h" #include "intel_tris.h" #include "intel_fbo.h" +#include "intel_chipset.h" #include "i915_reg.h" #include "i915_context.h" @@ -529,6 +530,23 @@ i915_destroy_context(struct intel_context *intel) _tnl_free_vertices(&intel->ctx); } +void +i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, + uint32_t buffer_id) +{ + state[0] = _3DSTATE_BUF_INFO_CMD; + state[1] = buffer_id; + + if (region != NULL) { + state[1] |= BUF_3D_PITCH(region->pitch * region->cpp); + + if (region->tiling != I915_TILING_NONE) { + state[1] |= BUF_3D_TILED_SURFACE; + if (region->tiling == I915_TILING_Y) + state[1] |= BUF_3D_TILE_WALK_Y; + } + } +} /** * Set the drawing regions for the color and depth/stencil buffers. @@ -562,21 +580,11 @@ i915_state_draw_region(struct intel_context *intel, /* * Set stride/cpp values */ - if (color_region) { - state->Buffer[I915_DESTREG_CBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I915_DESTREG_CBUFADDR1] = - (BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(color_region->pitch * color_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_CBUFADDR0], + color_region, BUF_3D_ID_COLOR_BACK); - if (depth_region) { - state->Buffer[I915_DESTREG_DBUFADDR0] = _3DSTATE_BUF_INFO_CMD; - state->Buffer[I915_DESTREG_DBUFADDR1] = - (BUF_3D_ID_DEPTH | - BUF_3D_PITCH(depth_region->pitch * depth_region->cpp) | - BUF_3D_USE_FENCE); - } + i915_set_buf_info_for_region(&state->Buffer[I915_DESTREG_DBUFADDR0], + depth_region, BUF_3D_ID_DEPTH); /* * Compute/set I915_DESTREG_DV1 value @@ -604,6 +612,14 @@ i915_state_draw_region(struct intel_context *intel, } } + /* This isn't quite safe, thus being hidden behind an option. When changing + * the value of this bit, the pipeline needs to be MI_FLUSHed. And it + * can only be set when a depth buffer is already defined. + */ + if (IS_945(intel->intelScreen->deviceID) && intel->use_early_z && + depth_region->tiling != I915_TILING_NONE) + value |= CLASSIC_EARLY_DEPTH; + if (depth_region && depth_region->cpp == 4) { value |= DEPTH_FRMT_24_FIXED_8_OTHER; } diff --git a/src/mesa/drivers/dri/i915/intel_generatemipmap.c b/src/mesa/drivers/dri/i915/intel_generatemipmap.c new file mode 120000 index 0000000000..4c6b37ada0 --- /dev/null +++ b/src/mesa/drivers/dri/i915/intel_generatemipmap.c @@ -0,0 +1 @@ +../intel/intel_generatemipmap.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i915/intel_pixel_read.c b/src/mesa/drivers/dri/i915/intel_pixel_read.c index 56087aacd4..cc4589f4d4 100644..120000 --- a/src/mesa/drivers/dri/i915/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i915/intel_pixel_read.c @@ -1,306 +1 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "main/glheader.h" -#include "main/enums.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "main/image.h" -#include "main/bufferobj.h" -#include "swrast/swrast.h" - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_batchbuffer.h" -#include "intel_blit.h" -#include "intel_buffers.h" -#include "intel_regions.h" -#include "intel_pixel.h" -#include "intel_buffer_objects.h" - -/* For many applications, the new ability to pull the source buffers - * back out of the GTT and then do the packing/conversion operations - * in software will be as much of an improvement as trying to get the - * blitter and/or texture engine to do the work. - * - * This step is gated on private backbuffers. - * - * Obviously the frontbuffer can't be pulled back, so that is either - * an argument for blit/texture readpixels, or for blitting to a - * temporary and then pulling that back. - * - * When the destination is a pbo, however, it's not clear if it is - * ever going to be pulled to main memory (though the access param - * will be a good hint). So it sounds like we do want to be able to - * choose between blit/texture implementation on the gpu and pullback - * and cpu-based copying. - * - * Unless you can magically turn client memory into a PBO for the - * duration of this call, there will be a cpu-based copying step in - * any case. - */ - - -static GLboolean -do_texture_readpixels(GLcontext * ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, - struct intel_region *dest_region) -{ -#if 0 - struct intel_context *intel = intel_context(ctx); - intelScreenPrivate *screen = intel->intelScreen; - GLint pitch = pack->RowLength ? pack->RowLength : width; - __DRIdrawablePrivate *dPriv = intel->driDrawable; - int textureFormat; - GLenum glTextureFormat; - int destFormat, depthFormat, destPitch; - drm_clip_rect_t tmp; - - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - - if (ctx->_ImageTransferState || - pack->SwapBytes || pack->LsbFirst || !pack->Invert) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: check_color failed\n", __FUNCTION__); - return GL_FALSE; - } - - intel->vtbl.meta_texrect_source(intel, intel_readbuf_region(intel)); - - if (!intel->vtbl.meta_render_dest(intel, dest_region, type, format)) { - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: couldn't set dest %s/%s\n", - __FUNCTION__, - _mesa_lookup_enum_by_nr(type), - _mesa_lookup_enum_by_nr(format)); - return GL_FALSE; - } - - LOCK_HARDWARE(intel); - - if (intel->driDrawable->numClipRects) { - intel->vtbl.install_meta_state(intel); - intel->vtbl.meta_no_depth_write(intel); - intel->vtbl.meta_no_stencil_write(intel); - - if (!driClipRectToFramebuffer(ctx->ReadBuffer, &x, &y, &width, &height)) { - UNLOCK_HARDWARE(intel); - SET_STATE(i830, state); - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__); - return GL_TRUE; - } - - y = dPriv->h - y - height; - x += dPriv->x; - y += dPriv->y; - - - /* Set the frontbuffer up as a large rectangular texture. - */ - intel->vtbl.meta_tex_rect_source(intel, src_region, textureFormat); - - - intel->vtbl.meta_texture_blend_replace(i830, glTextureFormat); - - - /* Set the 3d engine to draw into the destination region: - */ - - intel->vtbl.meta_draw_region(intel, dest_region); - intel->vtbl.meta_draw_format(intel, destFormat, depthFormat); /* ?? */ - - - /* Draw a single quad, no cliprects: - */ - intel->vtbl.meta_disable_cliprects(intel); - - intel->vtbl.draw_quad(intel, - 0, width, 0, height, - 0x00ff00ff, x, x + width, y, y + height); - - intel->vtbl.leave_meta_state(intel); - } - UNLOCK_HARDWARE(intel); - - intel_region_wait_fence(ctx, dest_region); /* required by GL */ - return GL_TRUE; -#endif - - return GL_FALSE; -} - - - - -static GLboolean -do_blit_readpixels(GLcontext * ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, GLvoid * pixels) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_region *src = intel_readbuf_region(intel); - struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj); - GLuint dst_offset; - GLuint rowLength; - - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s\n", __FUNCTION__); - - if (!src) - return GL_FALSE; - - if (dst) { - /* XXX This validation should be done by core mesa: - */ - if (!_mesa_validate_pbo_access(2, pack, width, height, 1, - format, type, pixels)) { - _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels"); - return GL_TRUE; - } - } - else { - /* PBO only for now: - */ - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s - not PBO\n", __FUNCTION__); - return GL_FALSE; - } - - - if (ctx->_ImageTransferState || - !intel_check_blit_format(src, format, type)) { - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s - bad format for blit\n", __FUNCTION__); - return GL_FALSE; - } - - if (pack->Alignment != 1 || pack->SwapBytes || pack->LsbFirst) { - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s: bad packing params\n", __FUNCTION__); - return GL_FALSE; - } - - if (pack->RowLength > 0) - rowLength = pack->RowLength; - else - rowLength = width; - - if (pack->Invert) { - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__); - return GL_FALSE; - } - else { - rowLength = -rowLength; - } - - /* XXX 64-bit cast? */ - dst_offset = (GLuint) _mesa_image_address(2, pack, pixels, width, height, - format, type, 0, 0, 0); - - - /* Although the blits go on the command buffer, need to do this and - * fire with lock held to guarentee cliprects are correct. - */ - intelFlush(&intel->ctx); - LOCK_HARDWARE(intel); - - if (intel->driDrawable->numClipRects) { - GLboolean all = (width * height * src->cpp == dst->Base.Size && - x == 0 && dst_offset == 0); - - dri_bo *dst_buffer = intel_bufferobj_buffer(intel, dst, - all ? INTEL_WRITE_FULL : - INTEL_WRITE_PART); - __DRIdrawablePrivate *dPriv = intel->driDrawable; - int nbox = dPriv->numClipRects; - drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t rect; - drm_clip_rect_t src_rect; - int i; - - src_rect.x1 = dPriv->x + x; - src_rect.y1 = dPriv->y + dPriv->h - (y + height); - src_rect.x2 = src_rect.x1 + width; - src_rect.y2 = src_rect.y1 + height; - - - - for (i = 0; i < nbox; i++) { - if (!intel_intersect_cliprects(&rect, &src_rect, &box[i])) - continue; - - intelEmitCopyBlit(intel, - src->cpp, - src->pitch, src->buffer, 0, src->tiling, - rowLength, dst_buffer, dst_offset, GL_FALSE, - rect.x1, - rect.y1, - rect.x1 - src_rect.x1, - rect.y2 - src_rect.y2, - rect.x2 - rect.x1, rect.y2 - rect.y1, - GL_COPY); - } - } - UNLOCK_HARDWARE(intel); - - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s - DONE\n", __FUNCTION__); - - return GL_TRUE; -} - -void -intelReadPixels(GLcontext * ctx, - GLint x, GLint y, GLsizei width, GLsizei height, - GLenum format, GLenum type, - const struct gl_pixelstore_attrib *pack, GLvoid * pixels) -{ - if (INTEL_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "%s\n", __FUNCTION__); - - intelFlush(ctx); - - if (do_blit_readpixels - (ctx, x, y, width, height, format, type, pack, pixels)) - return; - - if (do_texture_readpixels - (ctx, x, y, width, height, format, type, pack, pixels)) - return; - - if (INTEL_DEBUG & DEBUG_PIXEL) - _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); - - _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels); -} +../intel/intel_pixel_read.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 2934414d99..00a42111da 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -14,6 +14,7 @@ DRIVER_SOURCES = \ intel_decode.c \ intel_extensions.c \ intel_fbo.c \ + intel_generatemipmap.c \ intel_mipmap_tree.c \ intel_regions.c \ intel_screen.c \ @@ -22,6 +23,7 @@ DRIVER_SOURCES = \ intel_pixel_bitmap.c \ intel_pixel_copy.c \ intel_pixel_draw.c \ + intel_pixel_read.c \ intel_state.c \ intel_swapbuffers.c \ intel_tex.c \ @@ -69,6 +71,7 @@ DRIVER_SOURCES = \ brw_vs_constval.c \ brw_vs_emit.c \ brw_vs_state.c \ + brw_vs_surface_state.c \ brw_vtbl.c \ brw_wm.c \ brw_wm_debug.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 577497bf6b..13fece44b8 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -131,6 +131,7 @@ struct brw_context; #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_INPUT_VARYING 0x200 #define BRW_NEW_PSP 0x800 +#define BRW_NEW_WM_SURFACES 0x1000 #define BRW_NEW_FENCE 0x2000 #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 @@ -245,6 +246,9 @@ struct brw_vs_ouput_sizes { }; +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 @@ -450,8 +454,6 @@ struct brw_context struct { struct brw_state_flags dirty; - struct brw_tracked_state **atoms; - GLuint nr_atoms; GLuint nr_color_regions; struct intel_region *color_regions[MAX_DRAW_BUFFERS]; @@ -471,7 +473,8 @@ struct brw_context int validated_bo_count; } state; - struct brw_cache cache; + struct brw_cache cache; /** non-surface items */ + struct brw_cache surface_cache; /* surface items */ struct brw_cached_batch_item *cached_batch_items; struct { @@ -555,11 +558,6 @@ struct brw_context GLuint vs_size; GLuint total_size; - /* Dynamic tracker which changes to reflect the state referenced - * by active fp and vp program parameters: - */ - struct brw_tracked_state tracked_state; - dri_bo *curbe_bo; /** Offset within curbe_bo of space for current curbe entry */ GLuint curbe_offset; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 9197fede2d..a1a6c53d0e 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -36,6 +36,7 @@ #include "main/macros.h" #include "main/enums.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" #include "intel_regions.h" @@ -188,13 +189,6 @@ static void prepare_constant_buffer(struct brw_context *brw) GLfloat *buf; GLuint i; - /* Update our own dependency flags. This works because this - * function will also be called whenever fp or vp changes. - */ - brw->curbe.tracked_state.dirty.mesa = (_NEW_TRANSFORM|_NEW_PROJECTION); - brw->curbe.tracked_state.dirty.mesa |= vp->program.Base.Parameters->StateFlags; - brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; - if (sz == 0) { if (brw->curbe.last_buf) { free(brw->curbe.last_buf); @@ -335,78 +329,11 @@ static void prepare_constant_buffer(struct brw_context *brw) */ } - -/** - * Copy Mesa program parameters into given constant buffer. - */ -static void -update_constant_buffer(struct brw_context *brw, - const struct gl_program_parameter_list *params, - dri_bo *const_buffer) -{ - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - /* copy Mesa program constants into the buffer */ - if (const_buffer && size > 0) { - - assert(const_buffer); - assert(const_buffer->size >= size); - - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(const_buffer); - memcpy(const_buffer->virtual, params->ParameterValues, size); - drm_intel_gem_bo_unmap_gtt(const_buffer); - } - else { - dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); - } - - if (0) { - int i; - for (i = 0; i < params->NumParameters; i++) { - float *p = params->ParameterValues[i]; - printf("%d: %f %f %f %f\n", i, p[0], p[1], p[2], p[3]); - } - } - } -} - - -/** Copy current vertex program's parameters into the constant buffer */ -static void -update_vertex_constant_buffer(struct brw_context *brw) -{ - struct brw_vertex_program *vp = - (struct brw_vertex_program *) brw->vertex_program; - if (0) { - printf("update VS constants in buffer %p\n", vp->const_buffer); - printf("program %u\n", vp->program.Base.Id); - } - if (vp->use_const_buffer) - update_constant_buffer(brw, vp->program.Base.Parameters, vp->const_buffer); -} - - -/** Copy current fragment program's parameters into the constant buffer */ -static void -update_fragment_constant_buffer(struct brw_context *brw) -{ - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - if (fp->use_const_buffer) - update_constant_buffer(brw, fp->program.Base.Parameters, fp->const_buffer); -} - - static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; - update_vertex_constant_buffer(brw); - update_fragment_constant_buffer(brw); - BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); @@ -428,7 +355,7 @@ static void emit_constant_buffer(struct brw_context *brw) */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = (_NEW_TRANSFORM|_NEW_PROJECTION), /* plus fp and vp flags */ + .mesa = _NEW_PROGRAM_CONSTANTS, .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index b91b20bec6..3ef56a0068 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -343,7 +343,7 @@ static void brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; GLuint i; const unsigned char *ptr = NULL; GLuint interleave = 0; @@ -362,11 +362,11 @@ static void brw_prepare_vertices(struct brw_context *brw) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); /* Accumulate the list of enabled arrays. */ - while (tmp) { - GLuint i = _mesa_ffsll(tmp)-1; + while (vs_inputs) { + GLuint i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<<i); + vs_inputs &= ~(1 << i); enabled[nr_enabled++] = input; } @@ -477,17 +477,17 @@ static void brw_emit_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLuint tmp = brw->vs.prog_data->inputs_read; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint i; GLuint nr_enabled = 0; /* Accumulate the list of enabled arrays. */ - while (tmp) { - i = _mesa_ffsll(tmp)-1; + while (vs_inputs) { + i = _mesa_ffsll(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; - tmp &= ~(1<<i); + vs_inputs &= ~(1 << i); enabled[nr_enabled++] = input; } @@ -635,7 +635,7 @@ static void brw_emit_indices(struct brw_context *brw) if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count; + ib_size = get_size(index_buffer->type) * index_buffer->count - 1; /* Emit the indexbuffer packet: */ diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 62c98bd8bb..3ee56fe1b6 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -97,7 +97,7 @@ struct brw_glsl_call; #define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 4000 +#define BRW_EU_MAX_INSN 10000 struct brw_compile { struct brw_instruction store[BRW_EU_MAX_INSN]; @@ -855,12 +855,10 @@ void brw_math( struct brw_compile *p, void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ); void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLboolean relAddr, GLuint location, GLuint bind_table_index ); @@ -875,7 +873,6 @@ void brw_dp_READ_4_vs( struct brw_compile *p, void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ); /* If/else/endif. Works by manipulating the execution flags on each diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 60ea44f7a9..2a147fb8c3 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -865,9 +865,9 @@ void brw_math_16( struct brw_compile *p, */ void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); @@ -877,7 +877,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -912,9 +912,9 @@ void brw_dp_WRITE_16( struct brw_compile *p, */ void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); @@ -924,7 +924,7 @@ void brw_dp_READ_16( struct brw_compile *p, brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -958,21 +958,26 @@ void brw_dp_READ_16( struct brw_compile *p, */ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLboolean relAddr, GLuint location, GLuint bind_table_index ) { + /* XXX: relAddr not implemented */ + GLuint msg_reg_nr = 1; { + struct brw_reg b; brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); - /* set message header global offset field (reg 0, element 2) */ - /* Note that grf[0] will be copied to mrf[1] implicitly by the SEND instr */ - brw_MOV(p, - retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), - brw_imm_d(location)); + /* Setup MRF[1] with location/offset into const buffer */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + brw_MOV(p, b, brw_imm_ud(location)); brw_pop_insn_state(p); } @@ -988,7 +993,7 @@ void brw_dp_READ_4( struct brw_compile *p, dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); brw_set_dest(insn, dest); - brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(insn, bind_table_index, diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index 299357409c..d27c6c24ca 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -37,6 +37,9 @@ #include "tnl/tnl.h" #include "brw_context.h" #include "brw_fallback.h" +#include "intel_chipset.h" +#include "intel_fbo.h" +#include "intel_regions.h" #include "glapi/glapi.h" @@ -44,6 +47,7 @@ static GLboolean do_check_fallback(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; GLcontext *ctx = &brw->intel.ctx; GLuint i; @@ -81,6 +85,33 @@ static GLboolean do_check_fallback(struct brw_context *brw) return GL_TRUE; } + /* _NEW_BUFFERS */ + if (IS_965(intel->intelScreen->deviceID) && + !IS_G4X(intel->intelScreen->deviceID)) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. + * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. + * I would have done this, but there's also a nasty requirement that + * the depth and the color surfaces all be of the same LOD, which + * may be a worse requirement than this alignment. (Also, we may + * want to just demote the texture to untiled, instead). + */ + if (irb->region && irb->region->tiling != I915_TILING_NONE && + (irb->region->draw_offset & 4095)) { + DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); + return GL_TRUE; + } + } + } return GL_FALSE; } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 9bc5c35139..67c39e509c 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -118,7 +118,10 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + if (brw->vs.bind_bo != NULL) + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + else + OUT_BATCH(0); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ @@ -241,6 +244,8 @@ static void emit_depthbuffer(struct brw_context *brw) return; } + assert(region->tiling != I915_TILING_X); + BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index c3c85978f4..e1c2c7777b 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -166,6 +166,9 @@ static void upload_sf_prog(struct brw_context *brw) key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + /* _NEW_HINT */ + key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* _NEW_POLYGON */ if (key.do_twoside_color) { /* If we're rendering to a FBO, we have to invert the polygon @@ -188,7 +191,7 @@ static void upload_sf_prog(struct brw_context *brw) const struct brw_tracked_state brw_sf_prog = { .dirty = { - .mesa = (_NEW_LIGHT|_NEW_POLYGON|_NEW_POINT), + .mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index 1c0fb70fe0..6426b6df9f 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -51,7 +51,8 @@ struct brw_sf_prog_key { GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; - GLuint pad:10; + GLuint linear_color:1; /**< linear interp vs. perspective interp */ + GLuint pad:25; GLenum SpriteOrigin; }; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index 862835f157..2f63610425 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -295,9 +295,6 @@ static void invert_det( struct brw_sf_compile *c) } -#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ - FRAG_BIT_COL0 | \ - FRAG_BIT_COL1) static GLboolean calculate_masks( struct brw_sf_compile *c, GLuint reg, @@ -306,9 +303,16 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, GLushort *pc_linear) { GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); - GLuint persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS; + GLuint persp_mask; GLuint linear_mask; + if (c->key.do_flat_shading || c->key.linear_color) + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | + FRAG_BIT_COL0 | + FRAG_BIT_COL1); + else + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS); + if (c->key.do_flat_shading) linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1); else diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 81b0a45998..bf9f6cae55 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -72,11 +72,13 @@ const struct brw_tracked_state brw_sf_vp; const struct brw_tracked_state brw_state_base_address; const struct brw_tracked_state brw_urb_fence; const struct brw_tracked_state brw_vertex_state; +const struct brw_tracked_state brw_vs_surfaces; const struct brw_tracked_state brw_vs_prog; const struct brw_tracked_state brw_vs_unit; const struct brw_tracked_state brw_wm_input_sizes; const struct brw_tracked_state brw_wm_prog; const struct brw_tracked_state brw_wm_samplers; +const struct brw_tracked_state brw_wm_constant_surface; const struct brw_tracked_state brw_wm_surfaces; const struct brw_tracked_state brw_wm_unit; @@ -91,6 +93,20 @@ const struct brw_tracked_state brw_drawing_rect; const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; +/** + * Use same key for WM and VS surfaces. + */ +struct brw_surface_key { + GLenum target, depthmode; + dri_bo *bo; + GLint format, internal_format; + GLint first_level, last_level; + GLint width, height, depth; + GLint pitch, cpp; + uint32_t tiling; + GLuint offset; +}; + /*********************************************************************** * brw_state.c */ @@ -135,8 +151,8 @@ dri_bo *brw_search_cache( struct brw_cache *cache, void *aux_return); void brw_state_cache_check_size( struct brw_context *brw ); -void brw_init_cache( struct brw_context *brw ); -void brw_destroy_cache( struct brw_context *brw ); +void brw_init_caches( struct brw_context *brw ); +void brw_destroy_caches( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c @@ -150,4 +166,9 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, void brw_destroy_batch_cache( struct brw_context *brw ); void brw_clear_batch_cache_flush( struct brw_context *brw ); +/* brw_wm_surface_state.c */ +dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key ); + #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index d5b5166406..e40d7a0416 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -56,9 +56,9 @@ * incorrect program is run for the other instance. */ +#include "main/imports.h" #include "brw_state.h" #include "intel_batchbuffer.h" -#include "main/imports.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: @@ -69,8 +69,10 @@ #include "brw_sf.h" #include "brw_gs.h" -static GLuint hash_key( const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) + +static GLuint +hash_key(const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -95,6 +97,7 @@ static GLuint hash_key( const void *key, GLuint key_size, return hash; } + /** * Marks a new buffer as being chosen for the given cache id. */ @@ -111,6 +114,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, cache->brw->state.dirty.cache |= 1 << cache_id; } + static struct brw_cache_item * search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, GLuint hash, const void *key, GLuint key_size, @@ -143,7 +147,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, } -static void rehash( struct brw_cache *cache ) +static void +rehash(struct brw_cache *cache) { struct brw_cache_item **items; struct brw_cache_item *c, *next; @@ -164,15 +169,17 @@ static void rehash( struct brw_cache *cache ) cache->size = size; } + /** * Returns the buffer object matching cache_id and key, or NULL. */ -dri_bo *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs, - void *aux_return ) +dri_bo * +brw_search_cache(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + void *aux_return) { struct brw_cache_item *item; GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); @@ -192,6 +199,7 @@ dri_bo *brw_search_cache( struct brw_cache *cache, return item->bo; } + dri_bo * brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, @@ -265,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache, return bo; } -/* This doesn't really work with aux data. Use search/upload instead + +/** + * This doesn't really work with aux data. Use search/upload instead */ dri_bo * brw_cache_data_sz(struct brw_cache *cache, @@ -296,6 +306,7 @@ brw_cache_data_sz(struct brw_cache *cache, return bo; } + /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. * @@ -319,21 +330,22 @@ enum pool_type { DW_GENERAL_STATE }; + static void -brw_init_cache_id( struct brw_context *brw, - const char *name, - enum brw_cache_id id, - GLuint key_size, - GLuint aux_size) +brw_init_cache_id(struct brw_cache *cache, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) { - struct brw_cache *cache = &brw->cache; - cache->name[id] = strdup(name); cache->key_size[id] = key_size; cache->aux_size[id] = aux_size; } -void brw_init_cache( struct brw_context *brw ) + +static void +brw_init_non_surface_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->cache; @@ -342,114 +354,136 @@ void brw_init_cache( struct brw_context *brw ) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * - sizeof(struct brw_cache_item)); + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_VP", BRW_CC_VP, sizeof(struct brw_cc_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CC_UNIT", BRW_CC_UNIT, sizeof(struct brw_cc_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_PROG", BRW_WM_PROG, sizeof(struct brw_wm_prog_key), sizeof(struct brw_wm_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR, sizeof(struct brw_sampler_default_color), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SAMPLER", BRW_SAMPLER, 0, /* variable key/data size */ 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "WM_UNIT", BRW_WM_UNIT, sizeof(struct brw_wm_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_PROG", BRW_SF_PROG, sizeof(struct brw_sf_prog_key), sizeof(struct brw_sf_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_VP", BRW_SF_VP, sizeof(struct brw_sf_viewport), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SF_UNIT", BRW_SF_UNIT, sizeof(struct brw_sf_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_UNIT", BRW_VS_UNIT, sizeof(struct brw_vs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "VS_PROG", BRW_VS_PROG, sizeof(struct brw_vs_prog_key), sizeof(struct brw_vs_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_UNIT", BRW_CLIP_UNIT, sizeof(struct brw_clip_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "CLIP_PROG", BRW_CLIP_PROG, sizeof(struct brw_clip_prog_key), sizeof(struct brw_clip_prog_data)); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_UNIT", BRW_GS_UNIT, sizeof(struct brw_gs_unit_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "GS_PROG", BRW_GS_PROG, sizeof(struct brw_gs_prog_key), sizeof(struct brw_gs_prog_data)); +} + + +static void +brw_init_surface_cache(struct brw_context *brw) +{ + struct brw_cache *cache = &brw->surface_cache; + + cache->brw = brw; - brw_init_cache_id(brw, + cache->size = 7; + cache->n_items = 0; + cache->items = (struct brw_cache_item **) + _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + + brw_init_cache_id(cache, "SS_SURFACE", BRW_SS_SURFACE, sizeof(struct brw_surface_state), 0); - brw_init_cache_id(brw, + brw_init_cache_id(cache, "SS_SURF_BIND", BRW_SS_SURF_BIND, 0, 0); } + +void +brw_init_caches(struct brw_context *brw) +{ + brw_init_non_surface_cache(brw); + brw_init_surface_cache(brw); +} + + static void -brw_clear_cache( struct brw_context *brw ) +brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) { struct brw_cache_item *c, *next; GLuint i; @@ -457,8 +491,8 @@ brw_clear_cache( struct brw_context *brw ) if (INTEL_DEBUG & DEBUG_STATE) _mesa_printf("%s\n", __FUNCTION__); - for (i = 0; i < brw->cache.size; i++) { - for (c = brw->cache.items[i]; c; c = next) { + for (i = 0; i < cache->size; i++) { + for (c = cache->items[i]; c; c = next) { int j; next = c->next; @@ -468,10 +502,10 @@ brw_clear_cache( struct brw_context *brw ) free((void *)c->key); free(c); } - brw->cache.items[i] = NULL; + cache->items[i] = NULL; } - brw->cache.n_items = 0; + cache->n_items = 0; if (brw->curbe.last_buf) { _mesa_free(brw->curbe.last_buf); @@ -483,25 +517,46 @@ brw_clear_cache( struct brw_context *brw ) brw->state.dirty.cache |= ~0; } -void brw_state_cache_check_size( struct brw_context *brw ) + +void +brw_state_cache_check_size(struct brw_context *brw) { + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + /* un-tuned guess. We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. */ if (brw->cache.n_items > 1000) - brw_clear_cache(brw); + brw_clear_cache(brw, &brw->cache); + + if (brw->surface_cache.n_items > 1000) + brw_clear_cache(brw, &brw->surface_cache); } -void brw_destroy_cache( struct brw_context *brw ) + +static void +brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { GLuint i; - brw_clear_cache(brw); + if (INTEL_DEBUG & DEBUG_STATE) + _mesa_printf("%s\n", __FUNCTION__); + + brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { - dri_bo_unreference(brw->cache.last_bo[i]); - free(brw->cache.name[i]); + dri_bo_unreference(cache->last_bo[i]); + free(cache->name[i]); } - free(brw->cache.items); - brw->cache.items = NULL; - brw->cache.size = 0; + free(cache->items); + cache->items = NULL; + cache->size = 0; +} + + +void +brw_destroy_caches(struct brw_context *brw) +{ + brw_destroy_cache(brw, &brw->cache); + brw_destroy_cache(brw, &brw->surface_cache); } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index a713262269..e94fa7d2b4 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -126,6 +126,8 @@ static void dump_wm_surface_state(struct brw_context *brw) surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not "); state_out(name, surf, surfoff, 4, "mip base %d\n", surf->ss4.min_lod); + state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", + surf->ss5.x_offset, surf->ss5.y_offset); dri_bo_unmap(surf_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 5de1450e61..c6dfea4743 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -59,11 +59,12 @@ const struct brw_tracked_state *atoms[] = &brw_curbe_offsets, &brw_recalculate_urb_fence, - &brw_cc_vp, &brw_cc_unit, - &brw_wm_surfaces, /* must do before samplers */ + &brw_vs_surfaces, /* must do before unit */ + &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + &brw_wm_surfaces, /* must do before samplers and unit */ &brw_wm_samplers, &brw_wm_unit, @@ -88,54 +89,26 @@ const struct brw_tracked_state *atoms[] = &brw_line_stipple, &brw_aa_line_parameters, - /* Ordering of the commands below is documented as fixed. - */ -#if 0 - &brw_pipelined_state_pointers, - &brw_urb_fence, - &brw_constant_buffer_state, -#else + &brw_psp_urb_cbs, -#endif &brw_drawing_rect, &brw_indices, &brw_vertices, - NULL, /* brw_constant_buffer */ + &brw_constant_buffer }; void brw_init_state( struct brw_context *brw ) { - GLuint i; - - brw_init_cache(brw); - - brw->state.atoms = _mesa_malloc(sizeof(atoms)); - brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); - _mesa_memcpy(brw->state.atoms, atoms, sizeof(atoms)); - - /* Patch in a pointer to the dynamic state atom: - */ - for (i = 0; i < brw->state.nr_atoms; i++) - if (brw->state.atoms[i] == NULL) - brw->state.atoms[i] = &brw->curbe.tracked_state; - - _mesa_memcpy(&brw->curbe.tracked_state, - &brw_constant_buffer, - sizeof(brw_constant_buffer)); + brw_init_caches(brw); } void brw_destroy_state( struct brw_context *brw ) { - if (brw->state.atoms) { - _mesa_free(brw->state.atoms); - brw->state.atoms = NULL; - } - - brw_destroy_cache(brw); + brw_destroy_caches(brw); brw_destroy_batch_cache(brw); } @@ -218,6 +191,7 @@ static struct dirty_bit_map mesa_bits[] = { DEFINE_BIT(_NEW_MULTISAMPLE), DEFINE_BIT(_NEW_TRACK_MATRIX), DEFINE_BIT(_NEW_PROGRAM), + DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), {0, 0, 0} }; @@ -336,7 +310,7 @@ void brw_validate_state( struct brw_context *brw ) /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; @@ -367,8 +341,8 @@ void brw_upload_state(struct brw_context *brw) _mesa_memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < brw->state.nr_atoms; i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + for (i = 0; i < Elements(atoms); i++) { + const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; assert(atom->dirty.mesa || @@ -397,7 +371,7 @@ void brw_upload_state(struct brw_context *brw) } else { for (i = 0; i < Elements(atoms); i++) { - const struct brw_tracked_state *atom = brw->state.atoms[i]; + const struct brw_tracked_state *atom = atoms[i]; if (brw->intel.Fallback) break; diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 89e2981203..040ec9327d 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1075,7 +1075,7 @@ struct brw_surface_state GLuint y_offset:4; GLuint pad0:1; GLuint x_offset:7; - } ss5; /* NEW in Integrated Graphics Device */ + } ss5; /* New in G4X */ }; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 51a617fcb4..67d8d96947 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -28,7 +28,6 @@ * Authors: * Keith Whitwell <keith@tungstengraphics.com> */ - /* Code to layout images in a mipmap tree for i965. */ @@ -40,14 +39,15 @@ #define FILE_DEBUG_FLAG DEBUG_MIPTREE -GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_tree *mt ) +GLboolean brw_miptree_layout(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling) { - /* XXX: these vary depending on image format: - */ -/* GLint align_w = 4; */ + /* XXX: these vary depending on image format: */ + /* GLint align_w = 4; */ switch (mt->target) { - case GL_TEXTURE_CUBE_MAP: + case GL_TEXTURE_CUBE_MAP: case GL_TEXTURE_3D: { GLuint width = mt->width0; GLuint height = mt->height0; @@ -59,25 +59,25 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t GLuint align_w = 4; mt->total_height = 0; - + if (mt->compressed) { align_w = intel_compressed_alignment(mt->internal_format); mt->pitch = ALIGN(width, align_w); pack_y_pitch = (height + 3) / 4; } else { - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->width0); - pack_y_pitch = ALIGN(mt->height0, align_h); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); + pack_y_pitch = ALIGN(mt->height0, align_h); } - pack_x_pitch = mt->pitch; + pack_x_pitch = width; pack_x_nr = 1; - for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + for (level = mt->first_level ; level <= mt->last_level ; level++) { GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; GLint x = 0; GLint y = 0; GLint q, j; - + intel_miptree_set_level_info(mt, level, nr_images, 0, mt->total_height, width, height, depth); @@ -89,7 +89,7 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t } x = 0; - y += pack_y_pitch; + y += pack_y_pitch; } @@ -98,40 +98,40 @@ GLboolean brw_miptree_layout( struct intel_context *intel, struct intel_mipmap_t height = minify(height); depth = minify(depth); - if (mt->compressed) { - pack_y_pitch = (height + 3) / 4; - - if (pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); - pack_x_nr <<= 1; - } - } else { - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); - } - } + if (mt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > ALIGN(width, align_w)) { + pack_x_pitch = ALIGN(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = ALIGN(pack_y_pitch, align_h); + } + } } break; } default: - i945_miptree_layout_2d(intel, mt); + i945_miptree_layout_2d(intel, mt, tiling); break; } - DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - mt->pitch, + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + mt->pitch, mt->total_height, mt->cpp, mt->pitch * mt->total_height * mt->cpp ); - + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index b69616d6e5..edb604b653 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -69,13 +69,18 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { GLuint i, reg = 0, mrf; -#if 0 - if (c->vp->program.Base.Parameters->NumParameters >= 6) - c->vp->use_const_buffer = 1; + /* Determine whether to use a real constant buffer or use a block + * of GRF registers for constants. The later is faster but only + * works if everything fits in the GRF. + * XXX this heuristic/check may need some fine tuning... + */ + if (c->vp->program.Base.Parameters->NumParameters + + c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) + c->vp->use_const_buffer = GL_TRUE; else -#endif c->vp->use_const_buffer = GL_FALSE; - /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ + + /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/ /* r0 -- reserved as usual */ @@ -709,10 +714,11 @@ get_constant(struct brw_vs_compile *c, struct brw_compile *p = &c->func; struct brw_reg const_reg; struct brw_reg const2_reg; + const GLboolean relAddr = src->RelAddr; assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index || src->RelAddr) { + if (c->current_const[argIndex].index != src->Index || relAddr) { struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; c->current_const[argIndex].index = src->Index; @@ -725,13 +731,13 @@ get_constant(struct brw_vs_compile *c, brw_dp_READ_4_vs(p, c->current_const[argIndex].reg,/* writeback dest */ 0, /* oword */ - src->RelAddr, /* relative indexing? */ + relAddr, /* relative indexing? */ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); - if (src->RelAddr) { + if (relAddr) { /* second read */ const2_reg = get_tmp(c); @@ -742,7 +748,7 @@ get_constant(struct brw_vs_compile *c, brw_dp_READ_4_vs(p, const2_reg, /* writeback dest */ 1, /* oword */ - src->RelAddr, /* relative indexing? */ + relAddr, /* relative indexing? */ addrReg, /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER @@ -752,7 +758,7 @@ get_constant(struct brw_vs_compile *c, const_reg = c->current_const[argIndex].reg; - if (src->RelAddr) { + if (relAddr) { /* merge the two Owords into the constant register */ /* const_reg[7..4] = const2_reg[7..4] */ brw_MOV(p, @@ -1177,15 +1183,15 @@ post_vs_emit( struct brw_vs_compile *c, */ void brw_vs_emit(struct brw_vs_compile *c ) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 +#define MAX_LOOP_DEPTH 32 struct brw_compile *p = &c->func; - GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn, if_insn = 0; + const GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn, if_depth = 0, loop_depth = 0; GLuint end_offset = 0; struct brw_instruction *end_inst, *last_inst; - struct brw_instruction *if_inst[MAX_IFSN]; - struct brw_indirect stack_index = brw_indirect(0, 0); - + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; GLuint file; @@ -1219,7 +1225,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) for (insn = 0; insn < nr_insns; insn++) { - struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; struct brw_reg args[3], dst; GLuint i; @@ -1232,7 +1238,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) */ if (inst->Opcode != OPCODE_SWZ) for (i = 0; i < 3; i++) { - struct prog_src_register *src = &inst->SrcReg[i]; + const struct prog_src_register *src = &inst->SrcReg[i]; index = src->Index; file = src->File; if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) @@ -1376,16 +1382,51 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_xpd(p, dst, args[0], args[1]); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; +#if 0 + case OPCODE_BGNLOOP: + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + break; + case OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_ENDLOOP: + { + struct brw_instruction *inst0, *inst1; + loop_depth--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + inst0->bits3.if_else.pop_count = 0; + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = inst1 - inst0; + inst0->bits3.if_else.pop_count = 0; + } + } + } + break; +#else + (void) loop_inst; + (void) loop_depth; +#endif case OPCODE_BRA: brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c new file mode 100644 index 0000000000..89f47522a1 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -0,0 +1,226 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "main/mtypes.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "shader/prog_parameter.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/* Creates a new VS constant buffer reflecting the current VS program's + * constants, if needed by the VS program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_vs_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_VERTEX_PROGRAM */ + if (!vp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} + +/** + * Update the surface state for a VS constant buffer. + * + * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer. + */ +static void +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + + assert(surf == 0); + + /* If we're in this state update atom, we need to update VS constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(vp->const_buffer); + vp->const_buffer = brw_vs_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (vp->const_buffer == 0) { + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = NULL; + return; + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = vp->const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } +} + + +/** + * Constructs the binding table for the VS surface state. + */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < BRW_VS_MAX_SURF; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + /* The presumed offsets were set in the data values for + * brw_upload_cache. + */ + drm_intel_bo_emit_reloc(bind_bo, i * 4, + brw->vs.surf_bo[i], 0, + I915_GEM_DOMAIN_INSTRUCTION, 0); + } + } + + free(data); + } + + return bind_bo; +} + +/** + * Vertex shader surfaces (constant buffer). + * + * This consumes the state updates for the constant buffer needing + * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and + * CACHE_NEW_SURF_BIND for the binding table upload. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + int i; + int nr_surfaces = 0; + + brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); + + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + nr_surfaces = i + 1; + } + } + + if (brw->vs.nr_surfaces != nr_surfaces) { + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; + brw->vs.nr_surfaces = nr_surfaces; + } + + /* Note that we don't end up updating the bind_bo if we don't have a + * surface to be pointing at. This should be relatively harmless, as it + * just slightly increases our working set size. + */ + if (brw->vs.nr_surfaces != 0) { + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + } +} + +const struct brw_tracked_state brw_vs_surfaces = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_VERTEX_PROGRAM), + .cache = 0 + }, + .prepare = prepare_vs_surfaces, +}; + + + diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index ba03afd6c1..b284e623d0 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -194,7 +194,7 @@ static GLuint brw_flush_cmd( void ) struct brw_mi_flush flush; flush.opcode = CMD_MI_FLUSH; flush.pad = 0; - flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE; + flush.flags = BRW_FLUSH_STATE_CACHE; return *(GLuint *)&flush; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 8a3b7df9c7..14e05be4f6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -272,6 +272,9 @@ static void brw_wm_populate_key( struct brw_context *brw, /* _NEW_LIGHT */ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + /* _NEW_HINT */ + key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* _NEW_TEXTURE */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; @@ -351,6 +354,7 @@ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | _NEW_DEPTH | + _NEW_HINT | _NEW_STENCIL | _NEW_POLYGON | _NEW_LINE | diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 295fed851b..ba497432c6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -63,6 +63,7 @@ struct brw_wm_prog_key { GLuint computes_depth:1; /* could be derived from program string */ GLuint source_depth_to_render_target:1; GLuint flat_shade:1; + GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint runtime_check_aads_emit:1; GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ @@ -241,15 +242,20 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + GLuint cur_inst; /**< index of current instruction */ + + GLboolean out_of_regs; /**< ran out of GRF registers? */ + /** Mapping from Mesa registers to hardware registers */ struct { GLboolean inited; struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + GLboolean used_grf[BRW_WM_MAX_GRF]; + GLuint first_free_grf; struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; /**< Index of next free GRF register */ GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 72fc21d2eb..4c3879f9fa 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -353,6 +353,19 @@ static void emit_mad( struct brw_compile *p, } } +static void emit_trunc( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + brw_RNDZ(p, dst[i], arg0[i]); + } + } +} static void emit_lrp( struct brw_compile *p, const struct brw_reg *dst, @@ -1044,7 +1057,6 @@ static void emit_spill( struct brw_wm_compile *c, */ brw_dp_WRITE_16(p, retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW), - 1, slot); } @@ -1072,7 +1084,6 @@ static void emit_unspill( struct brw_wm_compile *c, brw_dp_READ_16(p, retype(vec16(reg), BRW_REGISTER_TYPE_UW), - 1, slot); } @@ -1224,6 +1235,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_dph(p, dst, dst_flags, args[0], args[1]); break; + case OPCODE_TRUNC: + emit_trunc(p, dst, dst_flags, args[0]); + break; + case OPCODE_LRP: emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 49aad281d7..b9e8dd2e96 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -354,13 +354,25 @@ static void emit_interp( struct brw_wm_compile *c, src_undef()); } else { - emit_op(c, - WM_LINTERP, - dst, - 0, - interp, - deltas, - src_undef()); + if (c->key.linear_color) { + emit_op(c, + WM_LINTERP, + dst, + 0, + interp, + deltas, + src_undef()); + } + else { + /* perspective-corrected color interpolation */ + emit_op(c, + WM_PINTERP, + dst, + 0, + interp, + deltas, + get_pixel_w(c)); + } } break; case FRAG_ATTRIB_FOGC: diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 094c1af2fe..6e6f2bac7b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1,5 +1,7 @@ #include "main/macros.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_optimize.h" #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" @@ -21,7 +23,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { case OPCODE_IF: - case OPCODE_TRUNC: case OPCODE_ENDIF: case OPCODE_CAL: case OPCODE_BRK: @@ -42,6 +43,83 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) } + +static void +reclaim_temps(struct brw_wm_compile *c); + + +/** Mark GRF register as used. */ +static void +prealloc_grf(struct brw_wm_compile *c, int r) +{ + c->used_grf[r] = GL_TRUE; +} + + +/** Mark given GRF register as not in use. */ +static void +release_grf(struct brw_wm_compile *c, int r) +{ + /*assert(c->used_grf[r]);*/ + c->used_grf[r] = GL_FALSE; + c->first_free_grf = MIN2(c->first_free_grf, r); +} + + +/** Return index of a free GRF, mark it as used. */ +static int +alloc_grf(struct brw_wm_compile *c) +{ + GLuint r; + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + /* no free temps, try to reclaim some */ + reclaim_temps(c); + c->first_free_grf = 0; + + /* try alloc again */ + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + for (r = 0; r < BRW_WM_MAX_GRF; r++) { + assert(c->used_grf[r]); + } + + /* really, no free GRF regs found */ + if (!c->out_of_regs) { + /* print warning once per compilation */ + _mesa_warning(NULL, "i965: ran out of registers for fragment program"); + c->out_of_regs = GL_TRUE; + } + + return -1; +} + + +/** Return number of GRF registers used */ +static int +num_grf_used(const struct brw_wm_compile *c) +{ + int r; + for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) + if (c->used_grf[r]) + return r + 1; + return 0; +} + + + /** * Record the mapping of a Mesa register to a hardware register. */ @@ -68,11 +146,23 @@ static int get_scalar_dst_index(const struct prog_instruction *inst) static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { struct brw_reg reg; - if(c->tmp_index == c->tmp_max) - c->tmp_regs[ c->tmp_max++ ] = c->reg_index++; - + + /* if we need to allocate another temp, grow the tmp_regs[] array */ + if (c->tmp_index == c->tmp_max) { + int r = alloc_grf(c); + if (r < 0) { + /*printf("Out of temps in %s\n", __FUNCTION__);*/ + r = 50; /* XXX random register! */ + } + c->tmp_regs[ c->tmp_max++ ] = r; + } + + /* form the GRF register */ reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); + /*printf("alloc_temp %d\n", reg.nr);*/ + assert(reg.nr < BRW_WM_MAX_GRF); return reg; + } /** @@ -130,35 +220,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, return brw_null_reg(); } + assert(index < 256); + assert(component < 4); + /* see if we've already allocated a HW register for this Mesa register */ if (c->wm_regs[file][index][component].inited) { - /* yes, re-use */ - reg = c->wm_regs[file][index][component].reg; + /* yes, re-use */ + reg = c->wm_regs[file][index][component].reg; } else { /* no, allocate new register */ - reg = brw_vec8_grf(c->reg_index, 0); - } + int grf = alloc_grf(c); + /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ + if (grf < 0) { + /* totally out of temps */ + grf = 51; /* XXX random register! */ + } - /* if this is a new register allocation, record it in the table */ - if (!c->wm_regs[file][index][component].inited) { - set_reg(c, file, index, component, reg); - c->reg_index++; - } + reg = brw_vec8_grf(grf, 0); + /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - if (c->reg_index >= BRW_WM_MAX_GRF - 12) { - /* ran out of temporary registers! */ -#if 1 - /* This is a big hack for now. - * Return bad register index, just don't hang the GPU. - */ - _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); - c->reg_index = BRW_WM_MAX_GRF - 13; -#else - return brw_null_reg(); -#endif + set_reg(c, file, index, component, reg); } - + if (neg & (1 << component)) { reg = negate(reg); } @@ -168,6 +252,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, } + +/** + * This is called if we run out of GRF registers. Examine the live intervals + * of temp regs in the program and free those which won't be used again. + */ +static void +reclaim_temps(struct brw_wm_compile *c) +{ + GLint intBegin[MAX_PROGRAM_TEMPS]; + GLint intEnd[MAX_PROGRAM_TEMPS]; + int index; + + /*printf("Reclaim temps:\n");*/ + + _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, + intBegin, intEnd); + + for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { + if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { + /* program temp[i] can be freed */ + int component; + /*printf(" temp[%d] is dead\n", index);*/ + for (component = 0; component < 4; component++) { + if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { + int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; + release_grf(c, r); + /* + printf(" Reclaim temp %d, reg %d at inst %d\n", + index, r, c->cur_inst); + */ + c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; + } + } + } + } +} + + + + /** * Preallocate registers. This sets up the Mesa to hardware register * mapping for certain registers, such as constants (uniforms/state vars) @@ -179,6 +303,10 @@ static void prealloc_reg(struct brw_wm_compile *c) struct brw_reg reg; int urb_read_length = 0; GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; + GLuint reg_index = 0; + + memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); + c->first_free_grf = 0; for (i = 0; i < 4; i++) { if (i < c->key.nr_depth_regs) @@ -187,14 +315,20 @@ static void prealloc_reg(struct brw_wm_compile *c) reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2 * c->key.nr_depth_regs; + reg_index += 2 * c->key.nr_depth_regs; /* constants */ { - const int nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_temps = c->fp->program.Base.NumTemporaries; /* use a real constant buffer, or just use a section of the GRF? */ - c->fp->use_const_buffer = GL_FALSE; /* (nr_params > 8);*/ + /* XXX this heuristic may need adjustment... */ + if ((nr_params + nr_temps) * 4 + reg_index > 80) + c->fp->use_const_buffer = GL_TRUE; + else + c->fp->use_const_buffer = GL_FALSE; + /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ if (c->fp->use_const_buffer) { /* We'll use a real constant buffer and fetch constants from @@ -216,7 +350,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (i = 0; i < nr_params; i++) { /* loop over XYZW channels */ for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + reg = brw_vec1_grf(reg_index + index / 8, index % 8); /* Save pointer to parameter/constant value. * Constants will be copied in prepare_constant_buffer() */ @@ -226,7 +360,7 @@ static void prealloc_reg(struct brw_wm_compile *c) } /* number of constant regs used (each reg is float[8]) */ c->nr_creg = 2 * ((4 * nr_params + 15) / 16); - c->reg_index += c->nr_creg; + reg_index += c->nr_creg; } } @@ -242,23 +376,31 @@ static void prealloc_reg(struct brw_wm_compile *c) fp_input = -1; if (fp_input >= 0 && inputs & (1 << fp_input)) { - urb_read_length = c->reg_index; - reg = brw_vec8_grf(c->reg_index, 0); + urb_read_length = reg_index; + reg = brw_vec8_grf(reg_index, 0); for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); } if (c->key.vp_outputs_written & (1 << i)) { - c->reg_index += 2; + reg_index += 2; } } c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = urb_read_length; c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); - c->reg_index += 2; + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index++; + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index += 2; + + /* mark GRF regs [0..reg_index-1] as in-use */ + for (i = 0; i < reg_index; i++) + prealloc_grf(c, i); + + /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ + prealloc_grf(c, 126); + prealloc_grf(c, 127); /* An instruction may reference up to three constants. * They'll be found in these registers. @@ -267,12 +409,12 @@ static void prealloc_reg(struct brw_wm_compile *c) if (c->fp->use_const_buffer) { for (i = 0; i < 3; i++) { c->current_const[i].index = -1; - c->current_const[i].reg = alloc_tmp(c); + c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); } } #if 0 printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); - printf("AFTER PRE_ALLOC, reg_index = %d\n", c->reg_index); + printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); #endif } @@ -294,23 +436,20 @@ static void fetch_constants(struct brw_wm_compile *c, if (src->File == PROGRAM_STATE_VAR || src->File == PROGRAM_CONSTANT || src->File == PROGRAM_UNIFORM) { - if (c->current_const[i].index != src->Index) { - c->current_const[i].index = src->Index; + c->current_const[i].index = src->Index; #if 0 - printf(" fetch const[%d] for arg %d into reg %d\n", - src->Index, i, c->current_const[i].reg.nr); + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, i, c->current_const[i].reg.nr); #endif - /* need to fetch the constant now */ - brw_dp_READ_4(p, - c->current_const[i].reg, /* writeback dest */ - 1, /* msg_reg */ - src->RelAddr, /* relative indexing? */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ - ); - } + /* need to fetch the constant now */ + brw_dp_READ_4(p, + c->current_const[i].reg, /* writeback dest */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ + ); } } } @@ -379,6 +518,14 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, const GLuint nr = 1; const GLuint component = GET_SWZ(src->Swizzle, channel); + /* Extended swizzle terms */ + if (component == SWIZZLE_ZERO) { + return brw_imm_f(0.0F); + } + else if (component == SWIZZLE_ONE) { + return brw_imm_f(1.0F); + } + if (c->fp->use_const_buffer && (src->File == PROGRAM_STATE_VAR || src->File == PROGRAM_CONSTANT || @@ -676,27 +823,26 @@ static void emit_fb_write(struct brw_wm_compile *c, } if (c->key.dest_depth_reg) { - GLuint comp = c->key.dest_depth_reg / 2; - GLuint off = c->key.dest_depth_reg % 2; + const GLuint comp = c->key.dest_depth_reg / 2; + const GLuint off = c->key.dest_depth_reg % 2; - assert(comp == 1); - assert(off == 0); -#if 0 - /* XXX do we need this code? comp always 1, off always 0, it seems */ if (off != 0) { + /* XXX this code needs review/testing */ + struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); + struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); + brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_MOV(p, brw_message_reg(nr+1), arg1_1); brw_pop_insn_state(p); } else -#endif { - struct brw_reg src = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src); + struct brw_reg src = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src); } nr += 2; } @@ -2598,15 +2744,15 @@ static void post_wm_emit( struct brw_wm_compile *c ) static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 - struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH]; - struct brw_instruction *inst0, *inst1; - int i, if_insn = 0, loop_insn = 0; + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + GLuint i, if_depth = 0, loop_depth = 0; struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); - c->reg_index = 0; + c->out_of_regs = GL_FALSE; + prealloc_reg(c); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); @@ -2614,6 +2760,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { const struct prog_instruction *inst = &c->prog_instructions[i]; + c->cur_inst = i; + #if 0 _mesa_printf("Inst %d: ", i); _mesa_print_instruction(inst); @@ -2773,15 +2921,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_kil(c); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; case OPCODE_BGNSUB: brw_save_label(p, inst->Comment, p->nr_insn); @@ -2815,7 +2963,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) break; case OPCODE_BGNLOOP: /* XXX may need to invalidate the current_constant regs */ - loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: brw_BREAK(p); @@ -2826,36 +2974,35 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: - loop_insn--; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]); - /* patch all the BREAK instructions from - last BEGINLOOP */ - while (inst0 > loop_inst[loop_insn]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { + { + struct brw_instruction *inst0, *inst1; + loop_depth--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; inst0->bits3.if_else.pop_count = 0; - } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { inst0->bits3.if_else.jump_count = inst1 - inst0; inst0->bits3.if_else.pop_count = 0; - } - } - break; + } + } + } + break; default: _mesa_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } + if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); - - if (c->reg_index >= BRW_WM_MAX_GRF) { - _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()"); - /* XXX we need to do some proper error recovery here */ - } } @@ -2879,6 +3026,6 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_print_program(c, "brw_wm_glsl_emit done"); } - c->prog_data.total_grf = c->reg_index; + c->prog_data.total_grf = num_grf_used(c); c->prog_data.total_scratch = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index bd60ac9b31..8fd067abe7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -116,6 +116,10 @@ const struct { { C, 0, 1, 1, 1 } }; +/** + * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES + * \param lookup bitmask of IZ_* flags + */ void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, struct brw_wm_prog_key *key ) diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index ab9aa2f10d..3436a24717 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -159,6 +159,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_FRC: case OPCODE_MOV: case OPCODE_SWZ: + case OPCODE_TRUNC: read0 = writemask; break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 805df8a4af..096f74394e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -176,22 +176,6 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, } } - -/** - * Use same key for WM and VS surfaces. - */ -struct brw_surface_key { - GLenum target, depthmode; - dri_bo *bo; - GLint format, internal_format; - GLint first_level, last_level; - GLint width, height, depth; - GLint pitch, cpp; - uint32_t tiling; - GLuint offset; -}; - - static void brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) { @@ -268,7 +252,7 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.cube_neg_z = 1; } - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -321,10 +305,11 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.tiling = intelObj->mt->region->tiling; dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); } @@ -336,7 +321,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -static dri_bo * +dri_bo * brw_create_constant_surface( struct brw_context *brw, struct brw_surface_key *key ) { @@ -362,7 +347,7 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ - bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), &key->bo, key->bo ? 1 : 0, &surf, sizeof(surf), @@ -380,39 +365,70 @@ brw_create_constant_surface( struct brw_context *brw, return bo; } +/* Creates a new WM constant buffer reflecting the current fragment program's + * constants, if needed by the fragment program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. + */ +static drm_intel_bo * +brw_wm_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = fp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (!fp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} /** * Update the surface state for a WM constant buffer. * The constant buffer will be (re)allocated here if needed. */ -static dri_bo * +static void brw_update_wm_constant_surface( GLcontext *ctx, - GLuint surf, - dri_bo *const_buffer, - const struct gl_program_parameter_list *params) + GLuint surf) { struct brw_context *brw = brw_context(ctx); struct brw_surface_key key; - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = + fp->program.Base.Parameters; - /* free old const buffer if too small */ - if (const_buffer && const_buffer->size < size) { - dri_bo_unreference(const_buffer); - const_buffer = NULL; - } + /* If we're in this state update atom, we need to update WM constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); - /* alloc new buffer if needed */ - if (!const_buffer) { - const_buffer = - drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64); + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + return; } memset(&key, 0, sizeof(key)); key.format = MESA_FORMAT_RGBA_FLOAT32; key.internal_format = GL_RGBA; - key.bo = const_buffer; + key.bo = fp->const_buffer; key.depthmode = GL_NONE; key.pitch = params->NumParameters; key.width = params->NumParameters; @@ -427,77 +443,59 @@ brw_update_wm_constant_surface( GLcontext *ctx, */ dri_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), &key.bo, key.bo ? 1 : 0, NULL); if (brw->wm.surf_bo[surf] == NULL) { brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); } - - return const_buffer; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } - /** - * Update the surface state for a VS constant buffer. - * The constant buffer will be (re)allocated here if needed. + * Updates surface / buffer for fragment shader constant buffer, if + * one is required. + * + * This consumes the state updates for the constant buffer, and produces + * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for + * inclusion in the binding table. */ -static dri_bo * -brw_update_vs_constant_surface( GLcontext *ctx, - GLuint surf, - dri_bo *const_buffer, - const struct gl_program_parameter_list *params) +static void prepare_wm_constant_surface(struct brw_context *brw ) { - struct brw_context *brw = brw_context(ctx); - struct brw_surface_key key; - struct intel_context *intel = &brw->intel; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - - assert(surf == 0); - - /* free old const buffer if too small */ - if (const_buffer && const_buffer->size < size) { - dri_bo_unreference(const_buffer); - const_buffer = NULL; - } - - /* alloc new buffer if needed */ - if (!const_buffer) { - const_buffer = - drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); - } - - memset(&key, 0, sizeof(key)); - - key.format = MESA_FORMAT_RGBA_FLOAT32; - key.internal_format = GL_RGBA; - key.bo = const_buffer; - key.depthmode = GL_NONE; - key.pitch = params->NumParameters; - key.width = params->NumParameters; - key.height = 1; - key.depth = 1; - key.cpp = 16; + GLcontext *ctx = &brw->intel.ctx; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - /* - printf("%s:\n", __FUNCTION__); - printf(" width %d height %d depth %d cpp %d pitch %d\n", - key.width, key.height, key.depth, key.cpp, key.pitch); - */ + drm_intel_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); - dri_bo_unreference(brw->vs.surf_bo[surf]); - brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->vs.surf_bo[surf] == NULL) { - brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (fp->const_buffer == 0) { + if (brw->wm.surf_bo[surf] != NULL) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + } + return; } - return const_buffer; + brw_update_wm_constant_surface(ctx, surf); } +const struct brw_tracked_state brw_wm_constant_surface = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .cache = 0 + }, + .prepare = prepare_wm_constant_surface, +}; + /** * Sets up a surface state structure to point at the given region. @@ -507,7 +505,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, static void brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, - unsigned int unit, GLboolean cached) + unsigned int unit) { GLcontext *ctx = &brw->intel.ctx; dri_bo *region_bo = NULL; @@ -567,12 +565,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw, ctx->Color.BlendEnabled); dri_bo_unreference(brw->wm.surf_bo[unit]); - brw->wm.surf_bo[unit] = NULL; - if (cached) - brw->wm.surf_bo[unit] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, - NULL); + brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + ®ion_bo, 1, + NULL); if (brw->wm.surf_bo[unit] == NULL) { struct brw_surface_state surf; @@ -581,7 +578,27 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.surface_format = key.surface_format; surf.ss0.surface_type = key.surface_type; - surf.ss1.base_addr = key.draw_offset; + if (key.tiling == I915_TILING_NONE) { + surf.ss1.base_addr = key.draw_offset; + } else { + uint32_t tile_offset = key.draw_offset % 4096; + + surf.ss1.base_addr = key.draw_offset - tile_offset; + + assert(BRW_IS_G4X(brw) || tile_offset == 0); + if (BRW_IS_G4X(brw)) { + if (key.tiling == I915_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 512 / 2; + } else { + surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 128 / 2; + } + } + } if (region_bo != NULL) surf.ss1.base_addr += region_bo->offset; /* reloc */ @@ -598,7 +615,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.writedisable_alpha = !key.color_mask[3]; /* Key size will never match key size for textures, so we're safe. */ - brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, + BRW_SS_SURFACE, &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), @@ -611,7 +629,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], offsetof(struct brw_surface_state, ss1), region_bo, - key.draw_offset, + surf.ss1.base_addr, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } @@ -630,7 +648,7 @@ brw_wm_get_binding_table(struct brw_context *brw) assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, NULL); @@ -646,7 +664,7 @@ brw_wm_get_binding_table(struct brw_context *brw) else data[i] = 0; - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, data, data_size, @@ -682,27 +700,17 @@ static void prepare_wm_surfaces(struct brw_context *brw ) for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { brw_update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i], - i, - GL_FALSE); + i); } } else { - brw_update_renderbuffer_surface(brw, NULL, 0, GL_TRUE); + brw_update_renderbuffer_surface(brw, NULL, 0); } old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; - /* Update surface / buffer for fragment shader constant buffer */ - { - const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - fp->const_buffer = - brw_update_wm_constant_surface(ctx, surf, fp->const_buffer, - fp->program.Base.Parameters); - - brw->wm.nr_surfaces = surf + 1; - } + if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) + brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; /* Update surfaces for textures */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { @@ -735,100 +743,16 @@ static void prepare_wm_surfaces(struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; } - -/** - * Constructs the binding table for the VS surface state. - */ -static dri_bo * -brw_vs_get_binding_table(struct brw_context *brw) -{ - dri_bo *bind_bo; - - assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); - - bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, brw->vs.nr_surfaces, - NULL); - - if (bind_bo == NULL) { - GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint); - uint32_t *data = malloc(data_size); - int i; - - for (i = 0; i < brw->vs.nr_surfaces; i++) - if (brw->vs.surf_bo[i]) - data[i] = brw->vs.surf_bo[i]->offset; - else - data[i] = 0; - - bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, brw->vs.nr_surfaces, - data, data_size, - NULL, NULL); - - /* Emit binding table relocations to surface state */ - for (i = 0; i < BRW_VS_MAX_SURF; i++) { - if (brw->vs.surf_bo[i] != NULL) { - dri_bo_emit_reloc(bind_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - i * sizeof(GLuint), - brw->vs.surf_bo[i]); - } - } - - free(data); - } - - return bind_bo; -} - - -/** - * Vertex shader surfaces. Just constant buffer for now. Could add vertex - * shader textures in the future. - */ -static void prepare_vs_surfaces(struct brw_context *brw ) -{ - GLcontext *ctx = &brw->intel.ctx; - - /* Update surface / buffer for vertex shader constant buffer */ - { - const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER; - struct brw_vertex_program *vp = - (struct brw_vertex_program *) brw->vertex_program; - vp->const_buffer = - brw_update_vs_constant_surface(ctx, surf, vp->const_buffer, - vp->program.Base.Parameters); - - brw->vs.nr_surfaces = 1; - } - - dri_bo_unreference(brw->vs.bind_bo); - brw->vs.bind_bo = brw_vs_get_binding_table(brw); - - if (1) - brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; -} - - -static void -prepare_surfaces(struct brw_context *brw) -{ - prepare_wm_surfaces(brw); - prepare_vs_surfaces(brw); -} - - const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, - .brw = BRW_NEW_CONTEXT, + .mesa = (_NEW_COLOR | + _NEW_TEXTURE | + _NEW_BUFFERS), + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_WM_SURFACES), .cache = 0 }, - .prepare = prepare_surfaces, + .prepare = prepare_wm_surfaces, }; diff --git a/src/mesa/drivers/dri/i965/intel_generatemipmap.c b/src/mesa/drivers/dri/i965/intel_generatemipmap.c new file mode 120000 index 0000000000..4c6b37ada0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_generatemipmap.c @@ -0,0 +1 @@ +../intel/intel_generatemipmap.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c new file mode 120000 index 0000000000..cc4589f4d4 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -0,0 +1 @@ +../intel/intel_pixel_read.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 4919828131..75d315d82b 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -108,6 +108,8 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; } + assert(src->tiling != I915_TILING_Y); + assert(dst->tiling != I915_TILING_Y); #ifndef I915 if (src->tiling != I915_TILING_NONE) { CMD |= XY_SRC_TILED; @@ -175,66 +177,6 @@ intelCopyBuffer(const __DRIdrawablePrivate * dPriv, UNLOCK_HARDWARE(intel); } - - - -void -intelEmitFillBlit(struct intel_context *intel, - GLuint cpp, - GLshort dst_pitch, - dri_bo *dst_buffer, - GLuint dst_offset, - uint32_t dst_tiling, - GLshort x, GLshort y, - GLshort w, GLshort h, - GLuint color) -{ - GLuint BR13, CMD; - BATCH_LOCALS; - - dst_pitch *= cpp; - - switch (cpp) { - case 1: - BR13 = (0xF0 << 16); - CMD = XY_COLOR_BLT_CMD; - break; - case 2: - BR13 = (0xF0 << 16) | BR13_565; - CMD = XY_COLOR_BLT_CMD; - break; - case 4: - BR13 = (0xF0 << 16) | BR13_8888; - CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; - break; - default: - return; - } -#ifndef I915 - if (dst_tiling != I915_TILING_NONE) { - CMD |= XY_DST_TILED; - dst_pitch /= 4; - } -#endif - - DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", - __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); - - assert(w > 0); - assert(h > 0); - - BEGIN_BATCH(6, NO_LOOP_CLIPRECTS); - OUT_BATCH(CMD); - OUT_BATCH(BR13 | dst_pitch); - OUT_BATCH((y << 16) | x); - OUT_BATCH(((y + h) << 16) | (x + w)); - OUT_RELOC(dst_buffer, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - dst_offset); - OUT_BATCH(color); - ADVANCE_BATCH(); -} - static GLuint translate_raster_op(GLenum logicop) { switch(logicop) { @@ -261,7 +203,7 @@ static GLuint translate_raster_op(GLenum logicop) /* Copy BitBlt */ -void +GLboolean intelEmitCopyBlit(struct intel_context *intel, GLuint cpp, GLshort src_pitch, @@ -283,6 +225,19 @@ intelEmitCopyBlit(struct intel_context *intel, dri_bo *aper_array[3]; BATCH_LOCALS; + if (dst_tiling != I915_TILING_NONE) { + if (dst_offset & 4095) + return GL_FALSE; + if (dst_tiling == I915_TILING_Y) + return GL_FALSE; + } + if (src_tiling != I915_TILING_NONE) { + if (src_offset & 4095) + return GL_FALSE; + if (src_tiling == I915_TILING_Y) + return GL_FALSE; + } + /* do space/cliprects check before going any further */ do { aper_array[0] = intel->batch->buf; @@ -320,7 +275,7 @@ intelEmitCopyBlit(struct intel_context *intel, if (locked) UNLOCK_HARDWARE(intel); - return; + return GL_TRUE; } intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS); @@ -347,7 +302,7 @@ intelEmitCopyBlit(struct intel_context *intel, CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; break; default: - return; + return GL_FALSE; } #ifndef I915 @@ -362,7 +317,7 @@ intelEmitCopyBlit(struct intel_context *intel, #endif if (dst_y2 <= dst_y || dst_x2 <= dst_x) { - return; + return GL_TRUE; } assert(dst_x < dst_x2); @@ -384,6 +339,8 @@ intelEmitCopyBlit(struct intel_context *intel, ADVANCE_BATCH(); intel_batchbuffer_emit_mi_flush(intel->batch); + + return GL_TRUE; } @@ -596,7 +553,7 @@ intelClearWithBlit(GLcontext *ctx, GLbitfield mask) UNLOCK_HARDWARE(intel); } -void +GLboolean intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLuint cpp, GLubyte *src_bits, GLuint src_size, @@ -612,11 +569,18 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, int dwords = ALIGN(src_size, 8) / 4; uint32_t opcode, br13, blit_cmd; + if (dst_tiling != I915_TILING_NONE) { + if (dst_offset & 4095) + return GL_FALSE; + if (dst_tiling == I915_TILING_Y) + return GL_FALSE; + } + assert( logic_op - GL_CLEAR >= 0 ); assert( logic_op - GL_CLEAR < 0x10 ); if (w < 0 || h < 0) - return; + return GL_TRUE; dst_pitch *= cpp; @@ -673,4 +637,6 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, REFERENCES_CLIPRECTS ); intel_batchbuffer_emit_mi_flush(intel->batch); + + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h index 52065b13ed..152fa3f17b 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.h +++ b/src/mesa/drivers/dri/intel/intel_blit.h @@ -35,7 +35,8 @@ extern void intelCopyBuffer(const __DRIdrawablePrivate * dpriv, extern void intelClearWithBlit(GLcontext * ctx, GLbitfield mask); -extern void intelEmitCopyBlit(struct intel_context *intel, +GLboolean +intelEmitCopyBlit(struct intel_context *intel, GLuint cpp, GLshort src_pitch, dri_bo *src_buffer, @@ -50,16 +51,7 @@ extern void intelEmitCopyBlit(struct intel_context *intel, GLshort w, GLshort h, GLenum logicop ); -extern void intelEmitFillBlit(struct intel_context *intel, - GLuint cpp, - GLshort dst_pitch, - dri_bo *dst_buffer, - GLuint dst_offset, - uint32_t dst_tiling, - GLshort x, GLshort y, - GLshort w, GLshort h, GLuint color); - -void +GLboolean intelEmitImmediateColorExpandBlit(struct intel_context *intel, GLuint cpp, GLubyte *src_bits, GLuint src_size, diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index c31fe91ad6..aed0e45a28 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -35,6 +35,9 @@ #include "intel_batchbuffer.h" #include "intel_regions.h" +static GLboolean +intel_bufferobj_unmap(GLcontext * ctx, + GLenum target, struct gl_buffer_object *obj); /** Allocates a new dri_bo to store the data for the buffer object. */ static void @@ -100,7 +103,13 @@ intel_bufferobj_free(GLcontext * ctx, struct gl_buffer_object *obj) struct intel_buffer_object *intel_obj = intel_buffer_object(obj); assert(intel_obj); - assert(!obj->Pointer); /* Mesa should have unmapped it */ + + /* Buffer objects are automatically unmapped when deleting according + * to the spec, but Mesa doesn't do UnmapBuffer for us at context destroy + * (though it does if you call glDeleteBuffers) + */ + if (obj->Pointer) + intel_bufferobj_unmap(ctx, 0, obj); _mesa_free(intel_obj->sys_buffer); if (intel_obj->region) { @@ -291,14 +300,19 @@ intel_bufferobj_buffer(struct intel_context *intel, } if (intel_obj->buffer == NULL) { + void *sys_buffer = intel_obj->sys_buffer; + + /* only one of buffer and sys_buffer could be non-NULL */ intel_bufferobj_alloc_buffer(intel, intel_obj); + intel_obj->sys_buffer = NULL; + intel_bufferobj_subdata(&intel->ctx, GL_ARRAY_BUFFER_ARB, 0, intel_obj->Base.Size, - intel_obj->sys_buffer, + sys_buffer, &intel_obj->Base); - _mesa_free(intel_obj->sys_buffer); + _mesa_free(sys_buffer); intel_obj->sys_buffer = NULL; } @@ -306,15 +320,13 @@ intel_bufferobj_buffer(struct intel_context *intel, } void -intel_bufferobj_init(struct intel_context *intel) +intelInitBufferObjectFuncs(struct dd_function_table *functions) { - GLcontext *ctx = &intel->ctx; - - ctx->Driver.NewBufferObject = intel_bufferobj_alloc; - ctx->Driver.DeleteBuffer = intel_bufferobj_free; - ctx->Driver.BufferData = intel_bufferobj_data; - ctx->Driver.BufferSubData = intel_bufferobj_subdata; - ctx->Driver.GetBufferSubData = intel_bufferobj_get_subdata; - ctx->Driver.MapBuffer = intel_bufferobj_map; - ctx->Driver.UnmapBuffer = intel_bufferobj_unmap; + functions->NewBufferObject = intel_bufferobj_alloc; + functions->DeleteBuffer = intel_bufferobj_free; + functions->BufferData = intel_bufferobj_data; + functions->BufferSubData = intel_bufferobj_subdata; + functions->GetBufferSubData = intel_bufferobj_get_subdata; + functions->MapBuffer = intel_bufferobj_map; + functions->UnmapBuffer = intel_bufferobj_unmap; } diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h index 0431015631..8164407f07 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h @@ -60,7 +60,7 @@ dri_bo *intel_bufferobj_buffer(struct intel_context *intel, /* Hook the bufferobject implementation into mesa: */ -void intel_bufferobj_init(struct intel_context *intel); +void intelInitBufferObjectFuncs(struct dd_function_table *functions); @@ -72,10 +72,7 @@ void intel_bufferobj_init(struct intel_context *intel); static INLINE struct intel_buffer_object * intel_buffer_object(struct gl_buffer_object *obj) { - if (obj->Name) - return (struct intel_buffer_object *) obj; - else - return NULL; + return (struct intel_buffer_object *) obj; } /* Helpers for zerocopy image uploads. See also intel_regions.h: diff --git a/src/mesa/drivers/dri/intel/intel_buffers.c b/src/mesa/drivers/dri/intel/intel_buffers.c index d2fad9e4ea..e7357e78c5 100644 --- a/src/mesa/drivers/dri/intel/intel_buffers.c +++ b/src/mesa/drivers/dri/intel/intel_buffers.c @@ -157,7 +157,7 @@ intel_draw_buffer(GLcontext * ctx, struct gl_framebuffer *fb) /* Do this here, not core Mesa, since this function is called from * many places within the driver. */ - if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + if (ctx->NewState & _NEW_BUFFERS) { /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ _mesa_update_framebuffer(ctx); /* this updates the DrawBuffer's Width/Height if it's a FBO */ @@ -345,6 +345,23 @@ intelDrawBuffer(GLcontext * ctx, GLenum mode) static void intelReadBuffer(GLcontext * ctx, GLenum mode) { + if ((ctx->DrawBuffer != NULL) && (ctx->DrawBuffer->Name == 0)) { + struct intel_context *const intel = intel_context(ctx); + const GLboolean was_front_buffer_reading = + intel->is_front_buffer_reading; + + intel->is_front_buffer_reading = (mode == GL_FRONT_LEFT) + || (mode == GL_FRONT); + + /* If we weren't front-buffer reading before but we are now, make sure + * that the front-buffer has actually been allocated. + */ + if (!was_front_buffer_reading && intel->is_front_buffer_reading) { + intel_update_renderbuffers(intel->driContext, + intel->driContext->driDrawablePriv); + } + } + if (ctx->ReadBuffer == ctx->DrawBuffer) { /* This will update FBO completeness status. * A framebuffer will be incomplete if the GL_READ_BUFFER setting diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index 19f47632ac..273856fd2f 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -53,6 +53,7 @@ #include "intel_clear.h" #include "intel_fbo.h" #include "intel_pixel.h" +#include "intel_regions.h" #define FILE_DEBUG_FLAG DEBUG_BLIT @@ -133,7 +134,6 @@ intel_clear_tris(GLcontext *ctx, GLbitfield mask) BUFFER_BIT_STENCIL)) == 0); _mesa_PushAttrib(GL_COLOR_BUFFER_BIT | - GL_CURRENT_BIT | GL_DEPTH_BUFFER_BIT | GL_ENABLE_BIT | GL_POLYGON_BIT | @@ -312,7 +312,6 @@ static const char *buffer_names[] = { static void intelClear(GLcontext *ctx, GLbitfield mask) { - struct intel_context *intel = intel_context(ctx); const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask); GLbitfield tri_mask = 0; GLbitfield blit_mask = 0; @@ -340,7 +339,7 @@ intelClear(GLcontext *ctx, GLbitfield mask) = intel_get_rb_region(fb, BUFFER_STENCIL); if (stencilRegion) { /* have hw stencil */ - if (IS_965(intel->intelScreen->deviceID) || + if (stencilRegion->tiling == I915_TILING_Y || (ctx->Stencil.WriteMask[0] & 0xff) != 0xff) { /* We have to use the 3D engine if we're clearing a partial mask * of the stencil buffer, or if we're on a 965 which has a tiled @@ -357,9 +356,10 @@ intelClear(GLcontext *ctx, GLbitfield mask) /* HW depth */ if (mask & BUFFER_BIT_DEPTH) { + const struct intel_region *irb = intel_get_rb_region(fb, BUFFER_DEPTH); + /* clear depth with whatever method is used for stencil (see above) */ - if (IS_965(intel->intelScreen->deviceID) || - tri_mask & BUFFER_BIT_STENCIL) + if (irb->tiling == I915_TILING_Y || tri_mask & BUFFER_BIT_STENCIL) tri_mask |= BUFFER_BIT_DEPTH; else blit_mask |= BUFFER_BIT_DEPTH; diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 1222a9a8c6..21d13b862b 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -220,7 +220,9 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) struct intel_renderbuffer *stencil_rb; i = 0; - if ((intel->is_front_buffer_rendering || !intel_fb->color_rb[1]) + if ((intel->is_front_buffer_rendering || + intel->is_front_buffer_reading || + !intel_fb->color_rb[1]) && intel_fb->color_rb[0]) { attachments[i++] = __DRI_BUFFER_FRONT_LEFT; attachments[i++] = intel_bits_per_pixel(intel_fb->color_rb[0]); @@ -561,10 +563,14 @@ intelInitDriverFunctions(struct dd_function_table *functions) functions->CopyConvolutionFilter2D = _swrast_CopyConvolutionFilter2D; intelInitTextureFuncs(functions); + intelInitTextureImageFuncs(functions); + intelInitTextureSubImageFuncs(functions); + intelInitTextureCopyImageFuncs(functions); intelInitStateFuncs(functions); intelInitClearFuncs(functions); intelInitBufferFuncs(functions); intelInitPixelFuncs(functions); + intelInitBufferObjectFuncs(functions); } @@ -661,6 +667,13 @@ intelInitContext(struct intel_context *intel, _mesa_init_point(ctx); ctx->Const.MaxColorAttachments = 4; /* XXX FBO: review this */ + if (IS_965(intelScreen->deviceID)) { + if (MAX_WIDTH > 8192) + ctx->Const.MaxRenderbufferSize = 8192; + } else { + if (MAX_WIDTH > 2048) + ctx->Const.MaxRenderbufferSize = 2048; + } /* Initialize the software rasterizer and helper modules. */ _swrast_CreateContext(ctx); @@ -718,7 +731,6 @@ intelInitContext(struct intel_context *intel, intel->batch = intel_batchbuffer_alloc(intel); - intel_bufferobj_init(intel); intel_fbo_init(intel); if (intel->ctx.Mesa_DXTn) { @@ -728,6 +740,15 @@ intelInitContext(struct intel_context *intel, else if (driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) { _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } + intel->use_texture_tiling = driQueryOptionb(&intel->optionCache, + "texture_tiling"); + if (intel->use_texture_tiling && + !intel->intelScreen->kernel_exec_fencing) { + fprintf(stderr, "No kernel support for execution fencing, " + "disabling texture tiling"); + intel->use_texture_tiling = GL_FALSE; + } + intel->use_early_z = driQueryOptionb(&intel->optionCache, "early_z"); intel->prim.primitive = ~0; @@ -789,13 +810,64 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) intel->prim.vb_bo = NULL; if (release_texture_heaps) { - /* This share group is about to go away, free our private - * texture object data. + /* Nothing is currently done here to free texture heaps; + * but we're not using the texture heap utilities, so I + * rather think we shouldn't. I've taken a look, and can't + * find any private texture data hanging around anywhere, but + * I'm not yet certain there isn't any at all... */ - if (INTEL_DEBUG & DEBUG_TEXTURE) + /* if (INTEL_DEBUG & DEBUG_TEXTURE) fprintf(stderr, "do something to free texture heaps\n"); + */ } + /* XXX In intelMakeCurrent() below, the context's static regions are + * referenced inside the frame buffer; it's listed as a hack, + * with a comment of "XXX FBO temporary fix-ups!", but + * as long as it's there, we should release the regions here. + * The do/while loop around the block is used to allow the + * "continue" statements inside the block to exit the block, + * to avoid many layers of "if" constructs. + */ + do { + __DRIdrawablePrivate * driDrawPriv = intel->driDrawable; + struct intel_framebuffer *intel_fb; + struct intel_renderbuffer *irbDepth, *irbStencil; + if (!driDrawPriv) { + /* We're already detached from the drawable; exit this block. */ + continue; + } + intel_fb = (struct intel_framebuffer *) driDrawPriv->driverPrivate; + if (!intel_fb) { + /* The frame buffer is already gone; exit this block. */ + continue; + } + irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); + irbStencil = intel_get_renderbuffer(&intel_fb->Base, BUFFER_STENCIL); + + /* If the regions of the frame buffer still match the regions + * of the context, release them. If they've changed somehow, + * leave them alone. + */ + if (intel_fb->color_rb[0] && intel_fb->color_rb[0]->region == intel->front_region) { + intel_renderbuffer_set_region(intel_fb->color_rb[0], NULL); + } + if (intel_fb->color_rb[1] && intel_fb->color_rb[1]->region == intel->back_region) { + intel_renderbuffer_set_region(intel_fb->color_rb[1], NULL); + } + + if (irbDepth && irbDepth->region == intel->depth_region) { + intel_renderbuffer_set_region(irbDepth, NULL); + } + /* Usually, the stencil buffer is the same as the depth buffer; + * but they're handled separately in MakeCurrent, so we'll + * handle them separately here. + */ + if (irbStencil && irbStencil->region == intel->depth_region) { + intel_renderbuffer_set_region(irbStencil, NULL); + } + } while (0); + intel_region_release(&intel->front_region); intel_region_release(&intel->back_region); intel_region_release(&intel->depth_region); @@ -804,6 +876,8 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) /* free the Mesa context */ _mesa_free_context_data(&intel->ctx); + + } } @@ -832,7 +906,10 @@ intelMakeCurrent(__DRIcontextPrivate * driContextPriv, if (driDrawPriv != driReadPriv) intel_update_renderbuffers(driContextPriv, driReadPriv); } else { - /* XXX FBO temporary fix-ups! */ + /* XXX FBO temporary fix-ups! These are released in + * intelDextroyContext(), above. Changes here should be + * reflected there. + */ /* if the renderbuffers don't have regions, init them from the context */ struct intel_renderbuffer *irbDepth = intel_get_renderbuffer(&intel_fb->Base, BUFFER_DEPTH); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index b5cf7e6648..36f33b6f31 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -161,12 +161,22 @@ struct intel_context struct { struct gl_fragment_program *bitmap_fp; struct gl_vertex_program *passthrough_vp; + struct gl_buffer_object *texcoord_vbo; struct gl_fragment_program *saved_fp; GLboolean saved_fp_enable; struct gl_vertex_program *saved_vp; GLboolean saved_vp_enable; + struct gl_fragment_program *tex2d_fp; + + GLboolean saved_texcoord_enable; + struct gl_buffer_object *saved_array_vbo, *saved_texcoord_vbo; + GLenum saved_texcoord_type; + GLsizei saved_texcoord_size, saved_texcoord_stride; + const void *saved_texcoord_ptr; + int saved_active_texture; + GLint saved_vp_x, saved_vp_y; GLsizei saved_vp_width, saved_vp_height; GLenum saved_matrix_mode; @@ -294,6 +304,17 @@ struct intel_context * easily. */ GLboolean is_front_buffer_rendering; + /** + * Track whether front-buffer is the current read target. + * + * This is closely associated with is_front_buffer_rendering, but may + * be set separately. The DRI2 fake front buffer must be referenced + * either way. + */ + GLboolean is_front_buffer_reading; + + GLboolean use_texture_tiling; + GLboolean use_early_z; drm_clip_rect_t fboRect; /**< cliprect for FBO rendering */ @@ -549,6 +570,9 @@ void intel_viewport(GLcontext * ctx, GLint x, GLint y, void intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable); +void i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, + uint32_t buffer_id); + /*====================================================================== * Inline conversion functions. * These are better-typed than the macros used previously: diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 9ec1b4ec2f..0ff497fc2a 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -34,6 +34,7 @@ #define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_shader_objects +#define need_GL_ARB_vertex_array_object #define need_GL_ARB_vertex_program #define need_GL_ARB_vertex_shader #define need_GL_ARB_window_pos @@ -48,6 +49,7 @@ #define need_GL_EXT_point_parameters #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side +#define need_GL_APPLE_vertex_array_object #define need_GL_ATI_separate_stencil #define need_GL_ATI_envmap_bumpmap #define need_GL_NV_point_sprite @@ -65,6 +67,7 @@ * i965_dri. */ static const struct dri_extension card_extensions[] = { + { "GL_ARB_half_float_pixel", NULL }, { "GL_ARB_multitexture", NULL }, { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, { "GL_ARB_texture_border_clamp", NULL }, @@ -75,6 +78,7 @@ static const struct dri_extension card_extensions[] = { { "GL_ARB_texture_env_dot3", NULL }, { "GL_ARB_texture_mirrored_repeat", NULL }, { "GL_ARB_texture_rectangle", NULL }, + { "GL_ARB_vertex_array_object", GL_ARB_vertex_array_object_functions}, { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, { "GL_ARB_window_pos", GL_ARB_window_pos_functions }, { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, @@ -95,6 +99,7 @@ static const struct dri_extension card_extensions[] = { { "GL_EXT_texture_lod_bias", NULL }, { "GL_3DFX_texture_compression_FXT1", NULL }, { "GL_APPLE_client_storage", NULL }, + { "GL_APPLE_vertex_array_object", GL_APPLE_vertex_array_object_functions}, { "GL_MESA_pack_invert", NULL }, { "GL_MESA_ycbcr_texture", NULL }, { "GL_NV_blend_square", NULL }, diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 52647ddf8b..0ea413aee1 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -217,7 +217,8 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, DBG("Allocating %d x %d Intel RBO (pitch %d)\n", width, height, pitch); - irb->region = intel_region_alloc(intel, cpp, width, height, pitch, + irb->region = intel_region_alloc(intel, I915_TILING_NONE, + cpp, width, height, pitch, GL_TRUE); if (!irb->region) return GL_FALSE; /* out of memory? */ @@ -574,9 +575,10 @@ intel_render_texture(GLcontext * ctx, ASSERT(newImage); - if (newImage->Border != 0) { - /* Fallback on drawing to a texture with a border, which won't have a - * miptree. + intel_image = intel_texture_image(newImage); + if (!intel_image->mt) { + /* Fallback on drawing to a texture that doesn't have a miptree + * (has a border, width/height 0, etc.) */ _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); _mesa_render_texture(ctx, fb, att); @@ -607,7 +609,6 @@ intel_render_texture(GLcontext * ctx, irb->Base.RefCount); /* point the renderbufer's region to the texture image region */ - intel_image = intel_texture_image(newImage); if (irb->region != intel_image->mt->region) { if (irb->region) intel_region_release(&irb->region); @@ -679,6 +680,11 @@ intel_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) if (rb == NULL) continue; + if (irb == NULL) { + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + continue; + } + switch (irb->texformat->MesaFormat) { case MESA_FORMAT_ARGB8888: case MESA_FORMAT_RGB565: diff --git a/src/mesa/drivers/dri/intel/intel_generatemipmap.c b/src/mesa/drivers/dri/intel/intel_generatemipmap.c new file mode 100644 index 0000000000..b00f8019dd --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_generatemipmap.c @@ -0,0 +1,283 @@ +/* + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "main/glheader.h" +#include "main/enums.h" +#include "main/image.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/bufferobj.h" +#include "main/teximage.h" +#include "main/texenv.h" +#include "main/texobj.h" +#include "main/texstate.h" +#include "main/texparam.h" +#include "main/varray.h" +#include "main/attrib.h" +#include "main/enable.h" +#include "main/buffers.h" +#include "main/fbobject.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" +#include "main/depth.h" +#include "main/hash.h" +#include "main/mipmap.h" +#include "main/blend.h" +#include "glapi/dispatch.h" +#include "swrast/swrast.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_pixel.h" +#include "intel_tex.h" +#include "intel_mipmap_tree.h" + +static const char *intel_fp_tex2d = + "!!ARBfp1.0\n" + "TEX result.color, fragment.texcoord[0], texture[0], 2D;\n" + "END\n"; + +static GLboolean +intel_generate_mipmap_level(GLcontext *ctx, GLuint tex_name, + int level, int width, int height) +{ + struct intel_context *intel = intel_context(ctx); + GLfloat vertices[4][2]; + GLint status; + + /* Set to source from the previous level */ + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, level - 1); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, level - 1); + + /* Set to draw into the current level */ + _mesa_FramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, + GL_COLOR_ATTACHMENT0_EXT, + GL_TEXTURE_2D, + tex_name, + level); + /* Choose to render to the color attachment. */ + _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0_EXT); + + status = _mesa_CheckFramebufferStatusEXT (GL_FRAMEBUFFER_EXT); + if (status != GL_FRAMEBUFFER_COMPLETE_EXT) + return GL_FALSE; + + intel_meta_set_passthrough_transform(intel); + + /* XXX: Doing it right would involve setting up the transformation to do + * 0-1 mapping or something, and not changing the vertex data. + */ + vertices[0][0] = 0; + vertices[0][1] = 0; + vertices[1][0] = width; + vertices[1][1] = 0; + vertices[2][0] = width; + vertices[2][1] = height; + vertices[3][0] = 0; + vertices[3][1] = height; + + _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices); + _mesa_Enable(GL_VERTEX_ARRAY); + intel_meta_set_default_texrect(intel); + + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + + intel_meta_restore_texcoords(intel); + intel_meta_restore_transform(intel); + + return GL_TRUE; +} + +static GLboolean +intel_generate_mipmap_2d(GLcontext *ctx, + GLenum target, + struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + GLint old_active_texture; + int level, max_levels, start_level, end_level; + GLuint fb_name; + GLboolean success = GL_FALSE; + struct gl_framebuffer *saved_fbo = NULL; + + _mesa_PushAttrib(GL_ENABLE_BIT | GL_TEXTURE_BIT | + GL_CURRENT_BIT | GL_COLOR_BUFFER_BIT | + GL_DEPTH_BUFFER_BIT); + _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); + old_active_texture = ctx->Texture.CurrentUnit; + _mesa_reference_framebuffer(&saved_fbo, ctx->DrawBuffer); + + _mesa_Disable(GL_POLYGON_STIPPLE); + _mesa_Disable(GL_DEPTH_TEST); + _mesa_Disable(GL_STENCIL_TEST); + _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + _mesa_DepthMask(GL_FALSE); + + /* Bind the given texture to GL_TEXTURE_2D with linear filtering for our + * minification. + */ + _mesa_ActiveTextureARB(GL_TEXTURE0_ARB); + _mesa_Enable(GL_TEXTURE_2D); + _mesa_BindTexture(GL_TEXTURE_2D, texObj->Name); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, + GL_LINEAR_MIPMAP_NEAREST); + _mesa_TexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + + /* Bind the new renderbuffer to the color attachment point. */ + _mesa_GenFramebuffersEXT(1, &fb_name); + _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb_name); + + intel_meta_set_fragment_program(intel, &intel->meta.tex2d_fp, + intel_fp_tex2d); + intel_meta_set_passthrough_vertex_program(intel); + + max_levels = _mesa_max_texture_levels(ctx, texObj->Target); + start_level = texObj->BaseLevel; + end_level = texObj->MaxLevel; + + /* Loop generating level+1 from level. */ + for (level = start_level; level < end_level && level < max_levels - 1; level++) { + const struct gl_texture_image *srcImage; + int width, height; + + srcImage = _mesa_select_tex_image(ctx, texObj, target, level); + if (srcImage->Border != 0) + goto fail; + + width = srcImage->Width / 2; + if (width < 1) + width = 1; + height = srcImage->Height / 2; + if (height < 1) + height = 1; + + if (width == srcImage->Width && + height == srcImage->Height) { + /* Neither _mesa_max_texture_levels nor texObj->MaxLevel are the + * maximum texture level for the object, so break out when we've gone + * over the edge. + */ + break; + } + + /* Make sure that there's space allocated for the target level. + * We could skip this if there's already space allocated and save some + * time. + */ + _mesa_TexImage2D(GL_TEXTURE_2D, level + 1, srcImage->InternalFormat, + width, height, 0, + GL_RGBA, GL_UNSIGNED_INT, NULL); + + if (!intel_generate_mipmap_level(ctx, texObj->Name, level + 1, + width, height)) + goto fail; + } + + success = GL_TRUE; + +fail: + intel_meta_restore_fragment_program(intel); + intel_meta_restore_vertex_program(intel); + + _mesa_DeleteFramebuffersEXT(1, &fb_name); + _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); + if (saved_fbo) + _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, saved_fbo->Name); + _mesa_reference_framebuffer(&saved_fbo, NULL); + _mesa_PopClientAttrib(); + _mesa_PopAttrib(); + + return success; +} + + +/** + * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap + * level). + * + * The texture object's miptree must be mapped. + * + * It would be really nice if this was just called by Mesa whenever mipmaps + * needed to be regenerated, rather than us having to remember to do so in + * each texture image modification path. + * + * This function should also include an accelerated path. + */ +void +intel_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_texture_object *intelObj = intel_texture_object(texObj); + GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + int face, i; + + /* HW path */ + if (target == GL_TEXTURE_2D && + ctx->Extensions.EXT_framebuffer_object && + ctx->Extensions.ARB_fragment_program && + ctx->Extensions.ARB_vertex_program) { + GLboolean success; + + /* We'll be accessing this texture using GL entrypoints, which should + * be resilient against other access to this texture. + */ + _mesa_unlock_texture(ctx, texObj); + success = intel_generate_mipmap_2d(ctx, target, texObj); + _mesa_lock_texture(ctx, texObj); + + if (success) + return; + } + + /* SW path */ + intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); + _mesa_generate_mipmap(ctx, target, texObj); + intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); + + /* Update the level information in our private data in the new images, since + * it didn't get set as part of a normal TexImage path. + */ + for (face = 0; face < nr_faces; face++) { + for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { + struct intel_texture_image *intelImage; + + intelImage = intel_texture_image(texObj->Image[face][i]); + if (intelImage == NULL) + break; + + intelImage->level = i; + intelImage->face = face; + /* Unreference the miptree to signal that the new Data is a bare + * pointer from mesa. + */ + intel_miptree_release(intel, &intelImage->mt); + } + } +} diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 6e1e034e53..a0d8f0c27a 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -57,14 +57,16 @@ intel_miptree_create_internal(struct intel_context *intel, GLuint last_level, GLuint width0, GLuint height0, - GLuint depth0, GLuint cpp, GLuint compress_byte) + GLuint depth0, GLuint cpp, GLuint compress_byte, + uint32_t tiling) { GLboolean ok; struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1); - DBG("%s target %s format %s level %d..%d\n", __FUNCTION__, + DBG("%s target %s format %s level %d..%d <-- %p\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), - _mesa_lookup_enum_by_nr(internal_format), first_level, last_level); + _mesa_lookup_enum_by_nr(internal_format), + first_level, last_level, mt); mt->target = target_to_target(target); mt->internal_format = internal_format; @@ -80,15 +82,16 @@ intel_miptree_create_internal(struct intel_context *intel, #ifdef I915 if (IS_945(intel->intelScreen->deviceID)) - ok = i945_miptree_layout(intel, mt); + ok = i945_miptree_layout(intel, mt, tiling); else - ok = i915_miptree_layout(intel, mt); + ok = i915_miptree_layout(intel, mt, tiling); #else - ok = brw_miptree_layout(intel, mt); + ok = brw_miptree_layout(intel, mt, tiling); #endif if (!ok) { free(mt); + DBG("%s not okay - returning NULL\n", __FUNCTION__); return NULL; } @@ -98,6 +101,7 @@ intel_miptree_create_internal(struct intel_context *intel, struct intel_mipmap_tree * intel_miptree_create(struct intel_context *intel, GLenum target, + GLenum base_format, GLenum internal_format, GLuint first_level, GLuint last_level, @@ -107,10 +111,23 @@ intel_miptree_create(struct intel_context *intel, GLboolean expect_accelerated_upload) { struct intel_mipmap_tree *mt; + uint32_t tiling; + + if (intel->use_texture_tiling && compress_byte == 0 && + intel->intelScreen->kernel_exec_fencing) { + if (IS_965(intel->intelScreen->deviceID) && + (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL_EXT)) + tiling = I915_TILING_Y; + else + tiling = I915_TILING_X; + } else + tiling = I915_TILING_NONE; mt = intel_miptree_create_internal(intel, target, internal_format, first_level, last_level, width0, - height0, depth0, cpp, compress_byte); + height0, depth0, cpp, compress_byte, + tiling); /* * pitch == 0 || height == 0 indicates the null texture */ @@ -118,6 +135,7 @@ intel_miptree_create(struct intel_context *intel, return NULL; mt->region = intel_region_alloc(intel, + tiling, mt->cpp, mt->pitch, mt->total_height, @@ -147,7 +165,8 @@ intel_miptree_create_for_region(struct intel_context *intel, mt = intel_miptree_create_internal(intel, target, internal_format, first_level, last_level, region->width, region->height, 1, - region->cpp, compress_byte); + region->cpp, compress_byte, + I915_TILING_NONE); if (!mt) return mt; #if 0 @@ -185,6 +204,7 @@ intel_miptree_create_for_region(struct intel_context *intel, int intel_miptree_pitch_align (struct intel_context *intel, struct intel_mipmap_tree *mt, + uint32_t tiling, int pitch) { #ifdef I915 @@ -205,6 +225,11 @@ int intel_miptree_pitch_align (struct intel_context *intel, pitch_align = 4; } + if (tiling == I915_TILING_X) + pitch_align = 512; + else if (tiling == I915_TILING_Y) + pitch_align = 128; + pitch = ALIGN(pitch * mt->cpp, pitch_align); #ifdef I915 @@ -475,6 +500,7 @@ intel_miptree_image_copy(struct intel_context *intel, const GLuint *dst_depth_offset = intel_miptree_depth_offsets(dst, level); const GLuint *src_depth_offset = intel_miptree_depth_offsets(src, level); GLuint i; + GLboolean success; if (dst->compressed) { GLuint alignment = intel_compressed_alignment(dst->internal_format); @@ -483,12 +509,26 @@ intel_miptree_image_copy(struct intel_context *intel, } for (i = 0; i < depth; i++) { - intel_region_copy(intel, - dst->region, dst_offset + dst_depth_offset[i], - 0, - 0, - src->region, src_offset + src_depth_offset[i], - 0, 0, width, height); + success = intel_region_copy(intel, + dst->region, dst_offset + dst_depth_offset[i], + 0, 0, + src->region, src_offset + src_depth_offset[i], + 0, 0, width, height, GL_COPY); + if (!success) { + GLubyte *src_ptr, *dst_ptr; + + src_ptr = intel_region_map(intel, src->region); + dst_ptr = intel_region_map(intel, dst->region); + + _mesa_copy_rect(dst_ptr + dst_offset + dst_depth_offset[i], + dst->cpp, + dst->pitch, + 0, 0, width, height, + src_ptr + src_offset + src_depth_offset[i], + src->pitch, + 0, 0); + intel_region_unmap(intel, src->region); + intel_region_unmap(intel, dst->region); + } } - } diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index 4060b9df78..2a809cfda5 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -126,6 +126,7 @@ struct intel_mipmap_tree struct intel_mipmap_tree *intel_miptree_create(struct intel_context *intel, GLenum target, + GLenum base_format, GLenum internal_format, GLuint first_level, GLuint last_level, @@ -148,6 +149,7 @@ intel_miptree_create_for_region(struct intel_context *intel, int intel_miptree_pitch_align (struct intel_context *intel, struct intel_mipmap_tree *mt, + uint32_t tiling, int pitch); void intel_miptree_reference(struct intel_mipmap_tree **dst, @@ -218,10 +220,13 @@ void intel_miptree_image_copy(struct intel_context *intel, /* i915_mipmap_tree.c: */ GLboolean i915_miptree_layout(struct intel_context *intel, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + uint32_t tiling); GLboolean i945_miptree_layout(struct intel_context *intel, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + uint32_t tiling); GLboolean brw_miptree_layout(struct intel_context *intel, - struct intel_mipmap_tree *mt); + struct intel_mipmap_tree *mt, + uint32_t tiling); #endif diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c index fc0ac0b79c..da9ccb23f1 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.c +++ b/src/mesa/drivers/dri/intel/intel_pixel.c @@ -27,9 +27,12 @@ #include "main/enums.h" #include "main/state.h" +#include "main/bufferobj.h" #include "main/context.h" #include "main/enable.h" #include "main/matrix.h" +#include "main/texstate.h" +#include "main/varray.h" #include "main/viewport.h" #include "swrast/swrast.h" #include "shader/arbprogram.h" @@ -334,6 +337,85 @@ intel_meta_restore_fragment_program(struct intel_context *intel) _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); } +static const float default_texcoords[4][2] = { { 0.0, 0.0 }, + { 1.0, 0.0 }, + { 1.0, 1.0 }, + { 0.0, 1.0 } }; + +void +intel_meta_set_default_texrect(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + struct gl_client_array *old_texcoord_array; + + intel->meta.saved_active_texture = ctx->Texture.CurrentUnit; + if (intel->meta.saved_array_vbo == NULL) { + _mesa_reference_buffer_object(ctx, &intel->meta.saved_array_vbo, + ctx->Array.ArrayBufferObj); + } + + old_texcoord_array = &ctx->Array.ArrayObj->TexCoord[0]; + intel->meta.saved_texcoord_type = old_texcoord_array->Type; + intel->meta.saved_texcoord_size = old_texcoord_array->Size; + intel->meta.saved_texcoord_stride = old_texcoord_array->Stride; + intel->meta.saved_texcoord_enable = old_texcoord_array->Enabled; + intel->meta.saved_texcoord_ptr = old_texcoord_array->Ptr; + _mesa_reference_buffer_object(ctx, &intel->meta.saved_texcoord_vbo, + old_texcoord_array->BufferObj); + + _mesa_ClientActiveTextureARB(GL_TEXTURE0); + + if (intel->meta.texcoord_vbo == NULL) { + GLuint vbo_name; + + _mesa_GenBuffersARB(1, &vbo_name); + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, vbo_name); + _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(default_texcoords), + default_texcoords, GL_STATIC_DRAW_ARB); + _mesa_reference_buffer_object(ctx, &intel->meta.texcoord_vbo, + ctx->Array.ArrayBufferObj); + } else { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, + intel->meta.texcoord_vbo->Name); + } + _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), NULL); + + _mesa_Enable(GL_TEXTURE_COORD_ARRAY); +} + +void +intel_meta_restore_texcoords(struct intel_context *intel) +{ + GLcontext *ctx = &intel->ctx; + + /* Restore the old TexCoordPointer */ + if (intel->meta.saved_texcoord_vbo) { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, + intel->meta.saved_texcoord_vbo->Name); + _mesa_reference_buffer_object(ctx, &intel->meta.saved_texcoord_vbo, NULL); + } else { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + } + + _mesa_TexCoordPointer(intel->meta.saved_texcoord_size, + intel->meta.saved_texcoord_type, + intel->meta.saved_texcoord_stride, + intel->meta.saved_texcoord_ptr); + if (!intel->meta.saved_texcoord_enable) + _mesa_Disable(GL_TEXTURE_COORD_ARRAY); + + _mesa_ClientActiveTextureARB(GL_TEXTURE0 + + intel->meta.saved_active_texture); + + if (intel->meta.saved_array_vbo) { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, + intel->meta.saved_array_vbo->Name); + _mesa_reference_buffer_object(ctx, &intel->meta.saved_array_vbo, NULL); + } else { + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + } +} + void intelInitPixelFuncs(struct dd_function_table *functions) { @@ -342,10 +424,8 @@ intelInitPixelFuncs(struct dd_function_table *functions) functions->Bitmap = intelBitmap; functions->CopyPixels = intelCopyPixels; functions->DrawPixels = intelDrawPixels; -#ifdef I915 - functions->ReadPixels = intelReadPixels; -#endif } + functions->ReadPixels = intelReadPixels; } void @@ -355,5 +435,7 @@ intel_free_pixel_state(struct intel_context *intel) _mesa_reference_vertprog(ctx, &intel->meta.passthrough_vp, NULL); _mesa_reference_fragprog(ctx, &intel->meta.bitmap_fp, NULL); + _mesa_reference_fragprog(ctx, &intel->meta.tex2d_fp, NULL); + _mesa_reference_buffer_object(ctx, &intel->meta.texcoord_vbo, NULL); } diff --git a/src/mesa/drivers/dri/intel/intel_pixel.h b/src/mesa/drivers/dri/intel/intel_pixel.h index cb41fa182c..6acf0813c8 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.h +++ b/src/mesa/drivers/dri/intel/intel_pixel.h @@ -40,6 +40,9 @@ void intel_meta_set_fragment_program(struct intel_context *intel, const char *prog_string); void intel_meta_restore_fragment_program(struct intel_context *intel); void intel_free_pixel_state(struct intel_context *intel); +void intel_meta_set_default_texrect(struct intel_context *intel); +void intel_meta_set_default_texrect(struct intel_context *intel); +void intel_meta_restore_texcoords(struct intel_context *intel); GLboolean intel_check_blit_fragment_ops(GLcontext * ctx, GLboolean src_alpha_is_one); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index a2ccae1b7d..e678cd2c26 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -194,7 +194,7 @@ do_blit_bitmap( GLcontext *ctx, struct gl_framebuffer *fb = ctx->DrawBuffer; GLfloat tmpColor[4]; GLubyte ubcolor[4]; - GLuint color8888, color565; + GLuint color; unsigned int num_cliprects; drm_clip_rect_t *cliprects; int x_off, y_off; @@ -232,8 +232,11 @@ do_blit_bitmap( GLcontext *ctx, UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[2], tmpColor[2]); UNCLAMPED_FLOAT_TO_UBYTE(ubcolor[3], tmpColor[3]); - color8888 = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], ubcolor[2], ubcolor[3]); - color565 = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]); + if (dst->cpp == 2) + color = INTEL_PACKCOLOR565(ubcolor[0], ubcolor[1], ubcolor[2]); + else + color = INTEL_PACKCOLOR8888(ubcolor[0], ubcolor[1], + ubcolor[2], ubcolor[3]); if (!intel_check_blit_fragment_ops(ctx, tmpColor[3] == 1.0F)) return GL_FALSE; @@ -307,21 +310,21 @@ do_blit_bitmap( GLcontext *ctx, fb->Name == 0 ? GL_TRUE : GL_FALSE) == 0) continue; - /* - */ - intelEmitImmediateColorExpandBlit( intel, - dst->cpp, - (GLubyte *)stipple, - sz, - (dst->cpp == 2) ? color565 : color8888, - dst->pitch, - dst->buffer, - 0, - dst->tiling, - box_x + px, - box_y + py, - w, h, - logic_op); + if (!intelEmitImmediateColorExpandBlit(intel, + dst->cpp, + (GLubyte *)stipple, + sz, + color, + dst->pitch, + dst->buffer, + 0, + dst->tiling, + box_x + px, + box_y + py, + w, h, + logic_op)) { + return GL_FALSE; + } } } } @@ -360,7 +363,6 @@ intel_texture_bitmap(GLcontext * ctx, "END\n"; GLuint texname; GLfloat vertices[4][4]; - GLfloat texcoords[4][2]; GLint old_active_texture; GLubyte *unpacked_bitmap; GLubyte *a8_bitmap; @@ -493,22 +495,12 @@ intel_texture_bitmap(GLcontext * ctx, vertices[3][2] = dst_z; vertices[3][3] = 1.0; - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; - _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + intel_meta_set_default_texrect(intel); _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + intel_meta_restore_texcoords(intel); intel_meta_restore_transform(intel); intel_meta_restore_fragment_program(intel); intel_meta_restore_vertex_program(intel); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_copy.c b/src/mesa/drivers/dri/intel/intel_pixel_copy.c index d50dd68092..f523d3eead 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_copy.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_copy.c @@ -362,14 +362,16 @@ do_blit_copypixels(GLcontext * ctx, &clip_x, &clip_y, &clip_w, &clip_h)) continue; - intelEmitCopyBlit(intel, dst->cpp, - src->pitch, src->buffer, 0, src->tiling, - dst->pitch, dst->buffer, 0, dst->tiling, - clip_x + delta_x, clip_y + delta_y, /* srcx, srcy */ - clip_x, clip_y, /* dstx, dsty */ - clip_w, clip_h, - ctx->Color.ColorLogicOpEnabled ? - ctx->Color.LogicOp : GL_COPY); + if (!intel_region_copy(intel, + dst, 0, clip_x, clip_y, + src, 0, clip_x + delta_x, clip_y + delta_y, + clip_w, clip_h, + ctx->Color.ColorLogicOpEnabled ? + ctx->Color.LogicOp : GL_COPY)) { + DBG("%s: blit failure\n", __FUNCTION__); + UNLOCK_HARDWARE(intel); + return GL_FALSE; + } } } out: diff --git a/src/mesa/drivers/dri/intel/intel_pixel_draw.c b/src/mesa/drivers/dri/intel/intel_pixel_draw.c index d80069dd58..d79d625f77 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_draw.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_draw.c @@ -70,7 +70,6 @@ intel_texture_drawpixels(GLcontext * ctx, struct intel_context *intel = intel_context(ctx); GLuint texname; GLfloat vertices[4][4]; - GLfloat texcoords[4][2]; GLfloat z; GLint old_active_texture; GLenum internalFormat; @@ -177,22 +176,13 @@ intel_texture_drawpixels(GLcontext * ctx, vertices[3][2] = z; vertices[3][3] = 1.0; - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; - _mesa_VertexPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + intel_meta_set_default_texrect(intel); + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + intel_meta_restore_texcoords(intel); intel_meta_restore_transform(intel); _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); @@ -216,7 +206,6 @@ intel_stencil_drawpixels(GLcontext * ctx, struct intel_context *intel = intel_context(ctx); GLuint texname, rb_name, fb_name, old_fb_name; GLfloat vertices[4][2]; - GLfloat texcoords[4][2]; struct intel_renderbuffer *irb; struct intel_renderbuffer *depth_irb; struct gl_renderbuffer *rb; @@ -359,7 +348,6 @@ intel_stencil_drawpixels(GLcontext * ctx, _mesa_free(stencil_pixels); intel_meta_set_passthrough_transform(intel); - vertices[0][0] = x; vertices[0][1] = y; vertices[1][0] = x + width * ctx->Pixel.ZoomX; @@ -369,22 +357,13 @@ intel_stencil_drawpixels(GLcontext * ctx, vertices[3][0] = x; vertices[3][1] = y + height * ctx->Pixel.ZoomY; - texcoords[0][0] = 0.0; - texcoords[0][1] = 0.0; - texcoords[1][0] = 1.0; - texcoords[1][1] = 0.0; - texcoords[2][0] = 1.0; - texcoords[2][1] = 1.0; - texcoords[3][0] = 0.0; - texcoords[3][1] = 1.0; - _mesa_VertexPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &vertices); - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), &texcoords); _mesa_Enable(GL_VERTEX_ARRAY); - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); + intel_meta_set_default_texrect(intel); + _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4); + intel_meta_restore_texcoords(intel); intel_meta_restore_transform(intel); _mesa_ActiveTextureARB(GL_TEXTURE0_ARB + old_active_texture); diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c new file mode 100644 index 0000000000..8713463ace --- /dev/null +++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c @@ -0,0 +1,324 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/glheader.h" +#include "main/enums.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "main/image.h" +#include "main/bufferobj.h" +#include "main/state.h" +#include "swrast/swrast.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_blit.h" +#include "intel_buffers.h" +#include "intel_regions.h" +#include "intel_pixel.h" +#include "intel_buffer_objects.h" + +/* For many applications, the new ability to pull the source buffers + * back out of the GTT and then do the packing/conversion operations + * in software will be as much of an improvement as trying to get the + * blitter and/or texture engine to do the work. + * + * This step is gated on private backbuffers. + * + * Obviously the frontbuffer can't be pulled back, so that is either + * an argument for blit/texture readpixels, or for blitting to a + * temporary and then pulling that back. + * + * When the destination is a pbo, however, it's not clear if it is + * ever going to be pulled to main memory (though the access param + * will be a good hint). So it sounds like we do want to be able to + * choose between blit/texture implementation on the gpu and pullback + * and cpu-based copying. + * + * Unless you can magically turn client memory into a PBO for the + * duration of this call, there will be a cpu-based copying step in + * any case. + */ + + +static GLboolean +do_texture_readpixels(GLcontext * ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, + struct intel_region *dest_region) +{ +#if 0 + struct intel_context *intel = intel_context(ctx); + intelScreenPrivate *screen = intel->intelScreen; + GLint pitch = pack->RowLength ? pack->RowLength : width; + __DRIdrawablePrivate *dPriv = intel->driDrawable; + int textureFormat; + GLenum glTextureFormat; + int destFormat, depthFormat, destPitch; + drm_clip_rect_t tmp; + + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + + if (ctx->_ImageTransferState || + pack->SwapBytes || pack->LsbFirst || !pack->Invert) { + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: check_color failed\n", __FUNCTION__); + return GL_FALSE; + } + + intel->vtbl.meta_texrect_source(intel, intel_readbuf_region(intel)); + + if (!intel->vtbl.meta_render_dest(intel, dest_region, type, format)) { + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: couldn't set dest %s/%s\n", + __FUNCTION__, + _mesa_lookup_enum_by_nr(type), + _mesa_lookup_enum_by_nr(format)); + return GL_FALSE; + } + + LOCK_HARDWARE(intel); + + if (intel->driDrawable->numClipRects) { + intel->vtbl.install_meta_state(intel); + intel->vtbl.meta_no_depth_write(intel); + intel->vtbl.meta_no_stencil_write(intel); + + if (!driClipRectToFramebuffer(ctx->ReadBuffer, &x, &y, &width, &height)) { + UNLOCK_HARDWARE(intel); + SET_STATE(i830, state); + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s: cliprect failed\n", __FUNCTION__); + return GL_TRUE; + } + + y = dPriv->h - y - height; + x += dPriv->x; + y += dPriv->y; + + + /* Set the frontbuffer up as a large rectangular texture. + */ + intel->vtbl.meta_tex_rect_source(intel, src_region, textureFormat); + + + intel->vtbl.meta_texture_blend_replace(i830, glTextureFormat); + + + /* Set the 3d engine to draw into the destination region: + */ + + intel->vtbl.meta_draw_region(intel, dest_region); + intel->vtbl.meta_draw_format(intel, destFormat, depthFormat); /* ?? */ + + + /* Draw a single quad, no cliprects: + */ + intel->vtbl.meta_disable_cliprects(intel); + + intel->vtbl.draw_quad(intel, + 0, width, 0, height, + 0x00ff00ff, x, x + width, y, y + height); + + intel->vtbl.leave_meta_state(intel); + } + UNLOCK_HARDWARE(intel); + + intel_region_wait_fence(ctx, dest_region); /* required by GL */ + return GL_TRUE; +#endif + + return GL_FALSE; +} + + + + +static GLboolean +do_blit_readpixels(GLcontext * ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, GLvoid * pixels) +{ + struct intel_context *intel = intel_context(ctx); + struct intel_region *src = intel_readbuf_region(intel); + struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj); + GLuint dst_offset; + GLuint rowLength; + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s\n", __FUNCTION__); + + if (!src) + return GL_FALSE; + + if (pack->BufferObj->Name) { + /* XXX This validation should be done by core mesa: + */ + if (!_mesa_validate_pbo_access(2, pack, width, height, 1, + format, type, pixels)) { + _mesa_error(ctx, GL_INVALID_OPERATION, "glDrawPixels"); + return GL_TRUE; + } + } + else { + /* PBO only for now: + */ + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - not PBO\n", __FUNCTION__); + return GL_FALSE; + } + + + if (ctx->_ImageTransferState || + !intel_check_blit_format(src, format, type)) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - bad format for blit\n", __FUNCTION__); + return GL_FALSE; + } + + if (pack->Alignment != 1 || pack->SwapBytes || pack->LsbFirst) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s: bad packing params\n", __FUNCTION__); + return GL_FALSE; + } + + if (pack->RowLength > 0) + rowLength = pack->RowLength; + else + rowLength = width; + + if (pack->Invert) { + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s: MESA_PACK_INVERT not done yet\n", __FUNCTION__); + return GL_FALSE; + } + else { + rowLength = -rowLength; + } + + /* XXX 64-bit cast? */ + dst_offset = (GLuint) _mesa_image_address(2, pack, pixels, width, height, + format, type, 0, 0, 0); + + + /* Although the blits go on the command buffer, need to do this and + * fire with lock held to guarentee cliprects are correct. + */ + intelFlush(&intel->ctx); + LOCK_HARDWARE(intel); + + if (intel->driDrawable->numClipRects) { + GLboolean all = (width * height * src->cpp == dst->Base.Size && + x == 0 && dst_offset == 0); + + dri_bo *dst_buffer = intel_bufferobj_buffer(intel, dst, + all ? INTEL_WRITE_FULL : + INTEL_WRITE_PART); + __DRIdrawablePrivate *dPriv = intel->driDrawable; + int nbox = dPriv->numClipRects; + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t rect; + drm_clip_rect_t src_rect; + int i; + + src_rect.x1 = dPriv->x + x; + src_rect.y1 = dPriv->y + dPriv->h - (y + height); + src_rect.x2 = src_rect.x1 + width; + src_rect.y2 = src_rect.y1 + height; + + + + for (i = 0; i < nbox; i++) { + if (!intel_intersect_cliprects(&rect, &src_rect, &box[i])) + continue; + + if (!intelEmitCopyBlit(intel, + src->cpp, + src->pitch, src->buffer, 0, src->tiling, + rowLength, dst_buffer, dst_offset, GL_FALSE, + rect.x1, + rect.y1, + rect.x1 - src_rect.x1, + rect.y2 - src_rect.y2, + rect.x2 - rect.x1, rect.y2 - rect.y1, + GL_COPY)) { + UNLOCK_HARDWARE(intel); + return GL_FALSE; + } + } + } + UNLOCK_HARDWARE(intel); + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s - DONE\n", __FUNCTION__); + + return GL_TRUE; +} + +void +intelReadPixels(GLcontext * ctx, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, GLvoid * pixels) +{ + if (INTEL_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "%s\n", __FUNCTION__); + + intelFlush(ctx); + +#ifdef I915 + if (do_blit_readpixels + (ctx, x, y, width, height, format, type, pack, pixels)) + return; + + if (do_texture_readpixels + (ctx, x, y, width, height, format, type, pack, pixels)) + return; +#else + (void)do_blit_readpixels; + (void)do_texture_readpixels; +#endif + + if (INTEL_DEBUG & DEBUG_PIXEL) + _mesa_printf("%s: fallback to swrast\n", __FUNCTION__); + + /* Update Mesa state before calling down into _swrast_ReadPixels, as + * the spans code requires the computed buffer states to be up to date, + * but _swrast_ReadPixels only updates Mesa state after setting up + * the spans code. + */ + + if (ctx->NewState) + _mesa_update_state(ctx); + + _swrast_ReadPixels(ctx, x, y, width, height, format, type, pack, pixels); +} diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 57ac8f0cc1..d19f1bae34 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -189,6 +189,19 @@ #define S7_DEPTH_OFFSET_CONST_MASK ~0 +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + /* Primitive dispatch on 830-945 */ #define _3DPRIMITIVE (CMD_3D | (0x1f << 24)) #define PRIM_INDIRECT (1<<23) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index 0aa5b8c02c..4ce7f12e40 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -52,17 +52,75 @@ #define FILE_DEBUG_FLAG DEBUG_REGION +/* This should be set to the maximum backtrace size desired. + * Set it to 0 to disable backtrace debugging. + */ +#define DEBUG_BACKTRACE_SIZE 0 + +#if DEBUG_BACKTRACE_SIZE == 0 +/* Use the standard debug output */ +#define _DBG(...) DBG(__VA_ARGS__) +#else +/* Use backtracing debug output */ +#define _DBG(...) {debug_backtrace(); DBG(__VA_ARGS__);} + +/* Backtracing debug support */ +#include <execinfo.h> + +static void +debug_backtrace(void) +{ + void *trace[DEBUG_BACKTRACE_SIZE]; + char **strings = NULL; + int traceSize; + register int i; + + traceSize = backtrace(trace, DEBUG_BACKTRACE_SIZE); + strings = backtrace_symbols(trace, traceSize); + if (strings == NULL) { + DBG("no backtrace:"); + return; + } + + /* Spit out all the strings with a colon separator. Ignore + * the first, since we don't really care about the call + * to debug_backtrace() itself. Skip until the final "/" in + * the trace to avoid really long lines. + */ + for (i = 1; i < traceSize; i++) { + char *p = strings[i], *slash = strings[i]; + while (*p) { + if (*p++ == '/') { + slash = p; + } + } + + DBG("%s:", slash); + } + + /* Free up the memory, and we're done */ + free(strings); +} + +#endif + + + /* XXX: Thread safety? */ GLubyte * intel_region_map(struct intel_context *intel, struct intel_region *region) { - DBG("%s\n", __FUNCTION__); + _DBG("%s %p\n", __FUNCTION__, region); if (!region->map_refcount++) { if (region->pbo) intel_region_cow(intel, region); - dri_bo_map(region->buffer, GL_TRUE); + if (region->tiling != I915_TILING_NONE && + intel->intelScreen->kernel_exec_fencing) + drm_intel_gem_bo_map_gtt(region->buffer); + else + dri_bo_map(region->buffer, GL_TRUE); region->map = region->buffer->virtual; } @@ -72,9 +130,13 @@ intel_region_map(struct intel_context *intel, struct intel_region *region) void intel_region_unmap(struct intel_context *intel, struct intel_region *region) { - DBG("%s\n", __FUNCTION__); + _DBG("%s %p\n", __FUNCTION__, region); if (!--region->map_refcount) { - dri_bo_unmap(region->buffer); + if (region->tiling != I915_TILING_NONE && + intel->intelScreen->kernel_exec_fencing) + drm_intel_gem_bo_unmap_gtt(region->buffer); + else + dri_bo_unmap(region->buffer); region->map = NULL; } } @@ -87,10 +149,10 @@ intel_region_alloc_internal(struct intel_context *intel, { struct intel_region *region; - DBG("%s\n", __FUNCTION__); - - if (buffer == NULL) + if (buffer == NULL) { + _DBG("%s <-- NULL\n", __FUNCTION__); return NULL; + } region = calloc(sizeof(*region), 1); region->cpp = cpp; @@ -104,15 +166,18 @@ intel_region_alloc_internal(struct intel_context *intel, region->tiling = I915_TILING_NONE; region->bit_6_swizzle = I915_BIT_6_SWIZZLE_NONE; + _DBG("%s <-- %p\n", __FUNCTION__, region); return region; } struct intel_region * intel_region_alloc(struct intel_context *intel, + uint32_t tiling, GLuint cpp, GLuint width, GLuint height, GLuint pitch, GLboolean expect_accelerated_upload) { dri_bo *buffer; + struct intel_region *region; if (expect_accelerated_upload) { buffer = drm_intel_bo_alloc_for_render(intel->bufmgr, "region", @@ -122,7 +187,16 @@ intel_region_alloc(struct intel_context *intel, pitch * cpp * height, 64); } - return intel_region_alloc_internal(intel, cpp, width, height, pitch, buffer); + region = intel_region_alloc_internal(intel, cpp, width, height, + pitch, buffer); + + if (tiling != I915_TILING_NONE) { + assert(((pitch * cpp) & 127) == 0); + drm_intel_bo_set_tiling(buffer, &tiling, pitch * cpp); + drm_intel_bo_get_tiling(buffer, ®ion->tiling, ®ion->bit_6_swizzle); + } + + return region; } struct intel_region * @@ -158,7 +232,7 @@ void intel_region_reference(struct intel_region **dst, struct intel_region *src) { if (src) - DBG("%s %p %d\n", __FUNCTION__, src, src->refcount); + _DBG("%s %p %d\n", __FUNCTION__, src, src->refcount); assert(*dst == NULL); if (src) { @@ -172,10 +246,12 @@ intel_region_release(struct intel_region **region_handle) { struct intel_region *region = *region_handle; - if (region == NULL) + if (region == NULL) { + _DBG("%s NULL\n", __FUNCTION__); return; + } - DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); + _DBG("%s %p %d\n", __FUNCTION__, region, region->refcount - 1); ASSERT(region->refcount > 0); region->refcount--; @@ -251,7 +327,7 @@ intel_region_data(struct intel_context *intel, { GLboolean locked = GL_FALSE; - DBG("%s\n", __FUNCTION__); + _DBG("%s\n", __FUNCTION__); if (intel == NULL) return; @@ -284,19 +360,20 @@ intel_region_data(struct intel_context *intel, /* Copy rectangular sub-regions. Need better logic about when to * push buffers into AGP - will currently do so whenever possible. */ -void +GLboolean intel_region_copy(struct intel_context *intel, struct intel_region *dst, GLuint dst_offset, GLuint dstx, GLuint dsty, struct intel_region *src, GLuint src_offset, - GLuint srcx, GLuint srcy, GLuint width, GLuint height) + GLuint srcx, GLuint srcy, GLuint width, GLuint height, + GLenum logicop) { - DBG("%s\n", __FUNCTION__); + _DBG("%s\n", __FUNCTION__); if (intel == NULL) - return; + return GL_FALSE; if (dst->pbo) { if (dstx == 0 && @@ -308,41 +385,12 @@ intel_region_copy(struct intel_context *intel, assert(src->cpp == dst->cpp); - intelEmitCopyBlit(intel, - dst->cpp, - src->pitch, src->buffer, src_offset, src->tiling, - dst->pitch, dst->buffer, dst_offset, dst->tiling, - srcx, srcy, dstx, dsty, width, height, - GL_COPY); -} - -/* Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -void -intel_region_fill(struct intel_context *intel, - struct intel_region *dst, - GLuint dst_offset, - GLuint dstx, GLuint dsty, - GLuint width, GLuint height, GLuint color) -{ - DBG("%s\n", __FUNCTION__); - - if (intel == NULL) - return; - - if (dst->pbo) { - if (dstx == 0 && - dsty == 0 && width == dst->pitch && height == dst->height) - intel_region_release_pbo(intel, dst); - else - intel_region_cow(intel, dst); - } - - intelEmitFillBlit(intel, - dst->cpp, - dst->pitch, dst->buffer, dst_offset, dst->tiling, - dstx, dsty, width, height, color); + return intelEmitCopyBlit(intel, + dst->cpp, + src->pitch, src->buffer, src_offset, src->tiling, + dst->pitch, dst->buffer, dst_offset, dst->tiling, + srcx, srcy, dstx, dsty, width, height, + logicop); } /* Attach to a pbo, discarding our data. Effectively zero-copy upload @@ -353,9 +401,13 @@ intel_region_attach_pbo(struct intel_context *intel, struct intel_region *region, struct intel_buffer_object *pbo) { + dri_bo *buffer; + if (region->pbo == pbo) return; + _DBG("%s %p %p\n", __FUNCTION__, region, pbo); + /* If there is already a pbo attached, break the cow tie now. * Don't call intel_region_release_pbo() as that would * unnecessarily allocate a new buffer we would have to immediately @@ -371,10 +423,13 @@ intel_region_attach_pbo(struct intel_context *intel, region->buffer = NULL; } + /* make sure pbo has a buffer of its own */ + buffer = intel_bufferobj_buffer(intel, pbo, INTEL_WRITE_FULL); + region->pbo = pbo; region->pbo->region = region; - dri_bo_reference(pbo->buffer); - region->buffer = pbo->buffer; + dri_bo_reference(buffer); + region->buffer = buffer; } @@ -385,6 +440,7 @@ void intel_region_release_pbo(struct intel_context *intel, struct intel_region *region) { + _DBG("%s %p\n", __FUNCTION__, region); assert(region->buffer == region->pbo->buffer); region->pbo->region = NULL; region->pbo = NULL; @@ -412,7 +468,7 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) assert(region->cpp * region->pitch * region->height == pbo->Base.Size); - DBG("%s (%d bytes)\n", __FUNCTION__, pbo->Base.Size); + _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, pbo->Base.Size); /* Now blit from the texture buffer to the new buffer: */ @@ -421,13 +477,13 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) if (!was_locked) LOCK_HARDWARE(intel); - intelEmitCopyBlit(intel, - region->cpp, - region->pitch, region->buffer, 0, region->tiling, - region->pitch, pbo->buffer, 0, region->tiling, - 0, 0, 0, 0, - region->pitch, region->height, - GL_COPY); + assert(intelEmitCopyBlit(intel, + region->cpp, + region->pitch, pbo->buffer, 0, region->tiling, + region->pitch, region->buffer, 0, region->tiling, + 0, 0, 0, 0, + region->pitch, region->height, + GL_COPY)); if (!was_locked) UNLOCK_HARDWARE(intel); @@ -459,6 +515,10 @@ intel_recreate_static(struct intel_context *intel, if (region == NULL) { region = calloc(sizeof(*region), 1); region->refcount = 1; + _DBG("%s creating new region %p\n", __FUNCTION__, region); + } + else { + _DBG("%s %p\n", __FUNCTION__, region); } if (intel->ctx.Visual.rgbBits == 24) diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h index 45e2bf4e77..0d379bdc6e 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.h +++ b/src/mesa/drivers/dri/intel/intel_regions.h @@ -73,7 +73,8 @@ struct intel_region * copied by calling intel_reference_region(). */ struct intel_region *intel_region_alloc(struct intel_context *intel, - GLuint cpp, GLuint width, + uint32_t tiling, + GLuint cpp, GLuint width, GLuint height, GLuint pitch, GLboolean expect_accelerated_upload); @@ -109,21 +110,15 @@ void intel_region_data(struct intel_context *intel, /* Copy rectangular sub-regions */ -void intel_region_copy(struct intel_context *intel, - struct intel_region *dest, - GLuint dest_offset, - GLuint destx, GLuint desty, - struct intel_region *src, - GLuint src_offset, - GLuint srcx, GLuint srcy, GLuint width, GLuint height); - -/* Fill a rectangular sub-region - */ -void intel_region_fill(struct intel_context *intel, - struct intel_region *dest, - GLuint dest_offset, - GLuint destx, GLuint desty, - GLuint width, GLuint height, GLuint color); +GLboolean +intel_region_copy(struct intel_context *intel, + struct intel_region *dest, + GLuint dest_offset, + GLuint destx, GLuint desty, + struct intel_region *src, + GLuint src_offset, + GLuint srcx, GLuint srcy, GLuint width, GLuint height, + GLenum logicop); /* Helpers for zerocopy uploads, particularly texture image uploads: */ diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 0f278b3acd..5b3fa9ead3 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -49,6 +49,10 @@ #include "i915_drm.h" #include "i830_dri.h" +#define DRI_CONF_TEXTURE_TILING(def) \ + DRI_CONF_OPT_BEGIN(texture_tiling, bool, def) \ + DRI_CONF_DESC(en, "Enable texture tiling") \ + DRI_CONF_OPT_END \ PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN @@ -64,6 +68,13 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_ENUM(1, "Enable reuse of all sizes of buffer objects") DRI_CONF_DESC_END DRI_CONF_OPT_END + + DRI_CONF_TEXTURE_TILING(false) + + DRI_CONF_OPT_BEGIN(early_z, bool, false) + DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).") + DRI_CONF_OPT_END + DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_FORCE_S3TC_ENABLE(false) @@ -76,7 +87,7 @@ PUBLIC const char __driConfigOptions[] = DRI_CONF_SECTION_END DRI_CONF_END; -const GLuint __driNConfigOptions = 8; +const GLuint __driNConfigOptions = 10; #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index ae0994b183..df63f29a42 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -158,81 +158,11 @@ timed_memcpy(void *dest, const void *src, size_t n) } #endif /* DO_DEBUG */ -/** - * Generate new mipmap data from BASE+1 to BASE+p (the minimally-sized mipmap - * level). - * - * The texture object's miptree must be mapped. - * - * It would be really nice if this was just called by Mesa whenever mipmaps - * needed to be regenerated, rather than us having to remember to do so in - * each texture image modification path. - * - * This function should also include an accelerated path. - */ -void -intel_generate_mipmap(GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - GLuint nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; - int face, i; - - _mesa_generate_mipmap(ctx, target, texObj); - - /* Update the level information in our private data in the new images, since - * it didn't get set as part of a normal TexImage path. - */ - for (face = 0; face < nr_faces; face++) { - for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { - struct intel_texture_image *intelImage; - - intelImage = intel_texture_image(texObj->Image[face][i]); - if (intelImage == NULL) - break; - - intelImage->level = i; - intelImage->face = face; - /* Unreference the miptree to signal that the new Data is a bare - * pointer from mesa. - */ - intel_miptree_release(intel, &intelImage->mt); - } - } -} - -static void intelGenerateMipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj) -{ - struct intel_context *intel = intel_context(ctx); - struct intel_texture_object *intelObj = intel_texture_object(texObj); - - intel_tex_map_level_images(intel, intelObj, texObj->BaseLevel); - intel_generate_mipmap(ctx, target, texObj); - intel_tex_unmap_level_images(intel, intelObj, texObj->BaseLevel); -} - void intelInitTextureFuncs(struct dd_function_table *functions) { functions->ChooseTextureFormat = intelChooseTextureFormat; - functions->TexImage1D = intelTexImage1D; - functions->TexImage2D = intelTexImage2D; - functions->TexImage3D = intelTexImage3D; - functions->TexSubImage1D = intelTexSubImage1D; - functions->TexSubImage2D = intelTexSubImage2D; - functions->TexSubImage3D = intelTexSubImage3D; - functions->CopyTexImage1D = intelCopyTexImage1D; - functions->CopyTexImage2D = intelCopyTexImage2D; - functions->CopyTexSubImage1D = intelCopyTexSubImage1D; - functions->CopyTexSubImage2D = intelCopyTexSubImage2D; - functions->GetTexImage = intelGetTexImage; - functions->GenerateMipmap = intelGenerateMipmap; - - /* compressed texture functions */ - functions->CompressedTexImage2D = intelCompressedTexImage2D; - functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D; - functions->GetCompressedTexImage = intelGetCompressedTexImage; + functions->GenerateMipmap = intel_generate_mipmap; functions->NewTextureObject = intelNewTextureObject; functions->NewTextureImage = intelNewTextureImage; diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index f5372d82fb..471aa2a240 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -35,116 +35,17 @@ void intelInitTextureFuncs(struct dd_function_table *functions); +void intelInitTextureImageFuncs(struct dd_function_table *functions); + +void intelInitTextureSubImageFuncs(struct dd_function_table *functions); + +void intelInitTextureCopyImageFuncs(struct dd_function_table *functions); + const struct gl_texture_format *intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, GLenum format, GLenum type); - -void intelTexImage3D(GLcontext * ctx, - GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint depth, - GLint border, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexSubImage3D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexImage2D(GLcontext * ctx, - GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexSubImage2D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexImage1D(GLcontext * ctx, - GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint border, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelTexSubImage1D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, - GLsizei width, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLint border); - -void intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, - GLenum internalFormat, - GLint x, GLint y, GLsizei width, GLsizei height, - GLint border); - -void intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, - GLint xoffset, GLint x, GLint y, GLsizei width); - -void intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLint x, GLint y, GLsizei width, GLsizei height); - -void intelGetTexImage(GLcontext * ctx, GLenum target, GLint level, - GLenum format, GLenum type, GLvoid * pixels, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ); - -void intelCompressedTexSubImage2D(GLcontext * ctx, - GLenum target, - GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLsizei imageSize, - const GLvoid * pixels, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - -void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, - GLvoid *pixels, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage); - void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); void intelSetTexBuffer(__DRIcontext *pDRICtx, diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 90bbb8c6bb..0335c13307 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -107,6 +107,9 @@ do_copy_texsubimage(struct intel_context *intel, intelFlush(ctx); LOCK_HARDWARE(intel); { + drm_intel_bo *dst_bo = intel_region_buffer(intel, + intelImage->mt->region, + INTEL_WRITE_PART); GLuint image_offset = intel_miptree_image_offset(intelImage->mt, intelImage->face, intelImage->level); @@ -118,8 +121,12 @@ do_copy_texsubimage(struct intel_context *intel, dstx += x - orig_x; dsty += y - orig_y; - /* image_offset may be non-page-aligned, but that's illegal for tiling. */ - assert(intelImage->mt->region->tiling == I915_TILING_NONE); + /* Can't blit to tiled buffers with non-tile-aligned offset. */ + if (intelImage->mt->region->tiling != I915_TILING_NONE && + (image_offset & 4095) != 0) { + UNLOCK_HARDWARE(intel); + return GL_FALSE; + } if (ctx->ReadBuffer->Name == 0) { /* reading from a window, adjust x, y */ @@ -140,35 +147,35 @@ do_copy_texsubimage(struct intel_context *intel, src_pitch = src->pitch; } - intelEmitCopyBlit(intel, - intelImage->mt->cpp, - src_pitch, - src->buffer, - 0, - src->tiling, - intelImage->mt->pitch, - intelImage->mt->region->buffer, - image_offset, - intelImage->mt->region->tiling, - x, y, dstx, dsty, width, height, - GL_COPY); + if (!intelEmitCopyBlit(intel, + intelImage->mt->cpp, + src_pitch, + src->buffer, + 0, + src->tiling, + intelImage->mt->pitch, + dst_bo, + image_offset, + intelImage->mt->region->tiling, + x, y, dstx, dsty, width, height, + GL_COPY)) { + UNLOCK_HARDWARE(intel); + return GL_FALSE; + } } UNLOCK_HARDWARE(intel); /* GL_SGIS_generate_mipmap */ if (intelImage->level == texObj->BaseLevel && texObj->GenerateMipmap) { - ctx->Driver.GenerateMipmap(ctx, target, texObj); + intel_generate_mipmap(ctx, target, texObj); } return GL_TRUE; } - - - -void +static void intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLint border) @@ -214,7 +221,8 @@ intelCopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, width, border); } -void + +static void intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, GLenum internalFormat, GLint x, GLint y, GLsizei width, GLsizei height, @@ -262,7 +270,7 @@ intelCopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, } -void +static void intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width) { @@ -287,8 +295,7 @@ intelCopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, } - -void +static void intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height) @@ -301,7 +308,6 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, _mesa_select_tex_image(ctx, texObj, target, level); GLenum internalFormat = texImage->InternalFormat; - /* Need to check texture is compatible with source format. */ @@ -316,3 +322,13 @@ intelCopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, xoffset, yoffset, x, y, width, height); } } + + +void +intelInitTextureCopyImageFuncs(struct dd_function_table *functions) +{ + functions->CopyTexImage1D = intelCopyTexImage1D; + functions->CopyTexImage2D = intelCopyTexImage2D; + functions->CopyTexSubImage1D = intelCopyTexSubImage1D; + functions->CopyTexSubImage2D = intelCopyTexSubImage2D; +} diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 5e61e9e95e..c5f5220837 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -131,6 +131,7 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat->MesaFormat); intelObj->mt = intel_miptree_create(intel, intelObj->base.Target, + intelImage->base._BaseFormat, intelImage->base.InternalFormat, firstLevel, lastLevel, @@ -205,7 +206,7 @@ try_pbo_upload(struct intel_context *intel, GLuint src_offset, src_stride; GLuint dst_offset, dst_stride; - if (!pbo || + if (unpack->BufferObj->Name == 0 || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { DBG("%s: failure 1\n", __FUNCTION__); @@ -235,12 +236,15 @@ try_pbo_upload(struct intel_context *intel, INTEL_WRITE_FULL); - intelEmitCopyBlit(intel, - intelImage->mt->cpp, - src_stride, src_buffer, src_offset, GL_FALSE, - dst_stride, dst_buffer, dst_offset, GL_FALSE, - 0, 0, 0, 0, width, height, - GL_COPY); + if (!intelEmitCopyBlit(intel, + intelImage->mt->cpp, + src_stride, src_buffer, src_offset, GL_FALSE, + dst_stride, dst_buffer, dst_offset, GL_FALSE, + 0, 0, 0, 0, width, height, + GL_COPY)) { + UNLOCK_HARDWARE(intel); + return GL_FALSE; + } } UNLOCK_HARDWARE(intel); @@ -248,7 +252,6 @@ try_pbo_upload(struct intel_context *intel, } - static GLboolean try_pbo_zcopy(struct intel_context *intel, struct intel_texture_image *intelImage, @@ -261,7 +264,7 @@ try_pbo_zcopy(struct intel_context *intel, GLuint src_offset, src_stride; GLuint dst_offset, dst_stride; - if (!pbo || + if (unpack->BufferObj->Name == 0 || intel->ctx._ImageTransferState || unpack->SkipPixels || unpack->SkipRows) { DBG("%s: failure 1\n", __FUNCTION__); @@ -293,10 +296,6 @@ try_pbo_zcopy(struct intel_context *intel, } - - - - static void intelTexImage(GLcontext * ctx, GLint dims, @@ -307,7 +306,8 @@ intelTexImage(GLcontext * ctx, GLenum format, GLenum type, const void *pixels, const struct gl_pixelstore_attrib *unpack, struct gl_texture_object *texObj, - struct gl_texture_image *texImage, GLsizei imageSize, int compressed) + struct gl_texture_image *texImage, GLsizei imageSize, + GLboolean compressed) { struct intel_context *intel = intel_context(ctx); struct intel_texture_object *intelObj = intel_texture_object(texObj); @@ -316,7 +316,6 @@ intelTexImage(GLcontext * ctx, GLint postConvHeight = height; GLint texelBytes, sizeInBytes; GLuint dstRowStride = 0, srcRowStride = texImage->RowStride; - GLboolean needs_map; DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); @@ -414,7 +413,9 @@ intelTexImage(GLcontext * ctx, * a miptree, so create one just for our level and store it in the image. * It'll get moved into the object miptree at validate time. */ - intelImage->mt = intel_miptree_create(intel, target, internalFormat, + intelImage->mt = intel_miptree_create(intel, target, + intelImage->base.TexFormat->BaseFormat, + internalFormat, level, level, width, height, depth, intelImage->base.TexFormat->TexelBytes, @@ -426,7 +427,7 @@ intelTexImage(GLcontext * ctx, */ if (dims <= 2 && intelImage->mt && - intel_buffer_object(unpack->BufferObj) && + unpack->BufferObj->Name != 0 && check_pbo_format(internalFormat, format, type, intelImage->base.TexFormat)) { @@ -464,8 +465,6 @@ intelTexImage(GLcontext * ctx, DBG("pbo upload failed\n"); } - - /* intelCopyTexImage calls this function with pixels == NULL, with * the expectation that the mipmap tree will be set up but nothing * more will be done. This is where those calls return: @@ -482,15 +481,8 @@ intelTexImage(GLcontext * ctx, LOCK_HARDWARE(intel); - /* Two cases where we need a mapping of the miptree: when the user supplied - * data is mapped as well (non-PBO, memcpy upload) or when we're going to do - * (software) mipmap generation. - */ - needs_map = (pixels != NULL) || (level == texObj->BaseLevel && - texObj->GenerateMipmap); - if (intelImage->mt) { - if (needs_map) + if (pixels != NULL) texImage->Data = intel_miptree_image_map(intel, intelImage->mt, intelImage->face, @@ -547,25 +539,26 @@ intelTexImage(GLcontext * ctx, format, type, pixels, unpack)) { _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); } - - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } } _mesa_unmap_teximage_pbo(ctx, unpack); if (intelImage->mt) { - if (needs_map) + if (pixels != NULL) intel_miptree_image_unmap(intel, intelImage->mt); texImage->Data = NULL; } UNLOCK_HARDWARE(intel); + + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } } -void + +static void intelTexImage3D(GLcontext * ctx, GLenum target, GLint level, GLint internalFormat, @@ -578,11 +571,11 @@ intelTexImage3D(GLcontext * ctx, { intelTexImage(ctx, 3, target, level, internalFormat, width, height, depth, border, - format, type, pixels, unpack, texObj, texImage, 0, 0); + format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE); } -void +static void intelTexImage2D(GLcontext * ctx, GLenum target, GLint level, GLint internalFormat, @@ -594,10 +587,11 @@ intelTexImage2D(GLcontext * ctx, { intelTexImage(ctx, 2, target, level, internalFormat, width, height, 1, border, - format, type, pixels, unpack, texObj, texImage, 0, 0); + format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE); } -void + +static void intelTexImage1D(GLcontext * ctx, GLenum target, GLint level, GLint internalFormat, @@ -609,21 +603,24 @@ intelTexImage1D(GLcontext * ctx, { intelTexImage(ctx, 1, target, level, internalFormat, width, 1, 1, border, - format, type, pixels, unpack, texObj, texImage, 0, 0); + format, type, pixels, unpack, texObj, texImage, 0, GL_FALSE); } -void intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) + +static void +intelCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage ) { intelTexImage(ctx, 2, target, level, internalFormat, width, height, 1, border, - 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, 1); + 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, GL_TRUE); } + /** * Need to map texture image into memory before copying image data, * then unmap it. @@ -632,7 +629,7 @@ static void intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level, GLenum format, GLenum type, GLvoid * pixels, struct gl_texture_object *texObj, - struct gl_texture_image *texImage, int compressed) + struct gl_texture_image *texImage, GLboolean compressed) { struct intel_context *intel = intel_context(ctx); struct intel_texture_image *intelImage = intel_texture_image(texImage); @@ -686,28 +683,29 @@ intel_get_tex_image(GLcontext * ctx, GLenum target, GLint level, } } -void + +static void intelGetTexImage(GLcontext * ctx, GLenum target, GLint level, GLenum format, GLenum type, GLvoid * pixels, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { intel_get_tex_image(ctx, target, level, format, type, pixels, - texObj, texImage, 0); - - + texObj, texImage, GL_FALSE); } -void + +static void intelGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, GLvoid *pixels, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { intel_get_tex_image(ctx, target, level, 0, 0, pixels, - texObj, texImage, 1); + texObj, texImage, GL_TRUE); } + void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch) @@ -816,3 +814,16 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) */ intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); } + + +void +intelInitTextureImageFuncs(struct dd_function_table *functions) +{ + functions->TexImage1D = intelTexImage1D; + functions->TexImage2D = intelTexImage2D; + functions->TexImage3D = intelTexImage3D; + functions->GetTexImage = intelGetTexImage; + + functions->CompressedTexImage2D = intelCompressedTexImage2D; + functions->GetCompressedTexImage = intelGetCompressedTexImage; +} diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.c b/src/mesa/drivers/dri/intel/intel_tex_layout.c index e6f9a41779..2c1b722b7f 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.c +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.c @@ -52,7 +52,9 @@ GLuint intel_compressed_alignment(GLenum internalFormat) return alignment; } -void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt ) +void i945_miptree_layout_2d( struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling ) { GLint align_h = 2, align_w = 4; GLuint level; @@ -92,7 +94,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tr /* Pitch must be a whole number of dwords, even though we * express it in texels. */ - mt->pitch = intel_miptree_pitch_align (intel, mt, mt->pitch); + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch); mt->total_height = 0; for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { diff --git a/src/mesa/drivers/dri/intel/intel_tex_layout.h b/src/mesa/drivers/dri/intel/intel_tex_layout.h index dbc90e6f9b..7bc25b6bcb 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_layout.h +++ b/src/mesa/drivers/dri/intel/intel_tex_layout.h @@ -38,5 +38,7 @@ static GLuint minify( GLuint d ) return MAX2(1, d>>1); } -extern void i945_miptree_layout_2d( struct intel_context *intel, struct intel_mipmap_tree *mt ); +extern void i945_miptree_layout_2d(struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling); extern GLuint intel_compressed_alignment(GLenum); diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index f86de56897..1f27131dac 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -101,11 +101,6 @@ intelTexSubimage(GLcontext * ctx, _mesa_error(ctx, GL_OUT_OF_MEMORY, "intelTexSubImage"); } - /* GL_SGIS_generate_mipmap */ - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, texObj); - } - _mesa_unmap_teximage_pbo(ctx, packing); if (intelImage->mt) { @@ -114,13 +109,15 @@ intelTexSubimage(GLcontext * ctx, } UNLOCK_HARDWARE(intel); -} - - + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + intel_generate_mipmap(ctx, target, texObj); + } +} -void +static void intelTexSubImage3D(GLcontext * ctx, GLenum target, GLint level, @@ -132,18 +129,15 @@ intelTexSubImage3D(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - intelTexSubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, width, height, depth, format, type, pixels, packing, texObj, texImage); - } - -void +static void intelTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, @@ -155,17 +149,15 @@ intelTexSubImage2D(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - intelTexSubimage(ctx, 2, target, level, xoffset, yoffset, 0, width, height, 1, format, type, pixels, packing, texObj, texImage); - } -void +static void intelTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, @@ -182,10 +174,9 @@ intelTexSubImage1D(GLcontext * ctx, xoffset, 0, 0, width, 1, 1, format, type, pixels, packing, texObj, texImage); - } -void +static void intelCompressedTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, @@ -199,3 +190,14 @@ intelCompressedTexSubImage2D(GLcontext * ctx, fprintf(stderr, "stubbed CompressedTexSubImage2D: %dx%d@%dx%d\n", width, height, xoffset, yoffset); } + + + +void +intelInitTextureSubImageFuncs(struct dd_function_table *functions) +{ + functions->TexSubImage1D = intelTexSubImage1D; + functions->TexSubImage2D = intelTexSubImage2D; + functions->TexSubImage3D = intelTexSubImage3D; + functions->CompressedTexSubImage2D = intelCompressedTexSubImage2D; +} diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index 05a375e1f3..a284d5475f 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -199,6 +199,7 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) if (!intelObj->mt) { intelObj->mt = intel_miptree_create(intel, intelObj->base.Target, + firstImage->base._BaseFormat, firstImage->base.InternalFormat, intelObj->firstLevel, intelObj->lastLevel, @@ -241,7 +242,7 @@ intel_tex_map_level_images(struct intel_context *intel, struct intel_texture_image *intelImage = intel_texture_image(intelObj->base.Image[face][level]); - if (intelImage->mt) { + if (intelImage && intelImage->mt) { intelImage->base.Data = intel_miptree_image_map(intel, intelImage->mt, @@ -268,7 +269,7 @@ intel_tex_unmap_level_images(struct intel_context *intel, struct intel_texture_image *intelImage = intel_texture_image(intelObj->base.Image[face][level]); - if (intelImage->mt) { + if (intelImage && intelImage->mt) { intel_miptree_image_unmap(intel, intelImage->mt); intelImage->base.Data = NULL; } diff --git a/src/mesa/drivers/dri/r200/.gitignore b/src/mesa/drivers/dri/r200/.gitignore index 3773d8ea73..2f9cd1a987 100644 --- a/src/mesa/drivers/dri/r200/.gitignore +++ b/src/mesa/drivers/dri/r200/.gitignore @@ -1,3 +1,15 @@ +radeon_bocs_wrapper.h +radeon_bo_legacy.[ch] radeon_chipset.h -radeon_screen.* +radeon_cmdbuf.h +radeon_common.[ch] +radeon_common_context.[ch] +radeon_cs_legacy.[ch] +radeon_dma.[ch] +radeon_fbo.c +radeon_lock.[ch] +radeon_mipmap_tree.[ch] +radeon_screen.[ch] +radeon_span.[ch] +radeon_texture.[ch] server diff --git a/src/mesa/drivers/dri/r200/Makefile b/src/mesa/drivers/dri/r200/Makefile index e9144ac75c..6a246edf7c 100644 --- a/src/mesa/drivers/dri/r200/Makefile +++ b/src/mesa/drivers/dri/r200/Makefile @@ -3,6 +3,8 @@ TOP = ../../../../.. include $(TOP)/configs/current +CFLAGS += $(RADEON_CFLAGS) + LIBNAME = r200_dri.so MINIGLX_SOURCES = server/radeon_dri.c @@ -11,25 +13,36 @@ ifeq ($(USING_EGL), 1) EGL_SOURCES = server/radeon_egl.c endif +RADEON_COMMON_SOURCES = \ + radeon_texture.c \ + radeon_common_context.c \ + radeon_common.c \ + radeon_dma.c \ + radeon_lock.c \ + radeon_bo_legacy.c \ + radeon_cs_legacy.c \ + radeon_mipmap_tree.c \ + radeon_span.c \ + radeon_fbo.c + + DRIVER_SOURCES = r200_context.c \ r200_ioctl.c \ - r200_lock.c \ r200_state.c \ r200_state_init.c \ r200_cmdbuf.c \ r200_pixel.c \ r200_tex.c \ - r200_texmem.c \ r200_texstate.c \ r200_tcl.c \ r200_swtcl.c \ - r200_span.c \ r200_maos.c \ r200_sanity.c \ r200_fragshader.c \ r200_vertprog.c \ radeon_screen.c \ - $(EGL_SOURCES) + $(EGL_SOURCES) \ + $(RADEON_COMMON_SOURCES) C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) @@ -48,7 +61,30 @@ SYMLINKS = \ COMMON_SYMLINKS = \ radeon_chipset.h \ radeon_screen.c \ - radeon_screen.h + radeon_screen.h \ + radeon_bo_legacy.c \ + radeon_cs_legacy.c \ + radeon_bo_legacy.h \ + radeon_cs_legacy.h \ + radeon_bocs_wrapper.h \ + radeon_span.h \ + radeon_span.c \ + radeon_lock.c \ + radeon_lock.h \ + radeon_common.c \ + radeon_common_context.c \ + radeon_common_context.h \ + radeon_common.h \ + radeon_cmdbuf.h \ + radeon_mipmap_tree.c \ + radeon_mipmap_tree.h \ + radeon_texture.c \ + radeon_texture.h \ + radeon_dma.c \ + radeon_dma.h \ + radeon_fbo.c + +DRI_LIB_DEPS += $(RADEON_LDFLAGS) ##### TARGETS ##### diff --git a/src/mesa/drivers/dri/r200/r200_cmdbuf.c b/src/mesa/drivers/dri/r200/r200_cmdbuf.c index e1633772a1..df9dd83344 100644 --- a/src/mesa/drivers/dri/r200/r200_cmdbuf.c +++ b/src/mesa/drivers/dri/r200/r200_cmdbuf.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast/swrast.h" #include "main/simple_list.h" +#include "radeon_common.h" #include "r200_context.h" #include "r200_state.h" #include "r200_ioctl.h" @@ -45,18 +46,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_sanity.h" #include "radeon_reg.h" -static void print_state_atom( struct r200_state_atom *state ) -{ - int i; - - fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size); - - if (0 & R200_DEBUG & DEBUG_VERBOSE) - for (i = 0 ; i < state->cmd_size ; i++) - fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]); - -} - /* The state atoms will be emitted in the order they appear in the atom list, * so this step is important. */ @@ -64,141 +53,85 @@ void r200SetUpAtomList( r200ContextPtr rmesa ) { int i, mtu; - mtu = rmesa->glCtx->Const.MaxTextureUnits; - - make_empty_list(&rmesa->hw.atomlist); - rmesa->hw.atomlist.name = "atom-list"; - - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.atf ); + mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits; + + make_empty_list(&rmesa->radeon.hw.atomlist); + rmesa->radeon.hw.atomlist.name = "atom-list"; + + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ctx ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.set ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lin ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msk ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpt ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vtx ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vap ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vte ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msc ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cst ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.zbs ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcl ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.msl ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tcg ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.grd ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.fog ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tam ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tf ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.atf ); for (i = 0; i < mtu; ++i) - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i] ); for (i = 0; i < mtu; ++i) - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i] ); for (i = 0; i < 6; ++i) - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[0] ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.afs[1] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pix[i] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[0] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.afs[1] ); for (i = 0; i < 8; ++i) - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i] ); for (i = 0; i < 3 + mtu; ++i) - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.eye ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.glt ); for (i = 0; i < 2; ++i) - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.mtl[i] ); for (i = 0; i < 6; ++i) - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.spr ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ptp ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.prf ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pvs ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[0] ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpp[1] ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[0] ); - insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpi[1] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.spr ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.ptp ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.prf ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.pvs ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[0] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpp[1] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[0] ); + insert_at_tail( &rmesa->radeon.hw.atomlist, &rmesa->hw.vpi[1] ); } -static void r200SaveHwState( r200ContextPtr rmesa ) +void r200EmitScissor(r200ContextPtr rmesa) { - struct r200_state_atom *atom; - char * dest = rmesa->backup_store.cmd_buf; - - if (R200_DEBUG & DEBUG_STATE) - fprintf(stderr, "%s\n", __FUNCTION__); - - rmesa->backup_store.cmd_used = 0; - - foreach( atom, &rmesa->hw.atomlist ) { - if ( atom->check( rmesa->glCtx, atom->idx ) ) { - int size = atom->cmd_size * 4; - memcpy( dest, atom->cmd, size); - dest += size; - rmesa->backup_store.cmd_used += size; - if (R200_DEBUG & DEBUG_STATE) - print_state_atom( atom ); - } - } - - assert( rmesa->backup_store.cmd_used <= R200_CMD_BUF_SZ ); - if (R200_DEBUG & DEBUG_STATE) - fprintf(stderr, "Returning to r200EmitState\n"); -} - -void r200EmitState( r200ContextPtr rmesa ) -{ - char *dest; - int mtu; - struct r200_state_atom *atom; - - if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (rmesa->save_on_next_emit) { - r200SaveHwState(rmesa); - rmesa->save_on_next_emit = GL_FALSE; - } - - if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty) - return; - - mtu = rmesa->glCtx->Const.MaxTextureUnits; - - /* To avoid going across the entire set of states multiple times, just check - * for enough space for the case of emitting all state, and inline the - * r200AllocCmdBuf code here without all the checks. - */ - r200EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size ); - - /* we need to calculate dest after EnsureCmdBufSpace - as we may flush the buffer - airlied */ - dest = rmesa->store.cmd_buf + rmesa->store.cmd_used; - if (R200_DEBUG & DEBUG_STATE) { - foreach( atom, &rmesa->hw.atomlist ) { - if ( atom->dirty || rmesa->hw.all_dirty ) { - if ( atom->check( rmesa->glCtx, atom->idx ) ) - print_state_atom( atom ); - else - fprintf(stderr, "skip state %s\n", atom->name); - } - } - } - - foreach( atom, &rmesa->hw.atomlist ) { - if ( rmesa->hw.all_dirty ) - atom->dirty = GL_TRUE; - if ( atom->dirty ) { - if ( atom->check( rmesa->glCtx, atom->idx ) ) { - int size = atom->cmd_size * 4; - memcpy( dest, atom->cmd, size); - dest += size; - rmesa->store.cmd_used += size; - atom->dirty = GL_FALSE; - } - } - } - - assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ ); - - rmesa->hw.is_dirty = GL_FALSE; - rmesa->hw.all_dirty = GL_FALSE; + BATCH_LOCALS(&rmesa->radeon); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + return; + } + if (rmesa->radeon.state.scissor.enabled) { + BEGIN_BATCH(8); + OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0)); + OUT_BATCH(R200_SCISSOR_ENABLE | rmesa->hw.set.cmd[SET_RE_CNTL]); + OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0)); + OUT_BATCH(R200_SCISSOR_ENABLE_0); + OUT_BATCH(CP_PACKET0(R200_RE_SCISSOR_TL_0, 0)); + OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) | + rmesa->radeon.state.scissor.rect.x1); + OUT_BATCH(CP_PACKET0(R200_RE_SCISSOR_BR_0, 0)); + OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2 - 1) << 16) | + (rmesa->radeon.state.scissor.rect.x2 - 1)); + END_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(CP_PACKET0(R200_RE_CNTL, 0)); + OUT_BATCH(rmesa->hw.set.cmd[SET_RE_CNTL] & ~R200_SCISSOR_ENABLE); + OUT_BATCH(CP_PACKET0(R200_RE_AUX_SCISSOR_CNTL, 0)); + OUT_BATCH(0); + END_BATCH(); + } } /* Fire a section of the retained (indexed_verts) buffer as a regular @@ -208,51 +141,81 @@ void r200EmitVbufPrim( r200ContextPtr rmesa, GLuint primitive, GLuint vertex_nr ) { - drm_radeon_cmd_header_t *cmd; + BATCH_LOCALS(&rmesa->radeon); assert(!(primitive & R200_VF_PRIM_WALK_IND)); - r200EmitState( rmesa ); + radeonEmitState(&rmesa->radeon); if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS)) fprintf(stderr, "%s cmd_used/4: %d prim %x nr %d\n", __FUNCTION__, rmesa->store.cmd_used/4, primitive, vertex_nr); - - cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VBUF_BUFSZ, - __FUNCTION__ ); - cmd[0].i = 0; - cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; - cmd[1].i = R200_CP_CMD_3D_DRAW_VBUF_2; - cmd[2].i = (primitive | - R200_VF_PRIM_WALK_LIST | - R200_VF_COLOR_ORDER_RGBA | - (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT)); + r200EmitScissor(rmesa); + + BEGIN_BATCH(3); + OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_VBUF_2, 0); + OUT_BATCH(primitive | R200_VF_PRIM_WALK_LIST | R200_VF_COLOR_ORDER_RGBA | + (vertex_nr << R200_VF_VERTEX_NUMBER_SHIFT)); + END_BATCH(); } +static void r200FireEB(r200ContextPtr rmesa, int vertex_count, int type) +{ + BATCH_LOCALS(&rmesa->radeon); + + if (vertex_count > 0) { + r200EmitScissor(rmesa); + BEGIN_BATCH(8+2); + OUT_BATCH_PACKET3_CLIP(R200_CP_CMD_3D_DRAW_INDX_2, 0); + OUT_BATCH(R200_VF_PRIM_WALK_IND | + R200_VF_COLOR_ORDER_RGBA | + ((vertex_count + 0) << 16) | + type); + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2); + OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810); + OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset, + rmesa->radeon.tcl.elt_dma_bo, + rmesa->radeon.tcl.elt_dma_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH((vertex_count + 1)/2); + } else { + OUT_BATCH_PACKET3(R200_CP_CMD_INDX_BUFFER, 2); + OUT_BATCH((0x80 << 24) | (0 << 16) | 0x810); + OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset); + OUT_BATCH((vertex_count + 1)/2); + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.elt_dma_bo, + RADEON_GEM_DOMAIN_GTT, 0, 0); + } + END_BATCH(); + } +} -void r200FlushElts( r200ContextPtr rmesa ) +void r200FlushElts(GLcontext *ctx) { - int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start); - int dwords; - int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 12)) / 2; + r200ContextPtr rmesa = R200_CONTEXT(ctx); + int nr, elt_used = rmesa->tcl.elt_used; if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_PRIMS)) - fprintf(stderr, "%s\n", __FUNCTION__); + fprintf(stderr, "%s %x %d\n", __FUNCTION__, rmesa->tcl.hw_primitive, elt_used); - assert( rmesa->dma.flush == r200FlushElts ); - rmesa->dma.flush = NULL; + assert( rmesa->radeon.dma.flush == r200FlushElts ); + rmesa->radeon.dma.flush = NULL; - /* Cope with odd number of elts: - */ - rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2; - dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4; + nr = elt_used / 2; - cmd[1] |= (dwords - 3) << 16; - cmd[2] |= nr << R200_VF_VERTEX_NUMBER_SHIFT; + radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); + + r200FireEB(rmesa, nr, rmesa->tcl.hw_primitive); + + radeon_bo_unref(rmesa->radeon.tcl.elt_dma_bo); + rmesa->radeon.tcl.elt_dma_bo = NULL; if (R200_DEBUG & DEBUG_SYNC) { fprintf(stderr, "%s: Syncing\n", __FUNCTION__); - r200Finish( rmesa->glCtx ); + radeonFinish( rmesa->radeon.glCtx ); } } @@ -261,7 +224,6 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, GLuint primitive, GLuint min_nr ) { - drm_radeon_cmd_header_t *cmd; GLushort *retval; if (R200_DEBUG & DEBUG_IOCTL) @@ -269,30 +231,30 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, assert((primitive & R200_VF_PRIM_WALK_IND)); - r200EmitState( rmesa ); - - cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, ELTS_BUFSZ(min_nr), - __FUNCTION__ ); - cmd[0].i = 0; - cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; - cmd[1].i = R200_CP_CMD_3D_DRAW_INDX_2; - cmd[2].i = (primitive | - R200_VF_PRIM_WALK_IND | - R200_VF_COLOR_ORDER_RGBA); + radeonEmitState(&rmesa->radeon); + + rmesa->radeon.tcl.elt_dma_bo = radeon_bo_open(rmesa->radeon.radeonScreen->bom, + 0, R200_ELT_BUF_SZ, 4, + RADEON_GEM_DOMAIN_GTT, 0); + rmesa->radeon.tcl.elt_dma_offset = 0; + rmesa->tcl.elt_used = min_nr * 2; + radeon_validate_bo(&rmesa->radeon, rmesa->radeon.tcl.elt_dma_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (radeon_revalidate_bos(rmesa->radeon.glCtx) == GL_FALSE) + fprintf(stderr,"failure to revalidate BOs - badness\n"); + + radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); + retval = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; - retval = (GLushort *)(cmd+3); if (R200_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s: header 0x%x prim %x \n", - __FUNCTION__, - cmd[1].i, primitive); - - assert(!rmesa->dma.flush); - rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; - rmesa->dma.flush = r200FlushElts; + fprintf(stderr, "%s: header prim %x \n", + __FUNCTION__, primitive); - rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf; + assert(!rmesa->radeon.dma.flush); + rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + rmesa->radeon.dma.flush = r200FlushElts; return retval; } @@ -300,129 +262,119 @@ GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, void r200EmitVertexAOS( r200ContextPtr rmesa, - GLuint vertex_size, - GLuint offset ) + GLuint vertex_size, + struct radeon_bo *bo, + GLuint offset ) { - drm_radeon_cmd_header_t *cmd; + BATCH_LOCALS(&rmesa->radeon); if (R200_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL)) fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n", __FUNCTION__, vertex_size, offset); - cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, VERT_AOS_BUFSZ, - __FUNCTION__ ); - cmd[0].header.cmd_type = RADEON_CMD_PACKET3; - cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (2 << 16); - cmd[2].i = 1; - cmd[3].i = vertex_size | (vertex_size << 8); - cmd[4].i = offset; + BEGIN_BATCH(7); + OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, 2); + OUT_BATCH(1); + OUT_BATCH(vertex_size | (vertex_size << 8)); + OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); } - -void r200EmitAOS( r200ContextPtr rmesa, - struct r200_dma_region **component, - GLuint nr, - GLuint offset ) +void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset) { - drm_radeon_cmd_header_t *cmd; - int sz = AOS_BUFSZ(nr); + BATCH_LOCALS(&rmesa->radeon); + uint32_t voffset; + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; int i; - int *tmp; - - if (R200_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s nr arrays: %d\n", __FUNCTION__, nr); - - cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sz, __FUNCTION__ ); - cmd[0].i = 0; - cmd[0].header.cmd_type = RADEON_CMD_PACKET3; - cmd[1].i = R200_CP_CMD_3D_LOAD_VBPNTR | (((sz / sizeof(int)) - 3) << 16); - cmd[2].i = nr; - tmp = &cmd[0].i; - cmd += 3; - - for (i = 0 ; i < nr ; i++) { - if (i & 1) { - cmd[0].i |= ((component[i]->aos_stride << 24) | - (component[i]->aos_size << 16)); - cmd[2].i = (component[i]->aos_start + - offset * component[i]->aos_stride * 4); - cmd += 3; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, + offset); + + BEGIN_BATCH(sz+2+ (nr*2)); + OUT_BATCH_PACKET3(R200_CP_CMD_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); + + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) | + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[i].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[i+1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } - else { - cmd[0].i = ((component[i]->aos_stride << 8) | - (component[i]->aos_size << 0)); - cmd[1].i = (component[i]->aos_start + - offset * component[i]->aos_stride * 4); + + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[nr - 1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + } else { + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) | + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH(voffset); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + OUT_BATCH(voffset); + } + + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + OUT_BATCH(voffset); + } + for (i = 0; i + 1 < nr; i += 2) { + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[i+0].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[i+1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + if (nr & 1) { + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[nr-1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } } - - if (R200_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "%s:\n", __FUNCTION__); - for (i = 0 ; i < sz ; i++) - fprintf(stderr, " %d: %x\n", i, tmp[i]); - } -} - -void r200EmitBlit( r200ContextPtr rmesa, - GLuint color_fmt, - GLuint src_pitch, - GLuint src_offset, - GLuint dst_pitch, - GLuint dst_offset, - GLint srcx, GLint srcy, - GLint dstx, GLint dsty, - GLuint w, GLuint h ) -{ - drm_radeon_cmd_header_t *cmd; - - if (R200_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", - __FUNCTION__, - src_pitch, src_offset, srcx, srcy, - dst_pitch, dst_offset, dstx, dsty, - w, h); - - assert( (src_pitch & 63) == 0 ); - assert( (dst_pitch & 63) == 0 ); - assert( (src_offset & 1023) == 0 ); - assert( (dst_offset & 1023) == 0 ); - assert( w < (1<<16) ); - assert( h < (1<<16) ); - - cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 8 * sizeof(int), - __FUNCTION__ ); - - - cmd[0].header.cmd_type = RADEON_CMD_PACKET3; - cmd[1].i = R200_CP_CMD_BITBLT_MULTI | (5 << 16); - cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | - RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_NONE | - (color_fmt << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP3_S | - RADEON_DP_SRC_SOURCE_MEMORY | - RADEON_GMC_CLR_CMP_CNTL_DIS | - RADEON_GMC_WR_MSK_DIS ); - - cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10); - cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10); - cmd[5].i = (srcx << 16) | srcy; - cmd[6].i = (dstx << 16) | dsty; /* dst */ - cmd[7].i = (w << 16) | h; -} - - -void r200EmitWait( r200ContextPtr rmesa, GLuint flags ) -{ - drm_radeon_cmd_header_t *cmd; - - assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) ); - - cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, 1 * sizeof(int), - __FUNCTION__ ); - cmd[0].i = 0; - cmd[0].wait.cmd_type = RADEON_CMD_WAIT; - cmd[0].wait.flags = flags; + END_BATCH(); } diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 058296eab2..9a92a32079 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -54,7 +54,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_context.h" #include "r200_ioctl.h" #include "r200_state.h" -#include "r200_span.h" #include "r200_pixel.h" #include "r200_tex.h" #include "r200_swtcl.h" @@ -62,6 +61,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_maos.h" #include "r200_vertprog.h" +#include "radeon_span.h" + #define need_GL_ARB_vertex_program #define need_GL_ATI_fragment_shader #define need_GL_EXT_blend_minmax @@ -71,6 +72,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_EXT_blend_func_separate #define need_GL_NV_vertex_program #define need_GL_ARB_point_parameters +#define need_GL_EXT_framebuffer_object #include "extension_helper.h" #define DRIVER_DATE "20060602" @@ -78,9 +80,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "vblank.h" #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ -#ifndef R200_DEBUG -int R200_DEBUG = (0); -#endif /* Return various strings for glGetString(). */ @@ -89,8 +88,8 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name ) r200ContextPtr rmesa = R200_CONTEXT(ctx); static char buffer[128]; unsigned offset; - GLuint agp_mode = (rmesa->r200Screen->card_type == RADEON_CARD_PCI)? 0 : - rmesa->r200Screen->AGPMode; + GLuint agp_mode = (rmesa->radeon.radeonScreen->card_type == RADEON_CARD_PCI)? 0 : + rmesa->radeon.radeonScreen->AGPMode; switch ( name ) { case GL_VENDOR: @@ -101,7 +100,7 @@ static const GLubyte *r200GetString( GLcontext *ctx, GLenum name ) agp_mode ); sprintf( & buffer[ offset ], " %sTCL", - !(rmesa->TclFallback & R200_TCL_FALLBACK_TCL_DISABLE) + !(rmesa->radeon.TclFallback & R200_TCL_FALLBACK_TCL_DISABLE) ? "" : "NO-" ); return (GLubyte *)buffer; @@ -126,6 +125,7 @@ const struct dri_extension card_extensions[] = { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, { "GL_EXT_blend_subtract", NULL }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_packed_depth_stencil", NULL}, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_edge_clamp", NULL }, @@ -167,6 +167,11 @@ const struct dri_extension point_extensions[] = { { NULL, NULL } }; +const struct dri_extension mm_extensions[] = { + { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, + { NULL, NULL } +}; + extern const struct tnl_pipeline_stage _r200_render_stage; extern const struct tnl_pipeline_stage _r200_tcl_stage; @@ -234,6 +239,39 @@ static const struct dri_debug_control debug_control[] = { NULL, 0 } }; +static void r200_get_lock(radeonContextPtr radeon) +{ + r200ContextPtr rmesa = (r200ContextPtr)radeon; + drm_radeon_sarea_t *sarea = radeon->sarea; + + R200_STATECHANGE( rmesa, ctx ); + if (rmesa->radeon.sarea->tiling_enabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; + } + else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE; + + if ( sarea->ctx_owner != rmesa->radeon.dri.hwContext ) { + sarea->ctx_owner = rmesa->radeon.dri.hwContext; + if (!radeon->radeonScreen->kernel_mm) + radeon_bo_legacy_texture_age(radeon->radeonScreen->bom); + } + +} + +static void r200_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) +{ +} + + +static void r200_init_vtbl(radeonContextPtr radeon) +{ + radeon->vtbl.get_lock = r200_get_lock; + radeon->vtbl.update_viewport_offset = r200UpdateViewportOffset; + radeon->vtbl.emit_cs_header = r200_vtbl_emit_cs_header; + radeon->vtbl.swtcl_flush = r200_swtcl_flush; + radeon->vtbl.fallback = r200Fallback; +} + /* Create the device specific rendering context. */ @@ -245,9 +283,9 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private); struct dd_function_table functions; r200ContextPtr rmesa; - GLcontext *ctx, *shareCtx; + GLcontext *ctx; int i; - int tcl_mode, fthrottle_mode; + int tcl_mode; assert(glVisual); assert(driContextPriv); @@ -257,7 +295,8 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, rmesa = (r200ContextPtr) CALLOC( sizeof(*rmesa) ); if ( !rmesa ) return GL_FALSE; - + + r200_init_vtbl(&rmesa->radeon); /* init exp fog table data */ r200InitStaticFogData(); @@ -265,12 +304,12 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, * Do this here so that initialMaxAnisotropy is set before we create * the default textures. */ - driParseConfigFiles (&rmesa->optionCache, &screen->optionCache, + driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache, screen->driScreen->myNum, "r200"); - rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache, - "def_max_anisotropy"); + rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache, + "def_max_anisotropy"); - if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) { + if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) { if ( sPriv->drm_version.minor < 13 ) fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " "disabling.\n", sPriv->drm_version.minor ); @@ -291,59 +330,15 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, r200InitTextureFuncs(&functions); r200InitShaderFuncs(&functions); - /* Allocate and initialize the Mesa context */ - if (sharedContextPrivate) - shareCtx = ((r200ContextPtr) sharedContextPrivate)->glCtx; - else - shareCtx = NULL; - rmesa->glCtx = _mesa_create_context(glVisual, shareCtx, - &functions, (void *) rmesa); - if (!rmesa->glCtx) { - FREE(rmesa); - return GL_FALSE; - } - driContextPriv->driverPrivate = rmesa; - - /* Init r200 context data */ - rmesa->dri.context = driContextPriv; - rmesa->dri.screen = sPriv; - rmesa->dri.drawable = NULL; /* Set by XMesaMakeCurrent */ - rmesa->dri.hwContext = driContextPriv->hHWContext; - rmesa->dri.hwLock = &sPriv->pSAREA->lock; - rmesa->dri.fd = sPriv->fd; - rmesa->dri.drmMinor = sPriv->drm_version.minor; - - rmesa->r200Screen = screen; - rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA + - screen->sarea_priv_offset); - - - rmesa->dma.buf0_address = rmesa->r200Screen->buffers->list[0].address; - - (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) ); - make_empty_list( & rmesa->swapped ); - - rmesa->nr_heaps = 1 /* screen->numTexHeaps */ ; - assert(rmesa->nr_heaps < RADEON_NR_TEX_HEAPS); - for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { - rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa, - screen->texSize[i], - 12, - RADEON_NR_TEX_REGIONS, - (drmTextureRegionPtr)rmesa->sarea->tex_list[i], - & rmesa->sarea->tex_age[i], - & rmesa->swapped, - sizeof( r200TexObj ), - (destroy_texture_object_t *) r200DestroyTexObj ); + if (!radeonInitContext(&rmesa->radeon, &functions, + glVisual, driContextPriv, + sharedContextPrivate)) { + FREE(rmesa); + return GL_FALSE; } - rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache, - "texture_depth"); - if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) - rmesa->texture_depth = ( screen->cpp == 4 ) ? - DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; - rmesa->swtcl.RenderIndex = ~0; - rmesa->hw.all_dirty = 1; + rmesa->radeon.swtcl.RenderIndex = ~0; + rmesa->radeon.hw.all_dirty = 1; /* Set the maximum texture size small enough that we can guarentee that * all texture units can bind a maximal texture and have all of them in @@ -351,29 +346,20 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, * setting allow larger textures. */ - ctx = rmesa->glCtx; - ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache, + ctx = rmesa->radeon.glCtx; + ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache, "texture_units"); ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits; ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits; - i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures"); - - driCalculateMaxTextureLevels( rmesa->texture_heaps, - rmesa->nr_heaps, - & ctx->Const, - 4, - 11, /* max 2D texture size is 2048x2048 */ -#if ENABLE_HW_3D_TEXTURE - 8, /* max 3D texture size is 256^3 */ -#else - 0, /* 3D textures unsupported */ -#endif - 11, /* max cube texture size is 2048x2048 */ - 11, /* max texture rectangle size is 2048x2048 */ - 12, - GL_FALSE, - i ); + i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures"); + + /* FIXME: When no memory manager is available we should set this + * to some reasonable value based on texture memory pool size */ + ctx->Const.MaxTextureLevels = 12; + ctx->Const.Max3DTextureLevels = 9; + ctx->Const.MaxCubeTextureLevels = 12; + ctx->Const.MaxTextureRectSize = 2048; ctx->Const.MaxTextureMaxAnisotropy = 16.0; @@ -383,7 +369,7 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, ctx->Const.MinPointSizeAA = 1.0; ctx->Const.MaxPointSizeAA = 1.0; ctx->Const.PointSizeGranularity = 0.0625; - if (rmesa->r200Screen->drmSupportsPointSprites) + if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) ctx->Const.MaxPointSize = 2047.0; else ctx->Const.MaxPointSize = 1.0; @@ -441,32 +427,35 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, _math_matrix_set_identity( &rmesa->tmpmat ); driInitExtensions( ctx, card_extensions, GL_TRUE ); - if (!(rmesa->r200Screen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) { + + if (rmesa->radeon.radeonScreen->kernel_mm) + driInitExtensions(ctx, mm_extensions, GL_FALSE); + if (!(rmesa->radeon.radeonScreen->chip_flags & R200_CHIPSET_YCBCR_BROKEN)) { /* yuv textures don't work with some chips - R200 / rv280 okay so far others get the bit ordering right but don't actually do YUV-RGB conversion */ _mesa_enable_extension( ctx, "GL_MESA_ycbcr_texture" ); } - if (rmesa->glCtx->Mesa_DXTn) { + if (rmesa->radeon.glCtx->Mesa_DXTn) { _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); _mesa_enable_extension( ctx, "GL_S3_s3tc" ); } - else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) { + else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) { _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); } - if (rmesa->r200Screen->drmSupportsCubeMapsR200) + if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" ); - if (rmesa->r200Screen->drmSupportsBlendColor) { + if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) { driInitExtensions( ctx, blend_extensions, GL_FALSE ); } - if(rmesa->r200Screen->drmSupportsVertexProgram) + if(rmesa->radeon.radeonScreen->drmSupportsVertexProgram) driInitSingleExtension( ctx, ARB_vp_extension ); - if(driQueryOptionb(&rmesa->optionCache, "nv_vertex_program")) + if(driQueryOptionb(&rmesa->radeon.optionCache, "nv_vertex_program")) driInitSingleExtension( ctx, NV_vp_extension ); - if ((ctx->Const.MaxTextureUnits == 6) && rmesa->r200Screen->drmSupportsFragShader) + if ((ctx->Const.MaxTextureUnits == 6) && rmesa->radeon.radeonScreen->drmSupportsFragShader) driInitSingleExtension( ctx, ATI_fs_extension ); - if (rmesa->r200Screen->drmSupportsPointSprites) + if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) driInitExtensions( ctx, point_extensions, GL_FALSE ); #if 0 r200InitDriverFuncs( ctx ); @@ -476,33 +465,16 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, #endif /* plug in a few more device driver functions */ /* XXX these should really go right after _mesa_init_driver_functions() */ + radeon_fbo_init(&rmesa->radeon); + radeonInitSpanFuncs( ctx ); r200InitPixelFuncs( ctx ); - r200InitSpanFuncs( ctx ); r200InitTnlFuncs( ctx ); r200InitState( rmesa ); r200InitSwtcl( ctx ); - fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode"); - rmesa->iw.irq_seq = -1; - rmesa->irqsEmitted = 0; - rmesa->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && - rmesa->r200Screen->irq); - - rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); - - if (!rmesa->do_irqs) - fprintf(stderr, - "IRQ's not enabled, falling back to %s: %d %d\n", - rmesa->do_usleeps ? "usleeps" : "busy waits", - fthrottle_mode, - rmesa->r200Screen->irq); - rmesa->prefer_gart_client_texturing = (getenv("R200_GART_CLIENT_TEXTURES") != 0); - (*sPriv->systemTime->getUST)( & rmesa->swap_ust ); - - #if DO_DEBUG R200_DEBUG = driParseDebugString( getenv( "R200_DEBUG" ), debug_control ); @@ -510,202 +482,21 @@ GLboolean r200CreateContext( const __GLcontextModes *glVisual, debug_control ); #endif - tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode"); - if (driQueryOptionb(&rmesa->optionCache, "no_rast")) { + tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode"); + if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) { fprintf(stderr, "disabling 3D acceleration\n"); FALLBACK(rmesa, R200_FALLBACK_DISABLE, 1); } else if (tcl_mode == DRI_CONF_TCL_SW || getenv("R200_NO_TCL") || - !(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) { - if (rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL) { - rmesa->r200Screen->chip_flags &= ~RADEON_CHIPSET_TCL; + !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { + if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL; fprintf(stderr, "Disabling HW TCL support\n"); } - TCL_FALLBACK(rmesa->glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1); + TCL_FALLBACK(rmesa->radeon.glCtx, R200_TCL_FALLBACK_TCL_DISABLE, 1); } return GL_TRUE; } -/* Destroy the device specific context. - */ -/* Destroy the Mesa and driver specific context data. - */ -void r200DestroyContext( __DRIcontextPrivate *driContextPriv ) -{ - GET_CURRENT_CONTEXT(ctx); - r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate; - r200ContextPtr current = ctx ? R200_CONTEXT(ctx) : NULL; - - /* check if we're deleting the currently bound context */ - if (rmesa == current) { - R200_FIREVERTICES( rmesa ); - _mesa_make_current(NULL, NULL, NULL); - } - - /* Free r200 context resources */ - assert(rmesa); /* should never be null */ - if ( rmesa ) { - GLboolean release_texture_heaps; - - - release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1); - _swsetup_DestroyContext( rmesa->glCtx ); - _tnl_DestroyContext( rmesa->glCtx ); - _vbo_DestroyContext( rmesa->glCtx ); - _swrast_DestroyContext( rmesa->glCtx ); - - r200DestroySwtcl( rmesa->glCtx ); - r200ReleaseArrays( rmesa->glCtx, ~0 ); - - if (rmesa->dma.current.buf) { - r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); - r200FlushCmdBuf( rmesa, __FUNCTION__ ); - } - - if (rmesa->state.scissor.pClipRects) { - FREE(rmesa->state.scissor.pClipRects); - rmesa->state.scissor.pClipRects = NULL; - } - - if ( release_texture_heaps ) { - /* This share group is about to go away, free our private - * texture object data. - */ - int i; - - for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { - driDestroyTextureHeap( rmesa->texture_heaps[ i ] ); - rmesa->texture_heaps[ i ] = NULL; - } - - assert( is_empty_list( & rmesa->swapped ) ); - } - - /* free the Mesa context */ - rmesa->glCtx->DriverCtx = NULL; - _mesa_destroy_context( rmesa->glCtx ); - - /* free the option cache */ - driDestroyOptionCache (&rmesa->optionCache); - - FREE( rmesa ); - } -} - - - - -void -r200SwapBuffers( __DRIdrawablePrivate *dPriv ) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - r200ContextPtr rmesa; - GLcontext *ctx; - rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; - ctx = rmesa->glCtx; - if (ctx->Visual.doubleBufferMode) { - _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ - if ( rmesa->doPageFlip ) { - r200PageFlip( dPriv ); - } - else { - r200CopyBuffer( dPriv, NULL ); - } - } - } - else { - /* XXX this shouldn't be an error but we can't handle it for now */ - _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__); - } -} - -void -r200CopySubBuffer( __DRIdrawablePrivate *dPriv, - int x, int y, int w, int h ) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - r200ContextPtr rmesa; - GLcontext *ctx; - rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; - ctx = rmesa->glCtx; - if (ctx->Visual.doubleBufferMode) { - drm_clip_rect_t rect; - rect.x1 = x + dPriv->x; - rect.y1 = (dPriv->h - y - h) + dPriv->y; - rect.x2 = rect.x1 + w; - rect.y2 = rect.y1 + h; - _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ - r200CopyBuffer( dPriv, &rect ); - } - } - else { - /* XXX this shouldn't be an error but we can't handle it for now */ - _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__); - } -} - -/* Force the context `c' to be the current context and associate with it - * buffer `b'. - */ -GLboolean -r200MakeCurrent( __DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv ) -{ - if ( driContextPriv ) { - r200ContextPtr newCtx = - (r200ContextPtr) driContextPriv->driverPrivate; - - if (R200_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)newCtx->glCtx); - - newCtx->dri.readable = driReadPriv; - - if ( newCtx->dri.drawable != driDrawPriv || - newCtx->lastStamp != driDrawPriv->lastStamp ) { - if (driDrawPriv->swap_interval == (unsigned)-1) { - driDrawPriv->vblFlags = (newCtx->r200Screen->irq != 0) - ? driGetDefaultVBlankFlags(&newCtx->optionCache) - : VBLANK_FLAG_NO_IRQ; - - driDrawableInitVBlank( driDrawPriv ); - } - - newCtx->dri.drawable = driDrawPriv; - - r200SetCliprects(newCtx); - r200UpdateViewportOffset( newCtx->glCtx ); - } - - _mesa_make_current( newCtx->glCtx, - (GLframebuffer *) driDrawPriv->driverPrivate, - (GLframebuffer *) driReadPriv->driverPrivate ); - - _mesa_update_state( newCtx->glCtx ); - r200ValidateState( newCtx->glCtx ); - - } else { - if (R200_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s ctx is null\n", __FUNCTION__); - _mesa_make_current( NULL, NULL, NULL ); - } - - if (R200_DEBUG & DEBUG_DRI) - fprintf(stderr, "End %s\n", __FUNCTION__); - return GL_TRUE; -} - -/* Force the context `c' to be unbound from its buffer. - */ -GLboolean -r200UnbindContext( __DRIcontextPrivate *driContextPriv ) -{ - r200ContextPtr rmesa = (r200ContextPtr) driContextPriv->driverPrivate; - - if (R200_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *)rmesa->glCtx); - - return GL_TRUE; -} diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h index 14a1dda46a..6267293817 100644 --- a/src/mesa/drivers/dri/r200/r200_context.h +++ b/src/mesa/drivers/dri/r200/r200_context.h @@ -53,51 +53,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #error This driver requires a newer libdrm to compile #endif +#include "radeon_screen.h" +#include "radeon_common.h" + +#include "radeon_lock.h" + struct r200_context; typedef struct r200_context r200ContextRec; typedef struct r200_context *r200ContextPtr; -/* This union is used to avoid warnings/miscompilation - with float to uint32_t casts due to strict-aliasing */ -typedef union { GLfloat f; uint32_t ui32; } float_ui32_type; - -#include "r200_lock.h" -#include "radeon_screen.h" #include "main/mm.h" -/* Flags for software fallback cases */ -/* See correponding strings in r200_swtcl.c */ -#define R200_FALLBACK_TEXTURE 0x01 -#define R200_FALLBACK_DRAW_BUFFER 0x02 -#define R200_FALLBACK_STENCIL 0x04 -#define R200_FALLBACK_RENDER_MODE 0x08 -#define R200_FALLBACK_DISABLE 0x10 -#define R200_FALLBACK_BORDER_MODE 0x20 - -/* The blit width for texture uploads - */ -#define BLIT_WIDTH_BYTES 1024 - -/* Use the templated vertex format: - */ -#define COLOR_IS_RGBA -#define TAG(x) r200##x -#include "tnl_dd/t_dd_vertex.h" -#undef TAG - -typedef void (*r200_tri_func)( r200ContextPtr, - r200Vertex *, - r200Vertex *, - r200Vertex * ); - -typedef void (*r200_line_func)( r200ContextPtr, - r200Vertex *, - r200Vertex * ); - -typedef void (*r200_point_func)( r200ContextPtr, - r200Vertex * ); - - struct r200_vertex_program { struct gl_vertex_program mesa_program; /* Must be first */ int translated; @@ -112,93 +78,11 @@ struct r200_vertex_program { int fogmode; }; -struct r200_colorbuffer_state { - GLuint clear; -#if 000 - GLint drawOffset, drawPitch; -#endif - int roundEnable; -}; - - -struct r200_depthbuffer_state { - GLuint clear; - GLfloat scale; -}; - -#if 000 -struct r200_pixel_state { - GLint readOffset, readPitch; -}; -#endif - -struct r200_scissor_state { - drm_clip_rect_t rect; - GLboolean enabled; - - GLuint numClipRects; /* Cliprects active */ - GLuint numAllocedClipRects; /* Cliprects available */ - drm_clip_rect_t *pClipRects; -}; - -struct r200_stencilbuffer_state { - GLboolean hwBuffer; - GLuint clear; /* rb3d_stencilrefmask value */ -}; - -struct r200_stipple_state { - GLuint mask[32]; -}; - - - -#define TEX_0 0x1 -#define TEX_1 0x2 -#define TEX_2 0x4 -#define TEX_3 0x8 -#define TEX_4 0x10 -#define TEX_5 0x20 -#define TEX_ALL 0x3f - -typedef struct r200_tex_obj r200TexObj, *r200TexObjPtr; - -/* Texture object in locally shared texture space. - */ -struct r200_tex_obj { - driTextureObject base; - - GLuint bufAddr; /* Offset to start of locally - shared texture block */ - - GLuint dirty_state; /* Flags (1 per texunit) for - whether or not this texobj - has dirty hardware state - (pp_*) that needs to be - brought into the - texunit. */ - - drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; - /* Six, for the cube faces */ - GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ - - GLuint pp_txfilter; /* hardware register values */ - GLuint pp_txformat; - GLuint pp_txformat_x; - GLuint pp_txoffset; /* Image location in texmem. - All cube faces follow. */ - GLuint pp_txsize; /* npot only */ - GLuint pp_txpitch; /* npot only */ - GLuint pp_border_color; - GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ - - GLboolean border_fallback; - - GLuint tile_bits; /* hw texture tile bits used on this texture */ -}; +#define R200_TEX_ALL 0x3f struct r200_texture_env_state { - r200TexObjPtr texobj; + radeonTexObjPtr texobj; GLuint outputreg; GLuint unitneeded; }; @@ -210,19 +94,6 @@ struct r200_texture_state { }; -struct r200_state_atom { - struct r200_state_atom *next, *prev; - const char *name; /* for debug */ - int cmd_size; /* size in bytes */ - GLuint idx; - int *cmd; /* one or more cmd's */ - int *lastcmd; /* one or more cmd's */ - GLboolean dirty; - GLboolean (*check)( GLcontext *, int ); /* is this state active? */ -}; - - - /* Trying to keep these relatively short as the variables are becoming * extravagently long. Drop the driver name prefix off the front of * everything - I think we know which driver we're in by now, and keep the @@ -597,181 +468,79 @@ struct r200_state_atom { struct r200_hw_state { - /* Head of the linked list of state atoms. */ - struct r200_state_atom atomlist; - /* Hardware state, stored as cmdbuf commands: * -- Need to doublebuffer for * - reviving state after loss of context * - eliding noop statechange loops? (except line stipple count) */ - struct r200_state_atom ctx; - struct r200_state_atom set; - struct r200_state_atom vte; - struct r200_state_atom lin; - struct r200_state_atom msk; - struct r200_state_atom vpt; - struct r200_state_atom vap; - struct r200_state_atom vtx; - struct r200_state_atom tcl; - struct r200_state_atom msl; - struct r200_state_atom tcg; - struct r200_state_atom msc; - struct r200_state_atom cst; - struct r200_state_atom tam; - struct r200_state_atom tf; - struct r200_state_atom tex[6]; - struct r200_state_atom cube[6]; - struct r200_state_atom zbs; - struct r200_state_atom mtl[2]; - struct r200_state_atom mat[9]; - struct r200_state_atom lit[8]; /* includes vec, scl commands */ - struct r200_state_atom ucp[6]; - struct r200_state_atom pix[6]; /* pixshader stages */ - struct r200_state_atom eye; /* eye pos */ - struct r200_state_atom grd; /* guard band clipping */ - struct r200_state_atom fog; - struct r200_state_atom glt; - struct r200_state_atom prf; - struct r200_state_atom afs[2]; - struct r200_state_atom pvs; - struct r200_state_atom vpi[2]; - struct r200_state_atom vpp[2]; - struct r200_state_atom atf; - struct r200_state_atom spr; - struct r200_state_atom ptp; - - int max_state_size; /* Number of bytes necessary for a full state emit. */ - GLboolean is_dirty, all_dirty; + struct radeon_state_atom ctx; + struct radeon_state_atom set; + struct radeon_state_atom vte; + struct radeon_state_atom lin; + struct radeon_state_atom msk; + struct radeon_state_atom vpt; + struct radeon_state_atom vap; + struct radeon_state_atom vtx; + struct radeon_state_atom tcl; + struct radeon_state_atom msl; + struct radeon_state_atom tcg; + struct radeon_state_atom msc; + struct radeon_state_atom cst; + struct radeon_state_atom tam; + struct radeon_state_atom tf; + struct radeon_state_atom tex[6]; + struct radeon_state_atom cube[6]; + struct radeon_state_atom zbs; + struct radeon_state_atom mtl[2]; + struct radeon_state_atom mat[9]; + struct radeon_state_atom lit[8]; /* includes vec, scl commands */ + struct radeon_state_atom ucp[6]; + struct radeon_state_atom pix[6]; /* pixshader stages */ + struct radeon_state_atom eye; /* eye pos */ + struct radeon_state_atom grd; /* guard band clipping */ + struct radeon_state_atom fog; + struct radeon_state_atom glt; + struct radeon_state_atom prf; + struct radeon_state_atom afs[2]; + struct radeon_state_atom pvs; + struct radeon_state_atom vpi[2]; + struct radeon_state_atom vpp[2]; + struct radeon_state_atom atf; + struct radeon_state_atom spr; + struct radeon_state_atom ptp; }; struct r200_state { /* Derived state for internal purposes: */ - struct r200_colorbuffer_state color; - struct r200_depthbuffer_state depth; -#if 00 - struct r200_pixel_state pixel; -#endif - struct r200_scissor_state scissor; - struct r200_stencilbuffer_state stencil; - struct r200_stipple_state stipple; + struct radeon_stipple_state stipple; struct r200_texture_state texture; GLuint envneeded; }; -/* Need refcounting on dma buffers: - */ -struct r200_dma_buffer { - int refcount; /* the number of retained regions in buf */ - drmBufPtr buf; -}; - -#define GET_START(rvb) (rmesa->r200Screen->gart_buffer_offset + \ - (rvb)->address - rmesa->dma.buf0_address + \ - (rvb)->start) - -/* A retained region, eg vertices for indexed vertices. - */ -struct r200_dma_region { - struct r200_dma_buffer *buf; - char *address; /* == buf->address */ - int start, end, ptr; /* offsets from start of buf */ - int aos_start; - int aos_stride; - int aos_size; -}; - - -struct r200_dma { - /* Active dma region. Allocations for vertices and retained - * regions come from here. Also used for emitting random vertices, - * these may be flushed by calling flush_current(); - */ - struct r200_dma_region current; - - void (*flush)( r200ContextPtr ); - - char *buf0_address; /* start of buf[0], for index calcs */ - GLuint nr_released_bufs; /* flush after so many buffers released */ -}; - -struct r200_dri_mirror { - __DRIcontextPrivate *context; /* DRI context */ - __DRIscreenPrivate *screen; /* DRI screen */ - __DRIdrawablePrivate *drawable; /* DRI drawable bound to this ctx */ - __DRIdrawablePrivate *readable; /* DRI readable bound to this ctx */ - - drm_context_t hwContext; - drm_hw_lock_t *hwLock; - int fd; - int drmMinor; -}; - - #define R200_CMD_BUF_SZ (16*1024) -struct r200_store { - GLuint statenr; - GLuint primnr; - char cmd_buf[R200_CMD_BUF_SZ]; - int cmd_used; - int elts_start; -}; - - +#define R200_ELT_BUF_SZ (16*1024) /* r200_tcl.c */ struct r200_tcl_info { GLuint hw_primitive; -/* hw can handle 12 components max */ - struct r200_dma_region *aos_components[12]; - GLuint nr_aos_components; - GLuint *Elts; - struct r200_dma_region indexed_verts; - struct r200_dma_region vertex_data[15]; + int elt_used; + }; /* r200_swtcl.c */ struct r200_swtcl_info { - GLuint RenderIndex; - - /** - * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is - * installed in the Mesa state vector. - */ - GLuint vertex_size; - /** - * Attributes instructing the Mesa TCL pipeline where / how to put vertex - * data in the hardware buffer. - */ - struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; - /** - * Number of elements of \c ::vertex_attrs that are actually used. - */ - GLuint vertex_attr_count; - - /** - * Cached pointer to the buffer where Mesa will store vertex data. - */ - GLubyte *verts; - - /* Fallback rasterization functions - */ - r200_point_func draw_point; - r200_line_func draw_line; - r200_tri_func draw_tri; - - GLuint hw_primitive; - GLenum render_primitive; - GLuint numverts; + radeon_point_func draw_point; + radeon_line_func draw_line; + radeon_tri_func draw_tri; /** * Offset of the 4UB color data within a hardware (swtcl) vertex. @@ -787,27 +556,10 @@ struct r200_swtcl_info { * Should Mesa project vertex data or will the hardware do it? */ GLboolean needproj; - - struct r200_dma_region indexed_verts; -}; - - -struct r200_ioctl { - GLuint vertex_offset; - GLuint vertex_size; }; -#define R200_MAX_PRIMS 64 - - - -struct r200_prim { - GLuint start; - GLuint end; - GLuint prim; -}; /* A maximum total of 29 elements per vertex: 3 floats for position, 3 * floats for normal, 4 floats for color, 4 bytes for secondary color, @@ -822,9 +574,8 @@ struct r200_prim { #define R200_MAX_VERTEX_SIZE ((3*6)+11) - struct r200_context { - GLcontext *glCtx; /* Mesa context */ + struct radeon_context radeon; /* Driver and hardware state management */ @@ -832,56 +583,15 @@ struct r200_context { struct r200_state state; struct r200_vertex_program *curr_vp_hw; - /* Texture object bookkeeping - */ - unsigned nr_heaps; - driTexHeap * texture_heaps[ RADEON_NR_TEX_HEAPS ]; - driTextureObject swapped; - int texture_depth; - float initialMaxAnisotropy; - - /* Rasterization and vertex state: - */ - GLuint TclFallback; - GLuint Fallback; - GLuint NewGLState; - DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ - /* Vertex buffers */ - struct r200_ioctl ioctl; - struct r200_dma dma; - struct r200_store store; - /* A full state emit as of the first state emit in the main store, in case - * the context is lost. - */ - struct r200_store backup_store; - - /* Page flipping - */ - GLuint doPageFlip; - - /* Busy waiting - */ - GLuint do_usleeps; - GLuint do_irqs; - GLuint irqsEmitted; - drm_radeon_irq_wait_t iw; + struct radeon_ioctl ioctl; + struct radeon_store store; /* Clientdata textures; */ GLuint prefer_gart_client_texturing; - /* Drawable, cliprect and scissor information - */ - GLuint numClipRects; /* Cliprects for the draw buffer */ - drm_clip_rect_t *pClipRects; - unsigned int lastStamp; - GLboolean lost_context; - GLboolean save_on_next_emit; - radeonScreenPtr r200Screen; /* Screen private DRI data */ - drm_radeon_sarea_t *sarea; /* Private SAREA data */ - /* TCL stuff */ GLmatrix TexGenMatrix[R200_MAX_TEXTURE_UNITS]; @@ -893,15 +603,6 @@ struct r200_context { GLuint TexGenCompSel; GLmatrix tmpmat; - /* buffer swap - */ - int64_t swap_ust; - int64_t swap_missed_ust; - - GLuint swap_count; - GLuint swap_missed_count; - - /* r200_tcl.c */ struct r200_tcl_info tcl; @@ -910,14 +611,6 @@ struct r200_context { */ struct r200_swtcl_info swtcl; - /* Mirrors of some DRI state - */ - struct r200_dri_mirror dri; - - /* Configuration cache - */ - driOptionCache optionCache; - GLboolean using_hyperz; GLboolean texmicrotile; @@ -927,28 +620,10 @@ struct r200_context { #define R200_CONTEXT(ctx) ((r200ContextPtr)(ctx->DriverCtx)) -static INLINE GLuint r200PackColor( GLuint cpp, - GLubyte r, GLubyte g, - GLubyte b, GLubyte a ) -{ - switch ( cpp ) { - case 2: - return PACK_COLOR_565( r, g, b ); - case 4: - return PACK_COLOR_8888( a, r, g, b ); - default: - return 0; - } -} - - extern void r200DestroyContext( __DRIcontextPrivate *driContextPriv ); extern GLboolean r200CreateContext( const __GLcontextModes *glVisual, __DRIcontextPrivate *driContextPriv, void *sharedContextPrivate); -extern void r200SwapBuffers( __DRIdrawablePrivate *dPriv ); -extern void r200CopySubBuffer( __DRIdrawablePrivate * dPriv, - int x, int y, int w, int h ); extern GLboolean r200MakeCurrent( __DRIcontextPrivate *driContextPriv, __DRIdrawablePrivate *driDrawPriv, __DRIdrawablePrivate *driReadPriv ); @@ -957,28 +632,9 @@ extern GLboolean r200UnbindContext( __DRIcontextPrivate *driContextPriv ); /* ================================================================ * Debugging: */ -#define DO_DEBUG 1 -#if DO_DEBUG -extern int R200_DEBUG; -#else -#define R200_DEBUG 0 -#endif +#define R200_DEBUG RADEON_DEBUG + -#define DEBUG_TEXTURE 0x001 -#define DEBUG_STATE 0x002 -#define DEBUG_IOCTL 0x004 -#define DEBUG_PRIMS 0x008 -#define DEBUG_VERTS 0x010 -#define DEBUG_FALLBACKS 0x020 -#define DEBUG_VFMT 0x040 -#define DEBUG_CODEGEN 0x080 -#define DEBUG_VERBOSE 0x100 -#define DEBUG_DRI 0x200 -#define DEBUG_DMA 0x400 -#define DEBUG_SANITY 0x800 -#define DEBUG_SYNC 0x1000 -#define DEBUG_PIXEL 0x2000 -#define DEBUG_MEMORY 0x4000 #endif /* __R200_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r200/r200_fragshader.c b/src/mesa/drivers/dri/r200/r200_fragshader.c index d514b28219..85c1b7bdd1 100644 --- a/src/mesa/drivers/dri/r200/r200_fragshader.c +++ b/src/mesa/drivers/dri/r200/r200_fragshader.c @@ -522,7 +522,7 @@ static void r200UpdateFSConstants( GLcontext *ctx ) CLAMPED_FLOAT_TO_UBYTE(con_byte[2], ctx->ATIFragmentShader.GlobalConstants[i][2]); CLAMPED_FLOAT_TO_UBYTE(con_byte[3], ctx->ATIFragmentShader.GlobalConstants[i][3]); } - rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = r200PackColor ( + rmesa->hw.atf.cmd[ATF_TFACTOR_0 + i] = radeonPackColor ( 4, con_byte[0], con_byte[1], con_byte[2], con_byte[3] ); } } diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.c b/src/mesa/drivers/dri/r200/r200_ioctl.c index 0741e57af7..6560efdca3 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.c +++ b/src/mesa/drivers/dri/r200/r200_ioctl.c @@ -31,7 +31,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * Authors: * Keith Whitwell <keith@tungstengraphics.com> */ - + #include <sched.h> #include <errno.h> @@ -41,6 +41,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "swrast/swrast.h" + + +#include "radeon_common.h" +#include "radeon_lock.h" #include "r200_context.h" #include "r200_state.h" #include "r200_ioctl.h" @@ -54,635 +58,19 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R200_TIMEOUT 512 #define R200_IDLE_RETRY 16 - -static void r200WaitForIdle( r200ContextPtr rmesa ); - - -/* At this point we were in FlushCmdBufLocked but we had lost our context, so - * we need to unwire our current cmdbuf, hook the one with the saved state in - * it, flush it, and then put the current one back. This is so commands at the - * start of a cmdbuf can rely on the state being kept from the previous one. - */ -static void r200BackUpAndEmitLostStateLocked( r200ContextPtr rmesa ) -{ - GLuint nr_released_bufs; - struct r200_store saved_store; - - if (rmesa->backup_store.cmd_used == 0) - return; - - if (R200_DEBUG & DEBUG_STATE) - fprintf(stderr, "Emitting backup state on lost context\n"); - - rmesa->lost_context = GL_FALSE; - - nr_released_bufs = rmesa->dma.nr_released_bufs; - saved_store = rmesa->store; - rmesa->dma.nr_released_bufs = 0; - rmesa->store = rmesa->backup_store; - r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); - rmesa->dma.nr_released_bufs = nr_released_bufs; - rmesa->store = saved_store; -} - -int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller ) -{ - int ret, i; - drm_radeon_cmd_buffer_t cmd; - - if (rmesa->lost_context) - r200BackUpAndEmitLostStateLocked( rmesa ); - - if (R200_DEBUG & DEBUG_IOCTL) { - fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); - - if (0 & R200_DEBUG & DEBUG_VERBOSE) - for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 ) - fprintf(stderr, "%d: %x\n", i/4, - *(int *)(&rmesa->store.cmd_buf[i])); - } - - if (R200_DEBUG & DEBUG_DMA) - fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__, - rmesa->dma.nr_released_bufs); - - - if (R200_DEBUG & DEBUG_SANITY) { - if (rmesa->state.scissor.enabled) - ret = r200SanityCmdBuffer( rmesa, - rmesa->state.scissor.numClipRects, - rmesa->state.scissor.pClipRects); - else - ret = r200SanityCmdBuffer( rmesa, - rmesa->numClipRects, - rmesa->pClipRects); - if (ret) { - fprintf(stderr, "drmSanityCommandWrite: %d\n", ret); - goto out; - } - } - - - if (R200_DEBUG & DEBUG_MEMORY) { - if (! driValidateTextureHeaps( rmesa->texture_heaps, rmesa->nr_heaps, - & rmesa->swapped ) ) { - fprintf( stderr, "%s: texture memory is inconsistent - expect " - "mangled textures\n", __FUNCTION__ ); - } - } - - - cmd.bufsz = rmesa->store.cmd_used; - cmd.buf = rmesa->store.cmd_buf; - - if (rmesa->state.scissor.enabled) { - cmd.nbox = rmesa->state.scissor.numClipRects; - cmd.boxes = (drm_clip_rect_t *)rmesa->state.scissor.pClipRects; - } else { - cmd.nbox = rmesa->numClipRects; - cmd.boxes = (drm_clip_rect_t *)rmesa->pClipRects; - } - - ret = drmCommandWrite( rmesa->dri.fd, - DRM_RADEON_CMDBUF, - &cmd, sizeof(cmd) ); - - if (ret) - fprintf(stderr, "drmCommandWrite: %d\n", ret); - - if (R200_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__); - r200WaitForIdleLocked( rmesa ); - } - - - out: - rmesa->store.primnr = 0; - rmesa->store.statenr = 0; - rmesa->store.cmd_used = 0; - rmesa->dma.nr_released_bufs = 0; - rmesa->save_on_next_emit = 1; - - return ret; -} - - -/* Note: does not emit any commands to avoid recursion on - * r200AllocCmdBuf. - */ -void r200FlushCmdBuf( r200ContextPtr rmesa, const char *caller ) -{ - int ret; - - LOCK_HARDWARE( rmesa ); - - ret = r200FlushCmdBufLocked( rmesa, caller ); - - UNLOCK_HARDWARE( rmesa ); - - if (ret) { - fprintf(stderr, "drmRadeonCmdBuffer: %d (exiting)\n", ret); - exit(ret); - } -} - - -/* ============================================================= - * Hardware vertex buffer handling - */ - - -void r200RefillCurrentDmaRegion( r200ContextPtr rmesa ) -{ - struct r200_dma_buffer *dmabuf; - int fd = rmesa->dri.fd; - int index = 0; - int size = 0; - drmDMAReq dma; - int ret; - - if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (rmesa->dma.flush) { - rmesa->dma.flush( rmesa ); - } - - if (rmesa->dma.current.buf) - r200ReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); - - if (rmesa->dma.nr_released_bufs > 4) - r200FlushCmdBuf( rmesa, __FUNCTION__ ); - - dma.context = rmesa->dri.hwContext; - dma.send_count = 0; - dma.send_list = NULL; - dma.send_sizes = NULL; - dma.flags = 0; - dma.request_count = 1; - dma.request_size = RADEON_BUFFER_SIZE; - dma.request_list = &index; - dma.request_sizes = &size; - dma.granted_count = 0; - - LOCK_HARDWARE(rmesa); /* no need to validate */ - - while (1) { - ret = drmDMA( fd, &dma ); - if (ret == 0) - break; - - if (rmesa->dma.nr_released_bufs) { - r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); - } - - if (rmesa->do_usleeps) { - UNLOCK_HARDWARE( rmesa ); - DO_USLEEP( 1 ); - LOCK_HARDWARE( rmesa ); - } - } - - UNLOCK_HARDWARE(rmesa); - - if (R200_DEBUG & DEBUG_DMA) - fprintf(stderr, "Allocated buffer %d\n", index); - - dmabuf = CALLOC_STRUCT( r200_dma_buffer ); - dmabuf->buf = &rmesa->r200Screen->buffers->list[index]; - dmabuf->refcount = 1; - - rmesa->dma.current.buf = dmabuf; - rmesa->dma.current.address = dmabuf->buf->address; - rmesa->dma.current.end = dmabuf->buf->total; - rmesa->dma.current.start = 0; - rmesa->dma.current.ptr = 0; -} - -void r200ReleaseDmaRegion( r200ContextPtr rmesa, - struct r200_dma_region *region, - const char *caller ) -{ - if (R200_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); - - if (!region->buf) - return; - - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); - - if (--region->buf->refcount == 0) { - drm_radeon_cmd_header_t *cmd; - - if (R200_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) - fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__, - region->buf->buf->idx); - - cmd = (drm_radeon_cmd_header_t *)r200AllocCmdBuf( rmesa, sizeof(*cmd), - __FUNCTION__ ); - cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD; - cmd->dma.buf_idx = region->buf->buf->idx; - FREE(region->buf); - rmesa->dma.nr_released_bufs++; - } - - region->buf = NULL; - region->start = 0; -} - -/* Allocates a region from rmesa->dma.current. If there isn't enough - * space in current, grab a new buffer (and discard what was left of current) - */ -void r200AllocDmaRegion( r200ContextPtr rmesa, - struct r200_dma_region *region, - int bytes, - int alignment ) -{ - if (R200_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); - - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); - - if (region->buf) - r200ReleaseDmaRegion( rmesa, region, __FUNCTION__ ); - - alignment--; - rmesa->dma.current.start = rmesa->dma.current.ptr = - (rmesa->dma.current.ptr + alignment) & ~alignment; - - if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) - r200RefillCurrentDmaRegion( rmesa ); - - region->start = rmesa->dma.current.start; - region->ptr = rmesa->dma.current.start; - region->end = rmesa->dma.current.start + bytes; - region->address = rmesa->dma.current.address; - region->buf = rmesa->dma.current.buf; - region->buf->refcount++; - - rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ - rmesa->dma.current.start = - rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; - - assert( rmesa->dma.current.ptr <= rmesa->dma.current.end ); -} - -/* ================================================================ - * SwapBuffers with client-side throttling - */ - -static uint32_t r200GetLastFrame(r200ContextPtr rmesa) -{ - drm_radeon_getparam_t gp; - int ret; - uint32_t frame; - - gp.param = RADEON_PARAM_LAST_FRAME; - gp.value = (int *)&frame; - ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM, - &gp, sizeof(gp) ); - if ( ret ) { - fprintf( stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, ret ); - exit(1); - } - - return frame; -} - -static void r200EmitIrqLocked( r200ContextPtr rmesa ) -{ - drm_radeon_irq_emit_t ie; - int ret; - - ie.irq_seq = &rmesa->iw.irq_seq; - ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, - &ie, sizeof(ie) ); - if ( ret ) { - fprintf( stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, ret ); - exit(1); - } -} - - -static void r200WaitIrq( r200ContextPtr rmesa ) -{ - int ret; - - do { - ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT, - &rmesa->iw, sizeof(rmesa->iw) ); - } while (ret && (errno == EINTR || errno == EBUSY)); - - if ( ret ) { - fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret ); - exit(1); - } -} - - -static void r200WaitForFrameCompletion( r200ContextPtr rmesa ) -{ - drm_radeon_sarea_t *sarea = rmesa->sarea; - - if (rmesa->do_irqs) { - if (r200GetLastFrame(rmesa) < sarea->last_frame) { - if (!rmesa->irqsEmitted) { - while (r200GetLastFrame (rmesa) < sarea->last_frame) - ; - } - else { - UNLOCK_HARDWARE( rmesa ); - r200WaitIrq( rmesa ); - LOCK_HARDWARE( rmesa ); - } - rmesa->irqsEmitted = 10; - } - - if (rmesa->irqsEmitted) { - r200EmitIrqLocked( rmesa ); - rmesa->irqsEmitted--; - } - } - else { - while (r200GetLastFrame (rmesa) < sarea->last_frame) { - UNLOCK_HARDWARE( rmesa ); - if (rmesa->do_usleeps) - DO_USLEEP( 1 ); - LOCK_HARDWARE( rmesa ); - } - } -} - - - -/* Copy the back color buffer to the front color buffer. - */ -void r200CopyBuffer( __DRIdrawablePrivate *dPriv, - const drm_clip_rect_t *rect) -{ - r200ContextPtr rmesa; - GLint nbox, i, ret; - GLboolean missed_target; - int64_t ust; - __DRIscreenPrivate *psp = dPriv->driScreenPriv; - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; - - if ( R200_DEBUG & DEBUG_IOCTL ) { - fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *)rmesa->glCtx ); - } - - R200_FIREVERTICES( rmesa ); - - LOCK_HARDWARE( rmesa ); - - - /* Throttle the frame rate -- only allow one pending swap buffers - * request at a time. - */ - r200WaitForFrameCompletion( rmesa ); - if (!rect) - { - UNLOCK_HARDWARE( rmesa ); - driWaitForVBlank( dPriv, & missed_target ); - LOCK_HARDWARE( rmesa ); - } - - nbox = dPriv->numClipRects; /* must be in locked region */ - - for ( i = 0 ; i < nbox ; ) { - GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox ); - drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t *b = rmesa->sarea->boxes; - GLint n = 0; - - for ( ; i < nr ; i++ ) { - - *b = box[i]; - - if (rect) - { - if (rect->x1 > b->x1) - b->x1 = rect->x1; - if (rect->y1 > b->y1) - b->y1 = rect->y1; - if (rect->x2 < b->x2) - b->x2 = rect->x2; - if (rect->y2 < b->y2) - b->y2 = rect->y2; - - if (b->x1 >= b->x2 || b->y1 >= b->y2) - continue; - } - - b++; - n++; - } - rmesa->sarea->nbox = n; - - if (!n) - continue; - - ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); - - if ( ret ) { - fprintf( stderr, "DRM_R200_SWAP_BUFFERS: return = %d\n", ret ); - UNLOCK_HARDWARE( rmesa ); - exit( 1 ); - } - } - - UNLOCK_HARDWARE( rmesa ); - if (!rect) - { - rmesa->hw.all_dirty = GL_TRUE; - - rmesa->swap_count++; - (*psp->systemTime->getUST)( & ust ); - if ( missed_target ) { - rmesa->swap_missed_count++; - rmesa->swap_missed_ust = ust - rmesa->swap_ust; - } - - rmesa->swap_ust = ust; - - sched_yield(); - } -} - -void r200PageFlip( __DRIdrawablePrivate *dPriv ) +static void r200UserClear(GLcontext *ctx, GLuint mask) { - r200ContextPtr rmesa; - GLint ret; - GLboolean missed_target; - __DRIscreenPrivate *psp = dPriv->driScreenPriv; - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - rmesa = (r200ContextPtr) dPriv->driContextPriv->driverPrivate; - - if ( R200_DEBUG & DEBUG_IOCTL ) { - fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, - rmesa->sarea->pfCurrentPage); - } - - R200_FIREVERTICES( rmesa ); - LOCK_HARDWARE( rmesa ); - - if (!dPriv->numClipRects) { - UNLOCK_HARDWARE( rmesa ); - usleep( 10000 ); /* throttle invisible client 10ms */ - return; - } - - /* Need to do this for the perf box placement: - */ - { - drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t *b = rmesa->sarea->boxes; - b[0] = box[0]; - rmesa->sarea->nbox = 1; - } - - /* Throttle the frame rate -- only allow a few pending swap buffers - * request at a time. - */ - r200WaitForFrameCompletion( rmesa ); - UNLOCK_HARDWARE( rmesa ); - driWaitForVBlank( dPriv, & missed_target ); - if ( missed_target ) { - rmesa->swap_missed_count++; - (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust ); - } - LOCK_HARDWARE( rmesa ); - - ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP ); - - UNLOCK_HARDWARE( rmesa ); - - if ( ret ) { - fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret ); - exit( 1 ); - } - - rmesa->swap_count++; - (void) (*psp->systemTime->getUST)( & rmesa->swap_ust ); - -#if 000 - if ( rmesa->sarea->pfCurrentPage == 1 ) { - rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset; - rmesa->state.color.drawPitch = rmesa->r200Screen->frontPitch; - } else { - rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset; - rmesa->state.color.drawPitch = rmesa->r200Screen->backPitch; - } - - R200_STATECHANGE( rmesa, ctx ); - rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = rmesa->state.color.drawOffset - + rmesa->r200Screen->fbLocation; - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = rmesa->state.color.drawPitch; - if (rmesa->sarea->tiling_enabled) { - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; - } -#else - /* Get ready for drawing next frame. Update the renderbuffers' - * flippedOffset/Pitch fields so we draw into the right place. - */ - driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, - rmesa->sarea->pfCurrentPage); - - - r200UpdateDrawBuffer(rmesa->glCtx); -#endif + radeon_clear_tris(ctx, mask); } - -/* ================================================================ - * Buffer clear - */ -static void r200Clear( GLcontext *ctx, GLbitfield mask ) +static void r200KernelClear(GLcontext *ctx, GLuint flags) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; - GLuint flags = 0; - GLuint color_mask = 0; - GLint ret, i; - GLint cx, cy, cw, ch; - - if ( R200_DEBUG & DEBUG_IOCTL ) { - fprintf( stderr, "r200Clear\n"); - } - - { - LOCK_HARDWARE( rmesa ); - UNLOCK_HARDWARE( rmesa ); - if ( dPriv->numClipRects == 0 ) - return; - } - - r200Flush( ctx ); - - if ( mask & BUFFER_BIT_FRONT_LEFT ) { - flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; - mask &= ~BUFFER_BIT_FRONT_LEFT; - } - - if ( mask & BUFFER_BIT_BACK_LEFT ) { - flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; - mask &= ~BUFFER_BIT_BACK_LEFT; - } + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + GLint cx, cy, cw, ch, ret; + GLuint i; - if ( mask & BUFFER_BIT_DEPTH ) { - flags |= RADEON_DEPTH; - mask &= ~BUFFER_BIT_DEPTH; - } - - if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) { - flags |= RADEON_STENCIL; - mask &= ~BUFFER_BIT_STENCIL; - } - - if ( mask ) { - if (R200_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask); - _swrast_Clear( ctx, mask ); - } - - if ( !flags ) - return; - - if (rmesa->using_hyperz) { - flags |= RADEON_USE_COMP_ZBUF; -/* if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) - flags |= RADEON_USE_HIERZ; */ - if (!(rmesa->state.stencil.hwBuffer) || - ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && - ((rmesa->state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) { - flags |= RADEON_CLEAR_FASTZ; - } - } - - LOCK_HARDWARE( rmesa ); - - /* compute region after locking: */ - cx = ctx->DrawBuffer->_Xmin; - cy = ctx->DrawBuffer->_Ymin; - cw = ctx->DrawBuffer->_Xmax - cx; - ch = ctx->DrawBuffer->_Ymax - cy; - - /* Flip top to bottom */ - cx += dPriv->x; - cy = dPriv->y + dPriv->h - cy - ch; + LOCK_HARDWARE( &rmesa->radeon ); /* Throttle the number of clear ioctls we do. */ @@ -693,7 +81,7 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) gp.param = RADEON_PARAM_LAST_CLEAR; gp.value = (int *)&clear; - ret = drmCommandWriteRead( rmesa->dri.fd, + ret = drmCommandWriteRead( rmesa->radeon.dri.fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp) ); if ( ret ) { @@ -703,24 +91,34 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) /* Clear throttling needs more thought. */ - if ( rmesa->sarea->last_clear - clear <= 25 ) { + if ( rmesa->radeon.sarea->last_clear - clear <= 25 ) { break; } - - if (rmesa->do_usleeps) { - UNLOCK_HARDWARE( rmesa ); + + if (rmesa->radeon.do_usleeps) { + UNLOCK_HARDWARE( &rmesa->radeon ); DO_USLEEP( 1 ); - LOCK_HARDWARE( rmesa ); + LOCK_HARDWARE( &rmesa->radeon ); } } /* Send current state to the hardware */ - r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); + + + /* compute region after locking: */ + cx = ctx->DrawBuffer->_Xmin; + cy = ctx->DrawBuffer->_Ymin; + cw = ctx->DrawBuffer->_Xmax - cx; + ch = ctx->DrawBuffer->_Ymax - cy; + /* Flip top to bottom */ + cx += dPriv->x; + cy = dPriv->y + dPriv->h - cy - ch; for ( i = 0 ; i < dPriv->numClipRects ; ) { GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects ); drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t *b = rmesa->sarea->boxes; + drm_clip_rect_t *b = rmesa->radeon.sarea->boxes; drm_radeon_clear_t clear; drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS]; GLint n = 0; @@ -755,17 +153,17 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) } } - rmesa->sarea->nbox = n; + rmesa->radeon.sarea->nbox = n; clear.flags = flags; - clear.clear_color = rmesa->state.color.clear; - clear.clear_depth = rmesa->state.depth.clear; /* needed for hyperz */ + clear.clear_color = rmesa->radeon.state.color.clear; + clear.clear_depth = rmesa->radeon.state.depth.clear; /* needed for hyperz */ clear.color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; - clear.depth_mask = rmesa->state.stencil.clear; + clear.depth_mask = rmesa->radeon.state.stencil.clear; clear.depth_boxes = depth_boxes; n--; - b = rmesa->sarea->boxes; + b = rmesa->radeon.sarea->boxes; for ( ; n >= 0 ; n-- ) { depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1; depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1; @@ -774,84 +172,94 @@ static void r200Clear( GLcontext *ctx, GLbitfield mask ) depth_boxes[n].f[CLEAR_DEPTH] = ctx->Depth.Clear; } - ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR, + ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR, &clear, sizeof(clear)); if ( ret ) { - UNLOCK_HARDWARE( rmesa ); + UNLOCK_HARDWARE( &rmesa->radeon ); fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret ); exit( 1 ); } } - - UNLOCK_HARDWARE( rmesa ); - rmesa->hw.all_dirty = GL_TRUE; + UNLOCK_HARDWARE( &rmesa->radeon ); } +/* ================================================================ + * Buffer clear + */ +static void r200Clear( GLcontext *ctx, GLbitfield mask ) +{ + r200ContextPtr rmesa = R200_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + GLuint flags = 0; + GLuint color_mask = 0; + GLuint orig_mask = mask; + if ( R200_DEBUG & DEBUG_IOCTL ) { + if (rmesa->radeon.sarea) + fprintf( stderr, "r200Clear %x %d\n", mask, rmesa->radeon.sarea->pfCurrentPage); + else + fprintf( stderr, "r200Clear %x radeon->sarea is NULL\n", mask); + } -void r200WaitForIdleLocked( r200ContextPtr rmesa ) -{ - int ret; - int i = 0; - - do { - ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_CP_IDLE); - if (ret) - DO_USLEEP( 1 ); - } while (ret && ++i < 100); - - if ( ret < 0 ) { - UNLOCK_HARDWARE( rmesa ); - fprintf( stderr, "Error: R200 timed out... exiting\n" ); - exit( -1 ); - } -} + { + LOCK_HARDWARE( &rmesa->radeon ); + UNLOCK_HARDWARE( &rmesa->radeon ); + if ( dPriv->numClipRects == 0 ) + return; + } + radeonFlush( ctx ); -static void r200WaitForIdle( r200ContextPtr rmesa ) -{ - LOCK_HARDWARE(rmesa); - r200WaitForIdleLocked( rmesa ); - UNLOCK_HARDWARE(rmesa); -} + if ( mask & BUFFER_BIT_FRONT_LEFT ) { + flags |= RADEON_FRONT; + color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; + mask &= ~BUFFER_BIT_FRONT_LEFT; + } + if ( mask & BUFFER_BIT_BACK_LEFT ) { + flags |= RADEON_BACK; + color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; + mask &= ~BUFFER_BIT_BACK_LEFT; + } -void r200Flush( GLcontext *ctx ) -{ - r200ContextPtr rmesa = R200_CONTEXT( ctx ); + if ( mask & BUFFER_BIT_DEPTH ) { + flags |= RADEON_DEPTH; + mask &= ~BUFFER_BIT_DEPTH; + } - if (R200_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); + if ( (mask & BUFFER_BIT_STENCIL) ) { + flags |= RADEON_STENCIL; + mask &= ~BUFFER_BIT_STENCIL; + } - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); + if ( mask ) { + if (R200_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask); + _swrast_Clear( ctx, mask ); + } - r200EmitState( rmesa ); - - if (rmesa->store.cmd_used) - r200FlushCmdBuf( rmesa, __FUNCTION__ ); -} + if ( !flags ) + return; -/* Make sure all commands have been sent to the hardware and have - * completed processing. - */ -void r200Finish( GLcontext *ctx ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - r200Flush( ctx ); + if (rmesa->using_hyperz) { + flags |= RADEON_USE_COMP_ZBUF; +/* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) + flags |= RADEON_USE_HIERZ; */ + if (!((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && + ((rmesa->radeon.state.stencil.clear & R200_STENCIL_WRITE_MASK) == R200_STENCIL_WRITE_MASK))) { + flags |= RADEON_CLEAR_FASTZ; + } + } - if (rmesa->do_irqs) { - LOCK_HARDWARE( rmesa ); - r200EmitIrqLocked( rmesa ); - UNLOCK_HARDWARE( rmesa ); - r200WaitIrq( rmesa ); + if (rmesa->radeon.radeonScreen->kernel_mm) + r200UserClear(ctx, orig_mask); + else { + r200KernelClear(ctx, flags); + rmesa->radeon.hw.all_dirty = GL_TRUE; } - else - r200WaitForIdle( rmesa ); } - /* This version of AllocateMemoryMESA allocates only GART memory, and * only does so after the point at which the driver has been * initialized. @@ -862,7 +270,7 @@ void r200Finish( GLcontext *ctx ) * device fd. */ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size, - GLfloat readfreq, GLfloat writefreq, + GLfloat readfreq, GLfloat writefreq, GLfloat priority) { GET_CURRENT_CONTEXT(ctx); @@ -872,10 +280,10 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size, int ret; if (R200_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, + fprintf(stderr, "%s sz %d %f/%f/%f\n", __FUNCTION__, size, readfreq, writefreq, priority); - if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map) + if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) return NULL; if (getenv("R200_NO_ALLOC")) @@ -886,17 +294,17 @@ void *r200AllocateMemoryMESA(__DRIscreen *screen, GLsizei size, alloc.size = size; alloc.region_offset = ®ion_offset; - ret = drmCommandWriteRead( rmesa->r200Screen->driScreen->fd, + ret = drmCommandWriteRead( rmesa->radeon.radeonScreen->driScreen->fd, DRM_RADEON_ALLOC, &alloc, sizeof(alloc)); - + if (ret) { fprintf(stderr, "%s: DRM_RADEON_ALLOC ret %d\n", __FUNCTION__, ret); return NULL; } - + { - char *region_start = (char *)rmesa->r200Screen->gartTextures.map; + char *region_start = (char *)rmesa->radeon.radeonScreen->gartTextures.map; return (void *)(region_start + region_offset); } } @@ -914,28 +322,28 @@ void r200FreeMemoryMESA(__DRIscreen *screen, GLvoid *pointer) if (R200_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s %p\n", __FUNCTION__, pointer); - if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->r200Screen->gartTextures.map) { + if (!ctx || !(rmesa = R200_CONTEXT(ctx)) || !rmesa->radeon.radeonScreen->gartTextures.map) { fprintf(stderr, "%s: no context\n", __FUNCTION__); return; } - region_offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map; + region_offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map; - if (region_offset < 0 || - region_offset > rmesa->r200Screen->gartTextures.size) { + if (region_offset < 0 || + region_offset > rmesa->radeon.radeonScreen->gartTextures.size) { fprintf(stderr, "offset %d outside range 0..%d\n", region_offset, - rmesa->r200Screen->gartTextures.size); + rmesa->radeon.radeonScreen->gartTextures.size); return; } memfree.region = RADEON_MEM_REGION_GART; memfree.region_offset = region_offset; - - ret = drmCommandWrite( rmesa->r200Screen->driScreen->fd, + + ret = drmCommandWrite( rmesa->radeon.radeonScreen->driScreen->fd, DRM_RADEON_FREE, &memfree, sizeof(memfree)); - - if (ret) + + if (ret) fprintf(stderr, "%s: DRM_RADEON_FREE ret %d\n", __FUNCTION__, ret); } @@ -956,32 +364,32 @@ GLuint r200GetMemoryOffsetMESA(__DRIscreen *screen, const GLvoid *pointer) card_offset = r200GartOffsetFromVirtual( rmesa, pointer ); - return card_offset - rmesa->r200Screen->gart_base; + return card_offset - rmesa->radeon.radeonScreen->gart_base; } GLboolean r200IsGartMemory( r200ContextPtr rmesa, const GLvoid *pointer, GLint size ) { - ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map; + ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map; int valid = (size >= 0 && offset >= 0 && - offset + size < rmesa->r200Screen->gartTextures.size); + offset + size < rmesa->radeon.radeonScreen->gartTextures.size); if (R200_DEBUG & DEBUG_IOCTL) fprintf(stderr, "r200IsGartMemory( %p ) : %d\n", pointer, valid ); - + return valid; } GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer ) { - ptrdiff_t offset = (char *)pointer - (char *)rmesa->r200Screen->gartTextures.map; + ptrdiff_t offset = (char *)pointer - (char *)rmesa->radeon.radeonScreen->gartTextures.map; - if (offset < 0 || offset > rmesa->r200Screen->gartTextures.size) + if (offset < 0 || offset > rmesa->radeon.radeonScreen->gartTextures.size) return ~0; else - return rmesa->r200Screen->gart_texture_offset + offset; + return rmesa->radeon.radeonScreen->gart_texture_offset + offset; } @@ -989,7 +397,7 @@ GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, const GLvoid *pointer ) void r200InitIoctlFuncs( struct dd_function_table *functions ) { functions->Clear = r200Clear; - functions->Finish = r200Finish; - functions->Flush = r200Flush; + functions->Finish = radeonFinish; + functions->Flush = radeonFlush; } diff --git a/src/mesa/drivers/dri/r200/r200_ioctl.h b/src/mesa/drivers/dri/r200/r200_ioctl.h index f7458e4a0e..2a4b8a11f4 100644 --- a/src/mesa/drivers/dri/r200/r200_ioctl.h +++ b/src/mesa/drivers/dri/r200/r200_ioctl.h @@ -37,65 +37,30 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/simple_list.h" #include "radeon_dri.h" -#include "r200_lock.h" + +#include "radeon_bocs_wrapper.h" #include "xf86drm.h" #include "drm.h" #include "radeon_drm.h" -extern void r200EmitState( r200ContextPtr rmesa ); extern void r200EmitVertexAOS( r200ContextPtr rmesa, - GLuint vertex_size, - GLuint offset ); + GLuint vertex_size, + struct radeon_bo *bo, + GLuint offset ); extern void r200EmitVbufPrim( r200ContextPtr rmesa, GLuint primitive, GLuint vertex_nr ); -extern void r200FlushElts( r200ContextPtr rmesa ); +extern void r200FlushElts(GLcontext *ctx); extern GLushort *r200AllocEltsOpenEnded( r200ContextPtr rmesa, GLuint primitive, GLuint min_nr ); -extern void r200EmitAOS( r200ContextPtr rmesa, - struct r200_dma_region **regions, - GLuint n, - GLuint offset ); - -extern void r200EmitBlit( r200ContextPtr rmesa, - GLuint color_fmt, - GLuint src_pitch, - GLuint src_offset, - GLuint dst_pitch, - GLuint dst_offset, - GLint srcx, GLint srcy, - GLint dstx, GLint dsty, - GLuint w, GLuint h ); - -extern void r200EmitWait( r200ContextPtr rmesa, GLuint flags ); - -extern void r200FlushCmdBuf( r200ContextPtr rmesa, const char * ); -extern int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller ); - -extern void r200RefillCurrentDmaRegion( r200ContextPtr rmesa ); - -extern void r200AllocDmaRegion( r200ContextPtr rmesa, - struct r200_dma_region *region, - int bytes, - int alignment ); - -extern void r200ReleaseDmaRegion( r200ContextPtr rmesa, - struct r200_dma_region *region, - const char *caller ); - -extern void r200CopyBuffer( __DRIdrawablePrivate *drawable, - const drm_clip_rect_t *rect); -extern void r200PageFlip( __DRIdrawablePrivate *drawable ); -extern void r200Flush( GLcontext *ctx ); -extern void r200Finish( GLcontext *ctx ); -extern void r200WaitForIdleLocked( r200ContextPtr rmesa ); -extern void r200WaitForVBlank( r200ContextPtr rmesa ); +extern void r200EmitAOS(r200ContextPtr rmesa, GLuint nr, GLuint offset); + extern void r200InitIoctlFuncs( struct dd_function_table *functions ); extern void *r200AllocateMemoryMESA( __DRIscreen *screen, GLsizei size, GLfloat readfreq, @@ -119,8 +84,8 @@ void r200SetUpAtomList( r200ContextPtr rmesa ); */ #define R200_NEWPRIM( rmesa ) \ do { \ - if ( rmesa->dma.flush ) \ - rmesa->dma.flush( rmesa ); \ + if ( rmesa->radeon.dma.flush ) \ + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ } while (0) /* Can accomodate several state changes and primitive changes without @@ -130,7 +95,7 @@ do { \ do { \ R200_NEWPRIM( rmesa ); \ rmesa->hw.ATOM.dirty = GL_TRUE; \ - rmesa->hw.is_dirty = GL_TRUE; \ + rmesa->radeon.hw.is_dirty = GL_TRUE; \ } while (0) #define R200_DB_STATE( ATOM ) \ @@ -139,13 +104,13 @@ do { \ static INLINE int R200_DB_STATECHANGE( r200ContextPtr rmesa, - struct r200_state_atom *atom ) + struct radeon_state_atom *atom ) { if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) { - int *tmp; + GLuint *tmp; R200_NEWPRIM( rmesa ); atom->dirty = GL_TRUE; - rmesa->hw.is_dirty = GL_TRUE; + rmesa->radeon.hw.is_dirty = GL_TRUE; tmp = atom->cmd; atom->cmd = atom->lastcmd; atom->lastcmd = tmp; @@ -156,15 +121,6 @@ static INLINE int R200_DB_STATECHANGE( } -/* Fire the buffered vertices no matter what. - */ -#define R200_FIREVERTICES( rmesa ) \ -do { \ - if ( rmesa->store.cmd_used || rmesa->dma.flush ) { \ - r200Flush( rmesa->glCtx ); \ - } \ -} while (0) - /* Command lengths. Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ * are available, you will also be adding an rmesa->state.max_state_size because * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts. @@ -174,36 +130,36 @@ do { \ #define ELTS_BUFSZ(nr) (12 + nr * 2) #define VBUF_BUFSZ (3 * sizeof(int)) -/* Ensure that a minimum amount of space is available in the command buffer. - * This is used to ensure atomicity of state updates with the rendering requests - * that rely on them. - * - * An alternative would be to implement a "soft lock" such that when the buffer - * wraps at an inopportune time, we grab the lock, flush the current buffer, - * and hang on to the lock until the critical section is finished and we flush - * the buffer again and unlock. - */ -static INLINE void r200EnsureCmdBufSpace( r200ContextPtr rmesa, int bytes ) +static inline uint32_t cmdpacket3(int cmd_type) { - if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ) - r200FlushCmdBuf( rmesa, __FUNCTION__ ); - assert( bytes <= R200_CMD_BUF_SZ ); -} + drm_radeon_cmd_header_t cmd; -/* Alloc space in the command buffer - */ -static INLINE char *r200AllocCmdBuf( r200ContextPtr rmesa, - int bytes, const char *where ) -{ - char * head; + cmd.i = 0; + cmd.header.cmd_type = cmd_type; - if (rmesa->store.cmd_used + bytes > R200_CMD_BUF_SZ) - r200FlushCmdBuf( rmesa, where ); + return (uint32_t)cmd.i; - head = rmesa->store.cmd_buf + rmesa->store.cmd_used; - rmesa->store.cmd_used += bytes; - assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ ); - return head; } +#define OUT_BATCH_PACKET3(packet, num_extra) do { \ + if (!b_l_rmesa->radeonScreen->kernel_mm) { \ + OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3)); \ + OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ + } else { \ + OUT_BATCH(CP_PACKET2); \ + OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ + } \ + } while(0) + +#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do { \ + if (!b_l_rmesa->radeonScreen->kernel_mm) { \ + OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP)); \ + OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ + } else { \ + OUT_BATCH(CP_PACKET2); \ + OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ + } \ + } while(0) + + #endif /* __R200_IOCTL_H__ */ diff --git a/src/mesa/drivers/dri/r200/r200_lock.c b/src/mesa/drivers/dri/r200/r200_lock.c deleted file mode 100644 index 99661a4bfb..0000000000 --- a/src/mesa/drivers/dri/r200/r200_lock.c +++ /dev/null @@ -1,116 +0,0 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "r200_context.h" -#include "r200_lock.h" -#include "r200_tex.h" -#include "r200_state.h" -#include "r200_ioctl.h" - -#include "drirenderbuffer.h" - - -#if DEBUG_LOCKING -char *prevLockFile = NULL; -int prevLockLine = 0; -#endif - -/* Turn on/off page flipping according to the flags in the sarea: - */ -static void -r200UpdatePageFlipping( r200ContextPtr rmesa ) -{ - rmesa->doPageFlip = rmesa->sarea->pfState; - if (rmesa->glCtx->WinSysDrawBuffer) { - driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, - rmesa->sarea->pfCurrentPage); - } -} - - - -/* Update the hardware state. This is called if another main/context.has - * grabbed the hardware lock, which includes the X server. This - * function also updates the driver's window state after the X server - * moves, resizes or restacks a window -- the change will be reflected - * in the drawable position and clip rects. Since the X server grabs - * the hardware lock when it changes the window state, this routine will - * automatically be called after such a change. - */ -void r200GetLock( r200ContextPtr rmesa, GLuint flags ) -{ - __DRIdrawablePrivate *drawable = rmesa->dri.drawable; - __DRIdrawablePrivate *readable = rmesa->dri.readable; - __DRIscreenPrivate *sPriv = rmesa->dri.screen; - drm_radeon_sarea_t *sarea = rmesa->sarea; - int i; - - drmGetLock( rmesa->dri.fd, rmesa->dri.hwContext, flags ); - - /* The window might have moved, so we might need to get new clip - * rects. - * - * NOTE: This releases and regrabs the hw lock to allow the X server - * to respond to the DRI protocol request for new drawable info. - * Since the hardware state depends on having the latest drawable - * clip rects, all state checking must be done _after_ this call. - */ - DRI_VALIDATE_DRAWABLE_INFO( sPriv, drawable ); - if (drawable != readable) { - DRI_VALIDATE_DRAWABLE_INFO( sPriv, readable ); - } - - if ( rmesa->lastStamp != drawable->lastStamp ) { - r200UpdatePageFlipping( rmesa ); - r200SetCliprects( rmesa ); - r200UpdateViewportOffset( rmesa->glCtx ); - driUpdateFramebufferSize(rmesa->glCtx, drawable); - } - - R200_STATECHANGE( rmesa, ctx ); - if (rmesa->sarea->tiling_enabled) { - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; - } - else rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= ~R200_COLOR_TILE_ENABLE; - - if ( sarea->ctx_owner != rmesa->dri.hwContext ) { - sarea->ctx_owner = rmesa->dri.hwContext; - } - - for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { - DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] ); - } - - rmesa->lost_context = GL_TRUE; -} diff --git a/src/mesa/drivers/dri/r200/r200_lock.h b/src/mesa/drivers/dri/r200/r200_lock.h deleted file mode 100644 index 4ff98907fb..0000000000 --- a/src/mesa/drivers/dri/r200/r200_lock.h +++ /dev/null @@ -1,106 +0,0 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#ifndef __R200_LOCK_H__ -#define __R200_LOCK_H__ - -extern void r200GetLock( r200ContextPtr rmesa, GLuint flags ); - -/* Turn DEBUG_LOCKING on to find locking conflicts. - */ -#define DEBUG_LOCKING 0 - -#if DEBUG_LOCKING -extern char *prevLockFile; -extern int prevLockLine; - -#define DEBUG_LOCK() \ - do { \ - prevLockFile = (__FILE__); \ - prevLockLine = (__LINE__); \ - } while (0) - -#define DEBUG_RESET() \ - do { \ - prevLockFile = 0; \ - prevLockLine = 0; \ - } while (0) - -#define DEBUG_CHECK_LOCK() \ - do { \ - if ( prevLockFile ) { \ - fprintf( stderr, \ - "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \ - prevLockFile, prevLockLine, __FILE__, __LINE__ ); \ - exit( 1 ); \ - } \ - } while (0) - -#else - -#define DEBUG_LOCK() -#define DEBUG_RESET() -#define DEBUG_CHECK_LOCK() - -#endif - -/* - * !!! We may want to separate locks from locks with validation. This - * could be used to improve performance for those things commands that - * do not do any drawing !!! - */ - - -/* Lock the hardware and validate our state. - */ -#define LOCK_HARDWARE( rmesa ) \ - do { \ - char __ret = 0; \ - DEBUG_CHECK_LOCK(); \ - DRM_CAS( rmesa->dri.hwLock, rmesa->dri.hwContext, \ - (DRM_LOCK_HELD | rmesa->dri.hwContext), __ret ); \ - if ( __ret ) \ - r200GetLock( rmesa, 0 ); \ - DEBUG_LOCK(); \ - } while (0) - -#define UNLOCK_HARDWARE( rmesa ) \ - do { \ - DRM_UNLOCK( rmesa->dri.fd, \ - rmesa->dri.hwLock, \ - rmesa->dri.hwContext ); \ - DEBUG_RESET(); \ - } while (0) - -#endif /* __R200_LOCK_H__ */ diff --git a/src/mesa/drivers/dri/r200/r200_maos.h b/src/mesa/drivers/dri/r200/r200_maos.h index d3ed06d402..16a70475e1 100644 --- a/src/mesa/drivers/dri/r200/r200_maos.h +++ b/src/mesa/drivers/dri/r200/r200_maos.h @@ -38,6 +38,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_context.h" extern void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev ); -extern void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs ); #endif diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c index 8512b9af47..383a0c4b0d 100644 --- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c +++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c @@ -50,110 +50,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_maos.h" #include "r200_tcl.h" - -#if 0 -/* Usage: - * - from r200_tcl_render - * - call r200EmitArrays to ensure uptodate arrays in dma - * - emit primitives (new type?) which reference the data - * -- need to use elts for lineloop, quads, quadstrip/flat - * -- other primitives are all well-formed (need tristrip-1,fake-poly) - * - */ -static void emit_ubyte_rgba3( GLcontext *ctx, - struct r200_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - r200_color_t *out = (r200_color_t *)(rvb->start + rvb->address); - - if (R200_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d out %p\n", - __FUNCTION__, count, stride, (void *)out); - - for (i = 0; i < count; i++) { - out->red = *data; - out->green = *(data+1); - out->blue = *(data+2); - out->alpha = 0xFF; - out++; - data += stride; - } -} - -static void emit_ubyte_rgba4( GLcontext *ctx, - struct r200_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (R200_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); - - if (stride == 4) { - for (i = 0; i < count; i++) - ((int *)out)[i] = LE32_TO_CPU(((int *)data)[i]); - } else { - for (i = 0; i < count; i++) { - *(int *)out++ = LE32_TO_CPU(*(int *)data); - data += stride; - } - } -} - - -static void emit_ubyte_rgba( GLcontext *ctx, - struct r200_dma_region *rvb, - char *data, - int size, - int stride, - int count ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - - if (R200_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); - - assert (!rvb->buf); - - if (stride == 0) { - r200AllocDmaRegion( rmesa, rvb, 4, 4 ); - count = 1; - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = 0; - rvb->aos_size = 1; - } - else { - r200AllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */ - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = 1; - rvb->aos_size = 1; - } - - /* Emit the data - */ - switch (size) { - case 3: - emit_ubyte_rgba3( ctx, rvb, data, stride, count ); - break; - case 4: - emit_ubyte_rgba4( ctx, rvb, data, stride, count ); - break; - default: - assert(0); - exit(1); - break; - } -} -#endif - - #if defined(USE_X86_ASM) #define COPY_DWORDS( dst, src, nr ) \ do { \ @@ -174,204 +70,34 @@ do { \ } while (0) #endif - -static void emit_vecfog( GLcontext *ctx, - struct r200_dma_region *rvb, - char *data, - int stride, - int count ) +static void r200_emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, + GLvoid *data, int stride, int count) { - int i; - GLfloat *out; - - r200ContextPtr rmesa = R200_CONTEXT(ctx); - - if (R200_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); - - assert (!rvb->buf); - - if (stride == 0) { - r200AllocDmaRegion( rmesa, rvb, 4, 4 ); - count = 1; - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = 0; - rvb->aos_size = 1; - } - else { - r200AllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */ - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = 1; - rvb->aos_size = 1; - } - - /* Emit the data - */ - out = (GLfloat *)(rvb->address + rvb->start); - for (i = 0; i < count; i++) { - out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data ); - out++; - data += stride; - } - + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + uint32_t *out; + int i; + int size = 1; + + if (stride == 0) { + radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); + count = 1; + aos->stride = 0; + } else { + radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); + aos->stride = size; + } + + aos->components = size; + aos->count = count; + + out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); + for (i = 0; i < count; i++) { + out[0] = r200ComputeFogBlendFactor( ctx, *(GLfloat *)data ); + out++; + data += stride; + } } - -static void emit_vec4( GLcontext *ctx, - struct r200_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (R200_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); - - if (stride == 4) - COPY_DWORDS( out, data, count ); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out++; - data += stride; - } -} - - -static void emit_vec8( GLcontext *ctx, - struct r200_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (R200_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); - - if (stride == 8) - COPY_DWORDS( out, data, count*2 ); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out[1] = *(int *)(data+4); - out += 2; - data += stride; - } -} - -static void emit_vec12( GLcontext *ctx, - struct r200_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (R200_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d out %p data %p\n", - __FUNCTION__, count, stride, (void *)out, (void *)data); - - if (stride == 12) - COPY_DWORDS( out, data, count*3 ); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out[1] = *(int *)(data+4); - out[2] = *(int *)(data+8); - out += 3; - data += stride; - } -} - -static void emit_vec16( GLcontext *ctx, - struct r200_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (R200_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); - - if (stride == 16) - COPY_DWORDS( out, data, count*4 ); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out[1] = *(int *)(data+4); - out[2] = *(int *)(data+8); - out[3] = *(int *)(data+12); - out += 4; - data += stride; - } -} - - -static void emit_vector( GLcontext *ctx, - struct r200_dma_region *rvb, - char *data, - int size, - int stride, - int count ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - - if (R200_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d size %d stride %d\n", - __FUNCTION__, count, size, stride); - - assert (!rvb->buf); - - if (stride == 0) { - r200AllocDmaRegion( rmesa, rvb, size * 4, 4 ); - count = 1; - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = 0; - rvb->aos_size = size; - } - else { - r200AllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */ - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = size; - rvb->aos_size = size; - } - - /* Emit the data - */ - switch (size) { - case 1: - emit_vec4( ctx, rvb, data, stride, count ); - break; - case 2: - emit_vec8( ctx, rvb, data, stride, count ); - break; - case 3: - emit_vec12( ctx, rvb, data, stride, count ); - break; - case 4: - emit_vec16( ctx, rvb, data, stride, count ); - break; - default: - assert(0); - exit(1); - break; - } - -} - - - /* Emit any changed arrays to new GART memory, re-emit a packet to * update the arrays. */ @@ -379,12 +105,12 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev ) { r200ContextPtr rmesa = R200_CONTEXT( ctx ); struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb; - struct r200_dma_region **component = rmesa->tcl.aos_components; GLuint nr = 0; GLuint vfmt0 = 0, vfmt1 = 0; GLuint count = VB->Count; GLuint i, emitsize; + // fprintf(stderr,"emit arrays\n"); for ( i = 0; i < 15; i++ ) { GLubyte attrib = vimap_rev[i]; if (attrib != 255) { @@ -416,20 +142,20 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev ) case 3: /* special handling to fix up fog. Will get us into trouble with vbos...*/ assert(attrib == VERT_ATTRIB_FOG); - if (!rmesa->tcl.vertex_data[i].buf) { + if (!rmesa->radeon.tcl.aos[i].bo) { if (ctx->VertexProgram._Enabled) - emit_vector( ctx, - &(rmesa->tcl.vertex_data[i]), - (char *)VB->AttribPtr[attrib]->data, - 1, - VB->AttribPtr[attrib]->stride, - count); + rcommon_emit_vector( ctx, + &(rmesa->radeon.tcl.aos[nr]), + (char *)VB->AttribPtr[attrib]->data, + 1, + VB->AttribPtr[attrib]->stride, + count); else - emit_vecfog( ctx, - &(rmesa->tcl.vertex_data[i]), - (char *)VB->AttribPtr[attrib]->data, - VB->AttribPtr[attrib]->stride, - count); + r200_emit_vecfog( ctx, + &(rmesa->radeon.tcl.aos[nr]), + (char *)VB->AttribPtr[attrib]->data, + VB->AttribPtr[attrib]->stride, + count); } vfmt0 |= R200_VTX_DISCRETE_FOG; goto after_emit; @@ -473,17 +199,17 @@ void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev ) default: assert(0); } - if (!rmesa->tcl.vertex_data[i].buf) { - emit_vector( ctx, - &(rmesa->tcl.vertex_data[i]), - (char *)VB->AttribPtr[attrib]->data, - emitsize, - VB->AttribPtr[attrib]->stride, - count ); + if (!rmesa->radeon.tcl.aos[nr].bo) { + rcommon_emit_vector( ctx, + &(rmesa->radeon.tcl.aos[nr]), + (char *)VB->AttribPtr[attrib]->data, + emitsize, + VB->AttribPtr[attrib]->stride, + count ); } after_emit: assert(nr < 12); - component[nr++] = &rmesa->tcl.vertex_data[i]; + nr++; } } @@ -494,19 +220,6 @@ after_emit: rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = vfmt1; } - rmesa->tcl.nr_aos_components = nr; + rmesa->radeon.tcl.aos_count = nr; } - -void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs ) -{ - r200ContextPtr rmesa = R200_CONTEXT( ctx ); - - /* only do it for changed inputs ? */ - int i; - for (i = 0; i < 15; i++) { - if (newinputs & (1 << i)) - r200ReleaseDmaRegion( rmesa, - &rmesa->tcl.vertex_data[i], __FUNCTION__ ); - } -} diff --git a/src/mesa/drivers/dri/r200/r200_pixel.c b/src/mesa/drivers/dri/r200/r200_pixel.c index 2797cbb3dc..654f2c6ae9 100644 --- a/src/mesa/drivers/dri/r200/r200_pixel.c +++ b/src/mesa/drivers/dri/r200/r200_pixel.c @@ -51,7 +51,7 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format, const void *pixels, GLint sz, GLint pitch ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - GLuint cpp = rmesa->r200Screen->cpp; + GLuint cpp = rmesa->radeon.radeonScreen->cpp; if (R200_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); @@ -65,8 +65,8 @@ check_color( const GLcontext *ctx, GLenum type, GLenum format, return GL_FALSE; } - if ( type == GL_UNSIGNED_INT_8_8_8_8_REV && - cpp == 4 && + if ( type == GL_UNSIGNED_INT_8_8_8_8_REV && + cpp == 4 && format == GL_BGRA ) { if (R200_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s: passed 2\n", __FUNCTION__); @@ -83,7 +83,7 @@ static GLboolean check_color_per_fragment_ops( const GLcontext *ctx ) { int result; - result = (!( ctx->Color.AlphaEnabled || + result = (!( ctx->Color.AlphaEnabled || ctx->Depth.Test || ctx->Fog.Enabled || ctx->Scissor.Enabled || @@ -96,7 +96,7 @@ check_color_per_fragment_ops( const GLcontext *ctx ) ctx->Texture._EnabledUnits ) && ctx->Current.RasterPosValid); - + return result; } @@ -137,8 +137,8 @@ clip_pixelrect( const GLcontext *ctx, if (*height <= 0) return GL_FALSE; - *size = ((*y + *height - 1) * rmesa->r200Screen->frontPitch + - (*x + *width - 1) * rmesa->r200Screen->cpp); + *size = ((*y + *height - 1) * rmesa->radeon.radeonScreen->frontPitch + + (*x + *width - 1) * rmesa->radeon.radeonScreen->cpp); return GL_TRUE; } @@ -153,19 +153,20 @@ r200TryReadPixels( GLcontext *ctx, r200ContextPtr rmesa = R200_CONTEXT(ctx); GLint pitch = pack->RowLength ? pack->RowLength : width; GLint blit_format; - GLuint cpp = rmesa->r200Screen->cpp; + GLuint cpp = rmesa->radeon.radeonScreen->cpp; GLint size = width * height * cpp; + return GL_FALSE; +#if 0 if (R200_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); /* Only accelerate reading to GART buffers. */ - if ( !r200IsGartMemory(rmesa, pixels, - pitch * height * rmesa->r200Screen->cpp ) ) { + if ( !r200IsGartMemory(rmesa, pixels, + pitch * height * rmesa->radeon.radeonScreen->cpp ) ) { if (R200_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s: dest not GART\n", __FUNCTION__); - return GL_FALSE; } /* Need GL_PACK_INVERT_MESA to cope with upsidedown results from @@ -180,7 +181,7 @@ r200TryReadPixels( GLcontext *ctx, if (!check_color(ctx, type, format, pack, pixels, size, pitch)) return GL_FALSE; - switch ( rmesa->r200Screen->cpp ) { + switch ( rmesa->radeon.radeonScreen->cpp ) { case 4: blit_format = R200_CP_COLOR_FORMAT_ARGB8888; break; @@ -197,14 +198,14 @@ r200TryReadPixels( GLcontext *ctx, * a full command buffer expects to be called unlocked. As a * workaround, immediately flush the buffer on aquiring the lock. */ - LOCK_HARDWARE( rmesa ); + LOCK_HARDWARE( &rmesa->radeon ); if (rmesa->store.cmd_used) - r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height, &size)) { - UNLOCK_HARDWARE( rmesa ); + UNLOCK_HARDWARE( &rmesa->radeon ); if (R200_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s totally clipped -- nothing to do\n", __FUNCTION__); @@ -212,18 +213,18 @@ r200TryReadPixels( GLcontext *ctx, } { - __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; driRenderbuffer *drb = (driRenderbuffer *) ctx->ReadBuffer->_ColorReadBuffer; int nbox = dPriv->numClipRects; int src_offset = drb->offset - + rmesa->r200Screen->fbLocation; + + rmesa->radeon.radeonScreen->fbLocation; int src_pitch = drb->pitch * drb->cpp; int dst_offset = r200GartOffsetFromVirtual( rmesa, pixels ); - int dst_pitch = pitch * rmesa->r200Screen->cpp; + int dst_pitch = pitch * rmesa->radeon.radeonScreen->cpp; drm_clip_rect_t *box = dPriv->pClipRects; int i; - r200EmitWait( rmesa, RADEON_WAIT_3D ); + r200EmitWait( rmesa, RADEON_WAIT_3D ); y = dPriv->h - y - height; x += dPriv->x; @@ -240,7 +241,7 @@ r200TryReadPixels( GLcontext *ctx, GLint by = box[i].y1; GLint bw = box[i].x2 - bx; GLint bh = box[i].y2 - by; - + if (bx < x) bw -= x - bx, bx = x; if (by < y) bh -= y - by, by = y; if (bx + bw > x + width) bw = x + width - bx; @@ -257,12 +258,12 @@ r200TryReadPixels( GLcontext *ctx, bw, bh ); } - r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); } - UNLOCK_HARDWARE( rmesa ); - - r200Finish( ctx ); /* required by GL */ + UNLOCK_HARDWARE( &rmesa->radeon ); + radeonFinish( ctx ); /* required by GL */ +#endif return GL_TRUE; } @@ -276,9 +277,9 @@ r200ReadPixels( GLcontext *ctx, if (R200_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); - if (!r200TryReadPixels( ctx, x, y, width, height, format, type, pack, + if (!r200TryReadPixels( ctx, x, y, width, height, format, type, pack, pixels)) - _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack, + _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack, pixels); } @@ -292,7 +293,7 @@ static void do_draw_pix( GLcontext *ctx, GLuint planemask) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); drm_clip_rect_t *box = dPriv->pClipRects; struct gl_renderbuffer *rb = ctx->ReadBuffer->_ColorDrawBuffers[0]; driRenderbuffer *drb = (driRenderbuffer *) rb; @@ -301,12 +302,12 @@ static void do_draw_pix( GLcontext *ctx, int blit_format; int size; int src_offset = r200GartOffsetFromVirtual( rmesa, pixels ); - int src_pitch = pitch * rmesa->r200Screen->cpp; + int src_pitch = pitch * rmesa->radeon.radeonScreen->cpp; if (R200_DEBUG & DEBUG_PIXEL) fprintf(stderr, "%s\n", __FUNCTION__); - - switch ( rmesa->r200Screen->cpp ) { +#if 0 + switch ( rmesa->radeon.radeonScreen->cpp ) { case 2: blit_format = R200_CP_COLOR_FORMAT_RGB565; break; @@ -318,17 +319,17 @@ static void do_draw_pix( GLcontext *ctx, } - LOCK_HARDWARE( rmesa ); + LOCK_HARDWARE( &rmesa->radeon ); if (rmesa->store.cmd_used) - r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); + rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); y -= height; /* cope with pixel zoom */ - + if (!clip_pixelrect(ctx, ctx->DrawBuffer, &x, &y, &width, &height, &size)) { - UNLOCK_HARDWARE( rmesa ); + UNLOCK_HARDWARE( &rmesa->radeon ); return; } @@ -357,15 +358,16 @@ static void do_draw_pix( GLcontext *ctx, blit_format, src_pitch, src_offset, drb->pitch * drb->cpp, - drb->offset + rmesa->r200Screen->fbLocation, + drb->offset + rmesa->radeon.radeonScreen->fbLocation, bx - x, by - y, bx, by, bw, bh ); } - r200FlushCmdBufLocked( rmesa, __FUNCTION__ ); - r200WaitForIdleLocked( rmesa ); /* required by GL */ - UNLOCK_HARDWARE( rmesa ); + rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); + radeonWaitForIdleLocked( &rmesa->radeon ); /* required by GL */ + UNLOCK_HARDWARE( &rmesa->radeon ); +#endif } @@ -381,7 +383,7 @@ r200TryDrawPixels( GLcontext *ctx, r200ContextPtr rmesa = R200_CONTEXT(ctx); GLint pitch = unpack->RowLength ? unpack->RowLength : width; GLuint planemask; - GLuint cpp = rmesa->r200Screen->cpp; + GLuint cpp = rmesa->radeon.radeonScreen->cpp; GLint size = height * pitch * cpp; if (R200_DEBUG & DEBUG_PIXEL) @@ -395,7 +397,7 @@ r200TryDrawPixels( GLcontext *ctx, case GL_RGB: case GL_RGBA: case GL_BGRA: - planemask = r200PackColor(cpp, + planemask = radeonPackColor(cpp, ctx->Color.ColorMask[RCOMP], ctx->Color.ColorMask[GCOMP], ctx->Color.ColorMask[BCOMP], @@ -407,7 +409,7 @@ r200TryDrawPixels( GLcontext *ctx, if (planemask != ~0) return GL_FALSE; /* fix me -- should be possible */ - /* Can't do conversions on GART reads/draws. + /* Can't do conversions on GART reads/draws. */ if ( !r200IsGartMemory( rmesa, pixels, size ) ) { if (R200_DEBUG & DEBUG_PIXEL) @@ -431,7 +433,7 @@ r200TryDrawPixels( GLcontext *ctx, return GL_FALSE; } - if ( r200IsGartMemory(rmesa, pixels, size) ) + if (0)// r200IsGartMemory(rmesa, pixels, size) ) { do_draw_pix( ctx, x, y, width, height, pitch, pixels, planemask ); return GL_TRUE; @@ -471,7 +473,7 @@ r200Bitmap( GLcontext *ctx, GLint px, GLint py, { r200ContextPtr rmesa = R200_CONTEXT(ctx); - if (rmesa->Fallback) + if (rmesa->radeon.Fallback) _swrast_Bitmap( ctx, px, py, width, height, unpack, bitmap ); else r200PointsBitmap( ctx, px, py, width, height, unpack, bitmap ); @@ -482,9 +484,9 @@ r200Bitmap( GLcontext *ctx, GLint px, GLint py, void r200InitPixelFuncs( GLcontext *ctx ) { if (!getenv("R200_NO_BLITS")) { - ctx->Driver.ReadPixels = r200ReadPixels; - ctx->Driver.DrawPixels = r200DrawPixels; - if (getenv("R200_HW_BITMAP")) + ctx->Driver.ReadPixels = r200ReadPixels; + ctx->Driver.DrawPixels = r200DrawPixels; + if (getenv("R200_HW_BITMAP")) ctx->Driver.Bitmap = r200Bitmap; } } diff --git a/src/mesa/drivers/dri/r200/r200_reg.h b/src/mesa/drivers/dri/r200/r200_reg.h index 5ce287f7a5..526a624b69 100644 --- a/src/mesa/drivers/dri/r200/r200_reg.h +++ b/src/mesa/drivers/dri/r200/r200_reg.h @@ -463,8 +463,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R200_VSC_UPDATE_USER_COLOR_1_ENABLE 0x00020000 /* gap */ #define R200_SE_TCL_VECTOR_INDX_REG 0x2200 +# define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT 16 +# define RADEON_VEC_INDX_DWORD_COUNT_SHIFT 28 #define R200_SE_TCL_VECTOR_DATA_REG 0x2204 #define R200_SE_TCL_SCALAR_INDX_REG 0x2208 +# define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT 16 #define R200_SE_TCL_SCALAR_DATA_REG 0x220c /* gap */ #define R200_SE_TCL_MATRIX_SEL_0 0x2230 @@ -949,6 +952,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R200_LOD_BIAS_MASK (0xfff80000) #define R200_LOD_BIAS_SHIFT 19 #define R200_PP_TXSIZE_0 0x2c0c /* NPOT only */ +#define R200_PP_TX_WIDTHMASK_SHIFT 0 +#define R200_PP_TX_HEIGHTMASK_SHIFT 16 + #define R200_PP_TXPITCH_0 0x2c10 /* NPOT only */ #define R200_PP_BORDER_COLOR_0 0x2c14 #define R200_PP_CUBIC_FACES_0 0x2c18 diff --git a/src/mesa/drivers/dri/r200/r200_span.c b/src/mesa/drivers/dri/r200/r200_span.c deleted file mode 100644 index 9783678028..0000000000 --- a/src/mesa/drivers/dri/r200/r200_span.c +++ /dev/null @@ -1,307 +0,0 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "main/glheader.h" -#include "main/imports.h" -#include "main/colormac.h" -#include "swrast/swrast.h" - -#include "r200_context.h" -#include "r200_ioctl.h" -#include "r200_state.h" -#include "r200_span.h" -#include "r200_tex.h" - -#define DBG 0 - -/* - * Note that all information needed to access pixels in a renderbuffer - * should be obtained through the gl_renderbuffer parameter, not per-context - * information. - */ -#define LOCAL_VARS \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - const __DRIdrawablePrivate *dPriv = drb->dPriv; \ - const GLuint bottom = dPriv->h - 1; \ - GLubyte *buf = (GLubyte *) drb->flippedData \ - + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ - GLuint p; \ - (void) p; - -#define LOCAL_DEPTH_VARS \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - const __DRIdrawablePrivate *dPriv = drb->dPriv; \ - const GLuint bottom = dPriv->h - 1; \ - GLuint xo = dPriv->x; \ - GLuint yo = dPriv->y; \ - GLubyte *buf = (GLubyte *) drb->Base.Data; - -#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS - -#define Y_FLIP(Y) (bottom - (Y)) - -#define HW_LOCK() - -#define HW_UNLOCK() - - - -/* ================================================================ - * Color buffer - */ - -/* 16 bit, RGB565 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) r200##x##_RGB565 -#define TAG2(x,y) r200##x##_RGB565##y -#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) -#include "spantmp2.h" - -/* 32 bit, ARGB8888 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) r200##x##_ARGB8888 -#define TAG2(x,y) r200##x##_ARGB8888##y -#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) -#include "spantmp2.h" - - -/* ================================================================ - * Depth buffer - */ - -/* The Radeon family has depth tiling on all the time, so we have to convert - * the x,y coordinates into the memory bus address (mba) in the same - * manner as the engine. In each case, the linear block address (ba) - * is calculated, and then wired with x and y to produce the final - * memory address. - * The chip will do address translation on its own if the surface registers - * are set up correctly. It is not quite enough to get it working with hyperz too... - */ - -/* extract bit 'b' of x, result is zero or one */ -#define BIT(x,b) ((x & (1<<b))>>b) - -static GLuint -r200_mba_z32( driRenderbuffer *drb, GLint x, GLint y ) -{ - GLuint pitch = drb->pitch; - if (drb->depthHasSurface) { - return 4 * (x + y * pitch); - } - else { - GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 5) + ((x & 0x7FF) >> 5); - GLuint a = - (BIT(x,0) << 2) | - (BIT(y,0) << 3) | - (BIT(x,1) << 4) | - (BIT(y,1) << 5) | - (BIT(x,3) << 6) | - (BIT(x,4) << 7) | - (BIT(x,2) << 8) | - (BIT(y,2) << 9) | - (BIT(y,3) << 10) | - (((pitch & 0x20) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) | - ((b >> 1) << 12); - return a; - } -} - -static GLuint -r200_mba_z16( driRenderbuffer *drb, GLint x, GLint y ) -{ - GLuint pitch = drb->pitch; - if (drb->depthHasSurface) { - return 2 * (x + y * pitch); - } - else { - GLuint b = ((y & 0x7FF) >> 4) * ((pitch & 0xFFF) >> 6) + ((x & 0x7FF) >> 6); - GLuint a = - (BIT(x,0) << 1) | - (BIT(y,0) << 2) | - (BIT(x,1) << 3) | - (BIT(y,1) << 4) | - (BIT(x,2) << 5) | - (BIT(x,4) << 6) | - (BIT(x,5) << 7) | - (BIT(x,3) << 8) | - (BIT(y,2) << 9) | - (BIT(y,3) << 10) | - (((pitch & 0x40) ? (b & 0x01) : ((b & 0x01) ^ (BIT(y,4)))) << 11) | - ((b >> 1) << 12); - return a; - } -} - - -/* 16-bit depth buffer functions - */ -#define VALUE_TYPE GLushort - -#define WRITE_DEPTH( _x, _y, d ) \ - *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )) = d; - -#define READ_DEPTH( d, _x, _y ) \ - d = *(GLushort *)(buf + r200_mba_z16( drb, _x + xo, _y + yo )); - -#define TAG(x) r200##x##_z16 -#include "depthtmp.h" - - -/* 24 bit depth, 8 bit stencil depthbuffer functions - */ -#define VALUE_TYPE GLuint - -#define WRITE_DEPTH( _x, _y, d ) \ -do { \ - GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - tmp &= 0xff000000; \ - tmp |= ((d) & 0x00ffffff); \ - *(GLuint *)(buf + offset) = tmp; \ -} while (0) - -#define READ_DEPTH( d, _x, _y ) \ - d = *(GLuint *)(buf + r200_mba_z32( drb, _x + xo, \ - _y + yo )) & 0x00ffffff; - -#define TAG(x) r200##x##_z24_s8 -#include "depthtmp.h" - - -/* ================================================================ - * Stencil buffer - */ - -/* 24 bit depth, 8 bit stencil depthbuffer functions - */ -#define WRITE_STENCIL( _x, _y, d ) \ -do { \ - GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - tmp &= 0x00ffffff; \ - tmp |= (((d) & 0xff) << 24); \ - *(GLuint *)(buf + offset) = tmp; \ -} while (0) - -#define READ_STENCIL( d, _x, _y ) \ -do { \ - GLuint offset = r200_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - tmp &= 0xff000000; \ - d = tmp >> 24; \ -} while (0) - -#define TAG(x) r200##x##_z24_s8 -#include "stenciltmp.h" - - -/* Move locking out to get reasonable span performance (10x better - * than doing this in HW_LOCK above). WaitForIdle() is the main - * culprit. - */ - -static void r200SpanRenderStart( GLcontext *ctx ) -{ - r200ContextPtr rmesa = R200_CONTEXT( ctx ); - - R200_FIREVERTICES( rmesa ); - LOCK_HARDWARE( rmesa ); - r200WaitForIdleLocked( rmesa ); - - /* Read & rewrite the first pixel in the frame buffer. This should - * be a noop, right? In fact without this conform fails as reading - * from the framebuffer sometimes produces old results -- the - * on-card read cache gets mixed up and doesn't notice that the - * framebuffer has been updated. - * - * In the worst case this is buggy too as p might get the wrong - * value first time, so really need a hidden pixel somewhere for this. - */ - { - int p; - driRenderbuffer *drb = - (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; - volatile int *buf = - (volatile int *)(rmesa->dri.screen->pFB + drb->offset); - p = *buf; - *buf = p; - } -} - -static void r200SpanRenderFinish( GLcontext *ctx ) -{ - r200ContextPtr rmesa = R200_CONTEXT( ctx ); - _swrast_flush( ctx ); - UNLOCK_HARDWARE( rmesa ); -} - -void r200InitSpanFuncs( GLcontext *ctx ) -{ - struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx); - swdd->SpanRenderStart = r200SpanRenderStart; - swdd->SpanRenderFinish = r200SpanRenderFinish; -} - - - -/** - * Plug in the Get/Put routines for the given driRenderbuffer. - */ -void -radeonSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis) -{ - if (drb->Base.InternalFormat == GL_RGBA) { - if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) { - r200InitPointers_RGB565(&drb->Base); - } - else { - r200InitPointers_ARGB8888(&drb->Base); - } - } - else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { - r200InitDepthPointers_z16(&drb->Base); - } - else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { - r200InitDepthPointers_z24_s8(&drb->Base); - } - else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { - r200InitStencilPointers_z24_s8(&drb->Base); - } -} diff --git a/src/mesa/drivers/dri/r200/r200_span.h b/src/mesa/drivers/dri/r200/r200_span.h deleted file mode 100644 index bae5644309..0000000000 --- a/src/mesa/drivers/dri/r200/r200_span.h +++ /dev/null @@ -1,45 +0,0 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#ifndef __R200_SPAN_H__ -#define __R200_SPAN_H__ - -#include "drirenderbuffer.h" - -extern void r200InitSpanFuncs( GLcontext *ctx ); - -extern void -radeonSetSpanFunctions(driRenderbuffer *rb, const GLvisual *vis); - -#endif diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index 2fcc87c0f5..f8ebe0df57 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -47,6 +47,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/t_pipeline.h" #include "swrast_setup/swrast_setup.h" +#include "radeon_common.h" +#include "radeon_mipmap_tree.h" #include "r200_context.h" #include "r200_ioctl.h" #include "r200_state.h" @@ -77,7 +79,7 @@ static void r200AlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref ) switch ( func ) { case GL_NEVER: - pp_misc |= R200_ALPHA_TEST_FAIL; + pp_misc |= R200_ALPHA_TEST_FAIL; break; case GL_LESS: pp_misc |= R200_ALPHA_TEST_LESS; @@ -114,8 +116,8 @@ static void r200BlendColor( GLcontext *ctx, const GLfloat cf[4] ) CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]); CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]); CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]); - if (rmesa->r200Screen->drmSupportsBlendColor) - rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = r200PackColor( 4, color[0], color[1], color[2], color[3] ); + if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) + rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = radeonPackColor( 4, color[0], color[1], color[2], color[3] ); } /** @@ -213,7 +215,7 @@ static void r200_set_blend_state( GLcontext * ctx ) R200_STATECHANGE( rmesa, ctx ); - if (rmesa->r200Screen->drmSupportsBlendColor) { + if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) { if (ctx->Color.ColorLogicOpEnabled) { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = cntl | R200_ROP_ENABLE; rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = eqn | func; @@ -278,7 +280,7 @@ static void r200_set_blend_state( GLcontext * ctx ) return; } - if (!rmesa->r200Screen->drmSupportsBlendColor) { + if (!rmesa->radeon.radeonScreen->drmSupportsBlendColor) { rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = eqn | func; return; } @@ -383,10 +385,10 @@ static void r200ClearDepth( GLcontext *ctx, GLclampd d ) switch ( format ) { case R200_DEPTH_FORMAT_16BIT_INT_Z: - rmesa->state.depth.clear = d * 0x0000ffff; + rmesa->radeon.state.depth.clear = d * 0x0000ffff; break; case R200_DEPTH_FORMAT_24BIT_INT_Z: - rmesa->state.depth.clear = d * 0x00ffffff; + rmesa->radeon.state.depth.clear = d * 0x00ffffff; break; } } @@ -477,10 +479,10 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) } } break; - case GL_FOG_COLOR: + case GL_FOG_COLOR: R200_STATECHANGE( rmesa, ctx ); UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color ); - i = r200PackColor( 4, col[0], col[1], col[2], 0 ); + i = radeonPackColor( 4, col[0], col[1], col[2], 0 ); rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~R200_FOG_COLOR_MASK; rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= i; break; @@ -505,7 +507,7 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) if (out_0 != rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0]) { R200_STATECHANGE( rmesa, vtx ); - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0; + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] = out_0; } break; @@ -521,102 +523,6 @@ static void r200Fogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) } } - -/* ============================================================= - * Scissoring - */ - - -static GLboolean intersect_rect( drm_clip_rect_t *out, - drm_clip_rect_t *a, - drm_clip_rect_t *b ) -{ - *out = *a; - if ( b->x1 > out->x1 ) out->x1 = b->x1; - if ( b->y1 > out->y1 ) out->y1 = b->y1; - if ( b->x2 < out->x2 ) out->x2 = b->x2; - if ( b->y2 < out->y2 ) out->y2 = b->y2; - if ( out->x1 >= out->x2 ) return GL_FALSE; - if ( out->y1 >= out->y2 ) return GL_FALSE; - return GL_TRUE; -} - - -void r200RecalcScissorRects( r200ContextPtr rmesa ) -{ - drm_clip_rect_t *out; - int i; - - /* Grow cliprect store? - */ - if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { - while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { - rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */ - rmesa->state.scissor.numAllocedClipRects *= 2; - } - - if (rmesa->state.scissor.pClipRects) - FREE(rmesa->state.scissor.pClipRects); - - rmesa->state.scissor.pClipRects = - MALLOC( rmesa->state.scissor.numAllocedClipRects * - sizeof(drm_clip_rect_t) ); - - if ( rmesa->state.scissor.pClipRects == NULL ) { - rmesa->state.scissor.numAllocedClipRects = 0; - return; - } - } - - out = rmesa->state.scissor.pClipRects; - rmesa->state.scissor.numClipRects = 0; - - for ( i = 0 ; i < rmesa->numClipRects ; i++ ) { - if ( intersect_rect( out, - &rmesa->pClipRects[i], - &rmesa->state.scissor.rect ) ) { - rmesa->state.scissor.numClipRects++; - out++; - } - } -} - - -static void r200UpdateScissor( GLcontext *ctx ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - - if ( rmesa->dri.drawable ) { - __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; - - int x = ctx->Scissor.X; - int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height; - int w = ctx->Scissor.X + ctx->Scissor.Width - 1; - int h = dPriv->h - ctx->Scissor.Y - 1; - - rmesa->state.scissor.rect.x1 = x + dPriv->x; - rmesa->state.scissor.rect.y1 = y + dPriv->y; - rmesa->state.scissor.rect.x2 = w + dPriv->x + 1; - rmesa->state.scissor.rect.y2 = h + dPriv->y + 1; - - r200RecalcScissorRects( rmesa ); - } -} - - -static void r200Scissor( GLcontext *ctx, - GLint x, GLint y, GLsizei w, GLsizei h ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - - if ( ctx->Scissor.Enabled ) { - R200_FIREVERTICES( rmesa ); /* don't pipeline cliprect changes */ - r200UpdateScissor( ctx ); - } - -} - - /* ============================================================= * Culling */ @@ -790,7 +696,7 @@ static void r200LineStipple( GLcontext *ctx, GLint factor, GLushort pattern ) r200ContextPtr rmesa = R200_CONTEXT(ctx); R200_STATECHANGE( rmesa, lin ); - rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern)); } @@ -803,7 +709,7 @@ static void r200ColorMask( GLcontext *ctx, GLboolean b, GLboolean a ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - GLuint mask = r200PackColor( rmesa->r200Screen->cpp, + GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp, ctx->Color.ColorMask[RCOMP], ctx->Color.ColorMask[GCOMP], ctx->Color.ColorMask[BCOMP], @@ -814,10 +720,10 @@ static void r200ColorMask( GLcontext *ctx, if (!(r && g && b && a)) flag |= R200_PLANE_MASK_ENABLE; - if ( rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] != flag ) { - R200_STATECHANGE( rmesa, ctx ); - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = flag; - } + if ( rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] != flag ) { + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = flag; + } if ( rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK] != mask ) { R200_STATECHANGE( rmesa, msk ); @@ -834,7 +740,8 @@ static void r200PolygonOffset( GLcontext *ctx, GLfloat factor, GLfloat units ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - float_ui32_type constant = { units * rmesa->state.depth.scale }; + const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + float_ui32_type constant = { units * depthScale }; float_ui32_type factoru = { factor }; /* factor *= 2; */ @@ -861,15 +768,15 @@ static void r200PolygonStipple( GLcontext *ctx, const GLubyte *mask ) /* TODO: push this into cmd mechanism */ - R200_FIREVERTICES( rmesa ); - LOCK_HARDWARE( rmesa ); + radeon_firevertices(&rmesa->radeon); + LOCK_HARDWARE( &rmesa->radeon ); /* FIXME: Use window x,y offsets into stipple RAM. */ stipple.mask = rmesa->state.stipple.mask; - drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, + drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, &stipple, sizeof(stipple) ); - UNLOCK_HARDWARE( rmesa ); + UNLOCK_HARDWARE( &rmesa->radeon ); } static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) @@ -878,10 +785,10 @@ static void r200PolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0; /* Can't generally do unfilled via tcl, but some good special - * cases work. + * cases work. */ TCL_FALLBACK( ctx, R200_TCL_FALLBACK_UNFILLED, flag); - if (rmesa->TclFallback) { + if (rmesa->radeon.TclFallback) { r200ChooseRenderState( ctx ); r200ChooseVertexState( ctx ); } @@ -920,34 +827,34 @@ static void r200UpdateSpecular( GLcontext *ctx ) if (ctx->Light.Enabled && ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) { - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) | - (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); + (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0; rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1; rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE; p |= R200_SPECULAR_ENABLE; - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_DIFFUSE_SPECULAR_COMBINE; } else if (ctx->Light.Enabled) { - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= - ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)); rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_0; rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHTING_ENABLE; } else if (ctx->Fog.ColorSumEnabled ) { - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT) | - (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); + (R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); p |= R200_SPECULAR_ENABLE; } else { - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= - ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + ((R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT)); } if (ctx->Fog.Enabled) { - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= - ((R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_0] |= + ((R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT)); rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL] |= R200_OUTPUT_COLOR_1; } @@ -958,7 +865,7 @@ static void r200UpdateSpecular( GLcontext *ctx ) /* Update vertex/render formats */ - if (rmesa->TclFallback) { + if (rmesa->radeon.TclFallback) { r200ChooseRenderState( ctx ); r200ChooseVertexState( ctx ); } @@ -970,7 +877,7 @@ static void r200UpdateSpecular( GLcontext *ctx ) */ -/* Update on colormaterial, material emmissive/ambient, +/* Update on colormaterial, material emmissive/ambient, * lightmodel.globalambient */ static void update_global_ambient( GLcontext *ctx ) @@ -984,23 +891,23 @@ static void update_global_ambient( GLcontext *ctx ) */ if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] & ((3 << R200_FRONT_EMISSIVE_SOURCE_SHIFT) | - (3 << R200_FRONT_AMBIENT_SOURCE_SHIFT))) == 0) + (3 << R200_FRONT_AMBIENT_SOURCE_SHIFT))) == 0) { - COPY_3V( &fcmd[GLT_RED], + COPY_3V( &fcmd[GLT_RED], ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]); ACC_SCALE_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient, ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]); - } + } else { COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient ); } - + R200_DB_STATECHANGE(rmesa, &rmesa->hw.glt); } -/* Update on change to +/* Update on change to * - light[p].colors * - light[p].enabled */ @@ -1014,10 +921,10 @@ static void update_light_colors( GLcontext *ctx, GLuint p ) r200ContextPtr rmesa = R200_CONTEXT(ctx); float *fcmd = (float *)R200_DB_STATE( lit[p] ); - COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); + COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse ); COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular ); - + R200_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); } } @@ -1037,7 +944,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) if (ctx->Light.ColorMaterialEnabled) { GLuint mask = ctx->Light.ColorMaterialBitmask; - + if (mask & MAT_BIT_FRONT_EMISSION) { light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << R200_FRONT_EMISSIVE_SOURCE_SHIFT); @@ -1053,7 +960,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_AMBIENT_SOURCE_SHIFT); - + if (mask & MAT_BIT_FRONT_DIFFUSE) { light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT); @@ -1061,7 +968,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) else light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_DIFFUSE_SOURCE_SHIFT); - + if (mask & MAT_BIT_FRONT_SPECULAR) { light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT); @@ -1070,7 +977,7 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) light_model_ctl1 |= (R200_LM1_SOURCE_MATERIAL_0 << R200_FRONT_SPECULAR_SOURCE_SHIFT); } - + if (mask & MAT_BIT_BACK_EMISSION) { light_model_ctl1 |= (R200_LM1_SOURCE_VERTEX_COLOR_0 << R200_BACK_EMISSIVE_SOURCE_SHIFT); @@ -1120,8 +1027,8 @@ static void r200ColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) R200_STATECHANGE( rmesa, tcl ); rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_1] = light_model_ctl1; } - - + + } void r200UpdateMaterial( GLcontext *ctx ) @@ -1131,7 +1038,7 @@ void r200UpdateMaterial( GLcontext *ctx ) GLfloat *fcmd = (GLfloat *)R200_DB_STATE( mtl[0] ); GLfloat *fcmd2 = (GLfloat *)R200_DB_STATE( mtl[1] ); GLuint mask = ~0; - + /* Might be possible and faster to update everything unconditionally? */ if (ctx->Light.ColorMaterialEnabled) mask &= ~ctx->Light.ColorMaterialBitmask; @@ -1217,7 +1124,7 @@ void r200UpdateMaterial( GLcontext *ctx ) * * which are calculated in light.c and are correct for the current * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW - * and _MESA_NEW_NEED_EYE_COORDS. + * and _MESA_NEW_NEED_EYE_COORDS. */ static void update_light( GLcontext *ctx ) { @@ -1234,8 +1141,8 @@ static void update_light( GLcontext *ctx ) tmp &= ~R200_LIGHT_IN_MODELSPACE; else tmp |= R200_LIGHT_IN_MODELSPACE; - - if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]) + + if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]) { R200_STATECHANGE( rmesa, tcl ); rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] = tmp; @@ -1259,10 +1166,10 @@ static void update_light( GLcontext *ctx ) if (ctx->Light.Light[p].Enabled) { struct gl_light *l = &ctx->Light.Light[p]; GLfloat *fcmd = (GLfloat *)R200_DB_STATE( lit[p] ); - + if (l->EyePosition[3] == 0.0) { - COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); - COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); + COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); + COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); fcmd[LIT_POSITION_W] = 0; fcmd[LIT_DIRECTION_W] = 0; } else { @@ -1286,21 +1193,21 @@ static void r200Lightfv( GLcontext *ctx, GLenum light, GLint p = light - GL_LIGHT0; struct gl_light *l = &ctx->Light.Light[p]; GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd; - + switch (pname) { - case GL_AMBIENT: + case GL_AMBIENT: case GL_DIFFUSE: case GL_SPECULAR: update_light_colors( ctx, p ); break; - case GL_SPOT_DIRECTION: - /* picked up in update_light */ + case GL_SPOT_DIRECTION: + /* picked up in update_light */ break; case GL_POSITION: { - /* positions picked up in update_light, but can do flag here */ + /* positions picked up in update_light, but can do flag here */ GLuint flag = (p&1)? R200_LIGHT_1_IS_LOCAL : R200_LIGHT_0_IS_LOCAL; GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; @@ -1416,7 +1323,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname, r200ContextPtr rmesa = R200_CONTEXT(ctx); switch (pname) { - case GL_LIGHT_MODEL_AMBIENT: + case GL_LIGHT_MODEL_AMBIENT: update_global_ambient( ctx ); break; @@ -1430,7 +1337,7 @@ static void r200LightModelfv( GLcontext *ctx, GLenum pname, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] |= R200_LIGHT_TWOSIDE; else rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~(R200_LIGHT_TWOSIDE); - if (rmesa->TclFallback) { + if (rmesa->radeon.TclFallback) { r200ChooseRenderState( ctx ); r200ChooseVertexState( ctx ); } @@ -1675,7 +1582,7 @@ static void r200ClearStencil( GLcontext *ctx, GLint s ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - rmesa->state.stencil.clear = + rmesa->radeon.state.stencil.clear = ((GLuint) (ctx->Stencil.Clear & 0xff) | (0xff << R200_STENCIL_MASK_SHIFT) | ((ctx->Stencil.WriteMask[0] & 0xff) << R200_STENCIL_WRITEMASK_SHIFT)); @@ -1700,19 +1607,29 @@ static void r200ClearStencil( GLcontext *ctx, GLint s ) void r200UpdateWindow( GLcontext *ctx ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; - GLfloat xoffset = (GLfloat)dPriv->x; - GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; + GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; + const GLboolean render_to_fbo = (ctx->DrawBuffer ? (ctx->DrawBuffer->Name != 0) : 0); + const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + GLfloat y_scale, y_bias; + + if (render_to_fbo) { + y_scale = 1.0; + y_bias = 0; + } else { + y_scale = -1.0; + y_bias = yoffset; + } float_ui32_type sx = { v[MAT_SX] }; float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X }; - float_ui32_type sy = { - v[MAT_SY] }; - float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y }; - float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale }; - float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale }; + float_ui32_type sy = { v[MAT_SY] * y_scale }; + float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y }; + float_ui32_type sz = { v[MAT_SZ] * depthScale }; + float_ui32_type tz = { v[MAT_TZ] * depthScale }; - R200_FIREVERTICES( rmesa ); R200_STATECHANGE( rmesa, vpt ); rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32; @@ -1733,6 +1650,8 @@ static void r200Viewport( GLcontext *ctx, GLint x, GLint y, * values, or keep the originals hanging around. */ r200UpdateWindow( ctx ); + + radeon_viewport(ctx, x, y, width, height); } static void r200DepthRange( GLcontext *ctx, GLclampd nearval, @@ -1744,7 +1663,7 @@ static void r200DepthRange( GLcontext *ctx, GLclampd nearval, void r200UpdateViewportOffset( GLcontext *ctx ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = (GLfloat)dPriv->x; GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1774,8 +1693,8 @@ void r200UpdateViewportOffset( GLcontext *ctx ) R200_STIPPLE_Y_OFFSET_MASK); /* add magic offsets, then invert */ - stx = 31 - ((rmesa->dri.drawable->x - 1) & R200_STIPPLE_COORD_MASK); - sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1) + stx = 31 - ((dPriv->x - 1) & R200_STIPPLE_COORD_MASK); + sty = 31 - ((dPriv->y + dPriv->h - 1) & R200_STIPPLE_COORD_MASK); m |= ((stx << R200_STIPPLE_X_OFFSET_SHIFT) | @@ -1788,7 +1707,7 @@ void r200UpdateViewportOffset( GLcontext *ctx ) } } - r200UpdateScissor( ctx ); + radeonUpdateScissor( ctx ); } @@ -1805,7 +1724,7 @@ static void r200ClearColor( GLcontext *ctx, const GLfloat c[4] ) CLAMPED_FLOAT_TO_UBYTE(color[1], c[1]); CLAMPED_FLOAT_TO_UBYTE(color[2], c[2]); CLAMPED_FLOAT_TO_UBYTE(color[3], c[3]); - rmesa->state.color.clear = r200PackColor( rmesa->r200Screen->cpp, + rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp, color[0], color[1], color[2], color[3] ); } @@ -1848,96 +1767,6 @@ static void r200LogicOpCode( GLcontext *ctx, GLenum opcode ) rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = r200_rop_tab[rop]; } - -/* - * Set up the cliprects for either front or back-buffer drawing. - */ -void r200SetCliprects( r200ContextPtr rmesa ) -{ - __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; - __DRIdrawablePrivate *const readable = rmesa->dri.readable; - GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate; - GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate; - - if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BIT_BACK_LEFT) { - /* Can't ignore 2d windows if we are page flipping. - */ - if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) { - rmesa->numClipRects = drawable->numClipRects; - rmesa->pClipRects = drawable->pClipRects; - } - else { - rmesa->numClipRects = drawable->numBackClipRects; - rmesa->pClipRects = drawable->pBackClipRects; - } - } - else { - /* front buffer (or none, or multiple buffers) */ - rmesa->numClipRects = drawable->numClipRects; - rmesa->pClipRects = drawable->pClipRects; - } - - if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) { - _mesa_resize_framebuffer(rmesa->glCtx, draw_fb, - drawable->w, drawable->h); - draw_fb->Initialized = GL_TRUE; - } - - if (drawable != readable) { - if ((read_fb->Width != readable->w) || - (read_fb->Height != readable->h)) { - _mesa_resize_framebuffer(rmesa->glCtx, read_fb, - readable->w, readable->h); - read_fb->Initialized = GL_TRUE; - } - } - - if (rmesa->state.scissor.enabled) - r200RecalcScissorRects( rmesa ); - - rmesa->lastStamp = drawable->lastStamp; -} - - -static void r200DrawBuffer( GLcontext *ctx, GLenum mode ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - - if (R200_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s %s\n", __FUNCTION__, - _mesa_lookup_enum_by_nr( mode )); - - R200_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */ - - if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) { - /* 0 (GL_NONE) buffers or multiple color drawing buffers */ - FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE ); - return; - } - - switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) { - case BUFFER_FRONT_LEFT: - case BUFFER_BACK_LEFT: - FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_FALSE ); - break; - default: - FALLBACK( rmesa, R200_FALLBACK_DRAW_BUFFER, GL_TRUE ); - return; - } - - r200SetCliprects( rmesa ); - - /* We'll set the drawing engine's offset/pitch parameters later - * when we update other state. - */ -} - - -static void r200ReadBuffer( GLcontext *ctx, GLenum mode ) -{ - /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ -} - /* ============================================================= * State enable/disable */ @@ -1979,7 +1808,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_CLIP_PLANE2: case GL_CLIP_PLANE3: case GL_CLIP_PLANE4: - case GL_CLIP_PLANE5: + case GL_CLIP_PLANE5: p = cap-GL_CLIP_PLANE0; R200_STATECHANGE( rmesa, tcl ); if (state) { @@ -2013,10 +1842,10 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) R200_STATECHANGE(rmesa, ctx ); if ( state ) { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE; - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable; + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable; } else { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_DITHER_ENABLE; - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable; } break; @@ -2031,7 +1860,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] &= ~R200_TCL_FOG_MASK; } r200UpdateSpecular( ctx ); /* for PK_SPEC */ - if (rmesa->TclFallback) + if (rmesa->radeon.TclFallback) r200ChooseVertexState( ctx ); _mesa_allow_light_in_model( ctx, !state ); break; @@ -2046,13 +1875,13 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_LIGHT7: R200_STATECHANGE(rmesa, tcl); p = cap - GL_LIGHT0; - if (p&1) + if (p&1) flag = (R200_LIGHT_1_ENABLE | - R200_LIGHT_1_ENABLE_AMBIENT | + R200_LIGHT_1_ENABLE_AMBIENT | R200_LIGHT_1_ENABLE_SPECULAR); else flag = (R200_LIGHT_0_ENABLE | - R200_LIGHT_0_ENABLE_AMBIENT | + R200_LIGHT_0_ENABLE_AMBIENT | R200_LIGHT_0_ENABLE_SPECULAR); if (state) @@ -2060,7 +1889,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) else rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag; - /* + /* */ update_light_colors( ctx, p ); break; @@ -2068,7 +1897,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_LIGHTING: r200UpdateSpecular(ctx); /* for reflection map fixup - might set recheck_texgen for all units too */ - rmesa->NewGLState |= _NEW_TEXTURE; + rmesa->radeon.NewGLState |= _NEW_TEXTURE; break; case GL_LINE_SMOOTH: @@ -2181,21 +2010,30 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) } case GL_SCISSOR_TEST: - R200_FIREVERTICES( rmesa ); - rmesa->state.scissor.enabled = state; - r200UpdateScissor( ctx ); + radeon_firevertices(&rmesa->radeon); + rmesa->radeon.state.scissor.enabled = state; + radeonUpdateScissor( ctx ); break; case GL_STENCIL_TEST: - if ( rmesa->state.stencil.hwBuffer ) { - R200_STATECHANGE( rmesa, ctx ); - if ( state ) { - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_STENCIL_ENABLE; + { + GLboolean hw_stencil = GL_FALSE; + if (ctx->DrawBuffer) { + struct radeon_renderbuffer *rrbStencil + = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); + hw_stencil = (rrbStencil && rrbStencil->bo); + } + + if (hw_stencil) { + R200_STATECHANGE( rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_STENCIL_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_STENCIL_ENABLE; + } } else { - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~R200_STENCIL_ENABLE; + FALLBACK( rmesa, R200_FALLBACK_STENCIL, state ); } - } else { - FALLBACK( rmesa, R200_FALLBACK_STENCIL, state ); } break; @@ -2205,7 +2043,7 @@ static void r200Enable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_TEXTURE_GEN_T: /* Picked up in r200UpdateTextureState. */ - rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; + rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; break; case GL_COLOR_SUM_EXT: @@ -2322,7 +2160,7 @@ void r200LightingSpaceChange( GLcontext *ctx ) r200ContextPtr rmesa = R200_CONTEXT(ctx); GLboolean tmp; - if (R200_DEBUG & DEBUG_STATE) + if (R200_DEBUG & DEBUG_STATE) fprintf(stderr, "%s %d BEFORE %x\n", __FUNCTION__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]); @@ -2338,7 +2176,7 @@ void r200LightingSpaceChange( GLcontext *ctx ) rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0] &= ~R200_RESCALE_NORMALS; } - if (R200_DEBUG & DEBUG_STATE) + if (R200_DEBUG & DEBUG_STATE) fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL_0]); } @@ -2381,7 +2219,7 @@ static void update_texturematrix( GLcontext *ctx ) GLuint compsel = rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]; int unit; - if (R200_DEBUG & DEBUG_STATE) + if (R200_DEBUG & DEBUG_STATE) fprintf(stderr, "%s before COMPSEL: %x\n", __FUNCTION__, rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_COMPSEL]); @@ -2389,7 +2227,7 @@ static void update_texturematrix( GLcontext *ctx ) rmesa->TexMatCompSel = 0; for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) { - if (!ctx->Texture.Unit[unit]._ReallyEnabled) + if (!ctx->Texture.Unit[unit]._ReallyEnabled) continue; if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) { @@ -2399,21 +2237,21 @@ static void update_texturematrix( GLcontext *ctx ) rmesa->TexMatCompSel |= R200_OUTPUT_TEX_0 << unit; if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) { - /* Need to preconcatenate any active texgen + /* Need to preconcatenate any active texgen * obj/eyeplane matrices: */ _math_matrix_mul_matrix( &rmesa->tmpmat, - ctx->TextureMatrixStack[unit].Top, + ctx->TextureMatrixStack[unit].Top, &rmesa->TexGenMatrix[unit] ); upload_matrix( rmesa, rmesa->tmpmat.m, R200_MTX_TEX0+unit ); - } + } else { - upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m, + upload_matrix( rmesa, ctx->TextureMatrixStack[unit].Top->m, R200_MTX_TEX0+unit ); } } else if (rmesa->TexGenEnabled & (R200_TEXMAT_0_ENABLE << unit)) { - upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, + upload_matrix( rmesa, rmesa->TexGenMatrix[unit].m, R200_MTX_TEX0+unit ); } } @@ -2432,69 +2270,78 @@ static void update_texturematrix( GLcontext *ctx ) } } - - -/** - * Tell the card where to render (offset, pitch). - * Effected by glDrawBuffer, etc - */ -void -r200UpdateDrawBuffer(GLcontext *ctx) +static GLboolean r200ValidateBuffers(GLcontext *ctx) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - struct gl_framebuffer *fb = ctx->DrawBuffer; - driRenderbuffer *drb; + struct radeon_renderbuffer *rrb; + int i; - if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { - /* draw to front */ - drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; - } - else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { - /* draw to back */ - drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + radeon_validate_reset_bos(&rmesa->radeon); + + rrb = radeon_get_colorbuffer(&rmesa->radeon); + /* color buffer */ + if (rrb && rrb->bo) { + radeon_validate_bo(&rmesa->radeon, rrb->bo, + 0, RADEON_GEM_DOMAIN_VRAM); } - else { - /* drawing to multiple buffers, or none */ - return; + + /* depth buffer */ + rrb = radeon_get_depthbuffer(&rmesa->radeon); + /* color buffer */ + if (rrb && rrb->bo) { + radeon_validate_bo(&rmesa->radeon, rrb->bo, + 0, RADEON_GEM_DOMAIN_VRAM); } - assert(drb); - assert(drb->flippedPitch); + for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) { + radeonTexObj *t; - R200_STATECHANGE( rmesa, ctx ); + if (!ctx->Texture.Unit[i]._ReallyEnabled) + continue; - /* Note: we used the (possibly) page-flipped values */ - rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] - = ((drb->flippedOffset + rmesa->r200Screen->fbLocation) - & R200_COLOROFFSET_MASK); - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch; - if (rmesa->sarea->tiling_enabled) { - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; + t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); + if (t->image_override && t->bo) + radeon_validate_bo(&rmesa->radeon, t->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); + else if (t->mt->bo) + radeon_validate_bo(&rmesa->radeon, t->mt->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } -} + if (rmesa->radeon.dma.current) + radeon_validate_bo(&rmesa->radeon, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); + return radeon_revalidate_bos(ctx); +} -void r200ValidateState( GLcontext *ctx ) +GLboolean r200ValidateState( GLcontext *ctx ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - GLuint new_state = rmesa->NewGLState; + GLuint new_state = rmesa->radeon.NewGLState; - if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { - r200UpdateDrawBuffer(ctx); + if (new_state & _NEW_BUFFERS) { + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + + R200_STATECHANGE(rmesa, ctx); } - if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM)) { + if (new_state & (_NEW_TEXTURE | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)) { r200UpdateTextureState( ctx ); - new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */ + new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */ r200UpdateLocalViewer( ctx ); } + /* we need to do a space check here */ + if (!r200ValidateBuffers(ctx)) + return GL_FALSE; + /* FIXME: don't really need most of these when vertex progs are enabled */ /* Need an event driven matrix update? */ - if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) + if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, R200_MTX_MVP ); /* Need these for lighting (shouldn't upload otherwise) @@ -2518,11 +2365,12 @@ void r200ValidateState( GLcontext *ctx ) /* emit all active clip planes if projection matrix changes. */ if (new_state & (_NEW_PROJECTION)) { - if (ctx->Transform.ClipPlanesEnabled) + if (ctx->Transform.ClipPlanesEnabled) r200UpdateClipPlanes( ctx ); } if (new_state & (_NEW_PROGRAM| + _NEW_PROGRAM_CONSTANTS | /* need to test for pretty much anything due to possible parameter bindings */ _NEW_MODELVIEW|_NEW_PROJECTION|_NEW_TRANSFORM| _NEW_LIGHT|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX| @@ -2533,7 +2381,8 @@ void r200ValidateState( GLcontext *ctx ) else TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, 0); } - rmesa->NewGLState = 0; + rmesa->radeon.NewGLState = 0; + return GL_TRUE; } @@ -2544,7 +2393,7 @@ static void r200InvalidateState( GLcontext *ctx, GLuint new_state ) _vbo_InvalidateState( ctx, new_state ); _tnl_InvalidateState( ctx, new_state ); _ae_invalidate_state( ctx, new_state ); - R200_CONTEXT(ctx)->NewGLState |= new_state; + R200_CONTEXT(ctx)->radeon.NewGLState |= new_state; } /* A hack. The r200 can actually cope just fine with materials @@ -2573,12 +2422,13 @@ static void r200WrapRunPipeline( GLcontext *ctx ) GLboolean has_material; if (0) - fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState); + fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState); /* Validate state: */ - if (rmesa->NewGLState) - r200ValidateState( ctx ); + if (rmesa->radeon.NewGLState) + if (!r200ValidateState( ctx )) + FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE); has_material = !ctx->VertexProgram._Enabled && ctx->Light.Enabled && check_material( ctx ); @@ -2587,7 +2437,7 @@ static void r200WrapRunPipeline( GLcontext *ctx ) } /* Run the pipeline. - */ + */ _tnl_run_pipeline( ctx ); if (has_material) { @@ -2603,8 +2453,8 @@ void r200InitStateFuncs( struct dd_function_table *functions ) functions->UpdateState = r200InvalidateState; functions->LightingSpaceChange = r200LightingSpaceChange; - functions->DrawBuffer = r200DrawBuffer; - functions->ReadBuffer = r200ReadBuffer; + functions->DrawBuffer = radeonDrawBuffer; + functions->ReadBuffer = radeonReadBuffer; functions->AlphaFunc = r200AlphaFunc; functions->BlendColor = r200BlendColor; @@ -2636,7 +2486,7 @@ void r200InitStateFuncs( struct dd_function_table *functions ) functions->PointParameterfv = r200PointParameter; functions->PointSize = r200PointSize; functions->RenderMode = r200RenderMode; - functions->Scissor = r200Scissor; + functions->Scissor = radeonScissor; functions->ShadeModel = r200ShadeModel; functions->StencilFuncSeparate = r200StencilFuncSeparate; functions->StencilMaskSeparate = r200StencilMaskSeparate; diff --git a/src/mesa/drivers/dri/r200/r200_state.h b/src/mesa/drivers/dri/r200/r200_state.h index a917163a00..23cf8aea66 100644 --- a/src/mesa/drivers/dri/r200/r200_state.h +++ b/src/mesa/drivers/dri/r200/r200_state.h @@ -43,23 +43,17 @@ extern void r200InitTnlFuncs( GLcontext *ctx ); extern void r200UpdateMaterial( GLcontext *ctx ); -extern void r200SetCliprects( r200ContextPtr rmesa ); -extern void r200RecalcScissorRects( r200ContextPtr rmesa ); extern void r200UpdateViewportOffset( GLcontext *ctx ); extern void r200UpdateWindow( GLcontext *ctx ); extern void r200UpdateDrawBuffer(GLcontext *ctx); -extern void r200ValidateState( GLcontext *ctx ); - -extern void r200PrintDirty( r200ContextPtr rmesa, - const char *msg ); - +extern GLboolean r200ValidateState( GLcontext *ctx ); extern void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ); #define FALLBACK( rmesa, bit, mode ) do { \ if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ __FUNCTION__, bit, mode ); \ - r200Fallback( rmesa->glCtx, bit, mode ); \ + r200Fallback( rmesa->radeon.glCtx, bit, mode ); \ } while (0) extern void r200LightingSpaceChange( GLcontext *ctx ); diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index 9e4677eda4..c7df4b2587 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -43,6 +43,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/t_pipeline.h" #include "swrast_setup/swrast_setup.h" +#include "radeon_common.h" +#include "radeon_mipmap_tree.h" #include "r200_context.h" #include "r200_ioctl.h" #include "r200_state.h" @@ -52,31 +54,129 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "xmlpool.h" +/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in + * 1.3 cmdbuffers allow all previous state to be updated as well as + * the tcl scalar and vector areas. + */ +static struct { + int start; + int len; + const char *name; +} packet[RADEON_MAX_STATE_PACKETS] = { + {RADEON_PP_MISC, 7, "RADEON_PP_MISC"}, + {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"}, + {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"}, + {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"}, + {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"}, + {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"}, + {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"}, + {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"}, + {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"}, + {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"}, + {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"}, + {RADEON_RE_MISC, 1, "RADEON_RE_MISC"}, + {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"}, + {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"}, + {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"}, + {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"}, + {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"}, + {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"}, + {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"}, + {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"}, + {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17, + "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"}, + {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"}, + {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"}, + {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"}, + {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"}, + {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"}, + {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"}, + {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"}, + {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"}, + {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"}, + {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"}, + {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"}, + {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"}, + {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"}, + {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"}, + {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"}, + {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"}, + {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"}, + {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"}, + {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"}, + {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"}, + {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"}, + {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"}, + {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"}, + {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"}, + {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"}, + {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"}, + {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"}, + {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"}, + {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, + "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"}, + {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"}, + {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"}, + {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"}, + {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"}, + {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"}, + {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"}, + {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"}, + {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"}, + {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"}, + {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"}, + {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, + "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"}, + {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */ + {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */ + {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"}, + {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"}, + {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"}, + {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"}, + {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"}, + {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"}, + {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"}, + {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"}, + {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"}, + {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"}, + {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, + {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, + {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, + {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, + {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"}, + {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"}, + {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"}, + {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"}, + {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"}, + {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"}, + {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"}, + {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"}, + {R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */ + {R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"}, + {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"}, + {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"}, + {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"}, + {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"}, + {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"}, + {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"}, + {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"}, + {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"}, +}; + /* ============================================================= * State initialization */ - -void r200PrintDirty( r200ContextPtr rmesa, const char *msg ) +static int cmdpkt( r200ContextPtr rmesa, int id ) { - struct r200_state_atom *l; - - fprintf(stderr, msg); - fprintf(stderr, ": "); + drm_radeon_cmd_header_t h; - foreach(l, &rmesa->hw.atomlist) { - if (l->dirty || rmesa->hw.all_dirty) - fprintf(stderr, "%s, ", l->name); + if (rmesa->radeon.radeonScreen->kernel_mm) { + return CP_PACKET0(packet[id].start, packet[id].len - 1); + } else { + h.i = 0; + h.packet.cmd_type = RADEON_CMD_PACKET; + h.packet.packet_id = id; } - - fprintf(stderr, "\n"); -} - -static int cmdpkt( int id ) -{ - drm_radeon_cmd_header_t h; - h.i = 0; - h.packet.cmd_type = RADEON_CMD_PACKET; - h.packet.packet_id = id; return h.i; } @@ -127,150 +227,493 @@ static int cmdscl2( int offset, int stride, int count ) } #define CHECK( NM, FLAG ) \ -static GLboolean check_##NM( GLcontext *ctx, int idx ) \ +static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \ { \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ - (void) idx; \ (void) rmesa; \ - return FLAG; \ + return (FLAG) ? atom->cmd_size : 0; \ } #define TCL_CHECK( NM, FLAG ) \ -static GLboolean check_##NM( GLcontext *ctx, int idx ) \ -{ \ - r200ContextPtr rmesa = R200_CONTEXT(ctx); \ - (void) idx; \ - return !rmesa->TclFallback && !ctx->VertexProgram._Enabled && (FLAG); \ +static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom) \ +{ \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ + return (!rmesa->radeon.TclFallback && !ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \ } #define TCL_OR_VP_CHECK( NM, FLAG ) \ -static GLboolean check_##NM( GLcontext *ctx, int idx ) \ +static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ { \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ - (void) idx; \ - return !rmesa->TclFallback && (FLAG); \ + return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0; \ } #define VP_CHECK( NM, FLAG ) \ -static GLboolean check_##NM( GLcontext *ctx, int idx ) \ -{ \ - r200ContextPtr rmesa = R200_CONTEXT(ctx); \ - (void) idx; \ - return !rmesa->TclFallback && ctx->VertexProgram._Enabled && (FLAG); \ +static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ +{ \ + r200ContextPtr rmesa = R200_CONTEXT(ctx); \ + (void) atom; \ + return (!rmesa->radeon.TclFallback && ctx->VertexProgram._Enabled && (FLAG)) ? atom->cmd_size : 0; \ } - CHECK( always, GL_TRUE ) CHECK( never, GL_FALSE ) CHECK( tex_any, ctx->Texture._EnabledUnits ) CHECK( tf, (ctx->Texture._EnabledUnits && !ctx->ATIFragmentShader._Enabled) ); -CHECK( tex_pair, (rmesa->state.texture.unit[idx].unitneeded | rmesa->state.texture.unit[idx & ~1].unitneeded) ) -CHECK( tex, rmesa->state.texture.unit[idx].unitneeded ) +CHECK( tex_pair, (rmesa->state.texture.unit[atom->idx].unitneeded | rmesa->state.texture.unit[atom->idx & ~1].unitneeded) ) +CHECK( tex, rmesa->state.texture.unit[atom->idx].unitneeded ) CHECK( pix_zero, !ctx->ATIFragmentShader._Enabled ) -CHECK( texenv, (rmesa->state.envneeded & (1 << idx) && !ctx->ATIFragmentShader._Enabled) ) + CHECK( texenv, (rmesa->state.envneeded & (1 << (atom->idx)) && !ctx->ATIFragmentShader._Enabled) ) CHECK( afs_pass1, (ctx->ATIFragmentShader._Enabled && (ctx->ATIFragmentShader.Current->NumPasses > 1)) ) CHECK( afs, ctx->ATIFragmentShader._Enabled ) -CHECK( tex_cube, rmesa->state.texture.unit[idx].unitneeded & TEXTURE_CUBE_BIT ) +CHECK( tex_cube, rmesa->state.texture.unit[atom->idx].unitneeded & TEXTURE_CUBE_BIT ) TCL_CHECK( tcl_fog, ctx->Fog.Enabled ) TCL_CHECK( tcl, GL_TRUE ) -TCL_CHECK( tcl_tex, rmesa->state.texture.unit[idx].unitneeded ) +TCL_CHECK( tcl_tex, rmesa->state.texture.unit[atom->idx].unitneeded ) TCL_CHECK( tcl_lighting, ctx->Light.Enabled ) -TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[idx].Enabled ) -TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << idx)) ) +TCL_CHECK( tcl_light, ctx->Light.Enabled && ctx->Light.Light[atom->idx].Enabled ) +TCL_OR_VP_CHECK( tcl_ucp, (ctx->Transform.ClipPlanesEnabled & (1 << (atom->idx))) ) TCL_OR_VP_CHECK( tcl_or_vp, GL_TRUE ) VP_CHECK( tcl_vp, GL_TRUE ) VP_CHECK( tcl_vp_size, ctx->VertexProgram.Current->Base.NumNativeInstructions > 64 ) VP_CHECK( tcl_vpp_size, ctx->VertexProgram.Current->Base.NumNativeParameters > 96 ) +#define OUT_VEC(hdr, data) do { \ + drm_radeon_cmd_header_t h; \ + h.i = hdr; \ + OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \ + OUT_BATCH(0); \ + OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \ + OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \ + OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1)); \ + OUT_BATCH_TABLE((data), h.vectors.count); \ + } while(0) + +#define OUT_VECLINEAR(hdr, data) do { \ + drm_radeon_cmd_header_t h; \ + uint32_t _start, _sz; \ + h.i = hdr; \ + _start = h.veclinear.addr_lo | (h.veclinear.addr_hi << 8); \ + _sz = h.veclinear.count * 4; \ + if (r200->radeon.radeonScreen->kernel_mm && _sz) { \ + BEGIN_BATCH_NO_AUTOSTATE(dwords); \ + OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \ + OUT_BATCH(0); \ + OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \ + OUT_BATCH(_start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \ + OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, _sz - 1)); \ + OUT_BATCH_TABLE((data), _sz); \ + END_BATCH(); \ + } \ + } while(0) + +#define OUT_SCL(hdr, data) do { \ + drm_radeon_cmd_header_t h; \ + h.i = hdr; \ + OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \ + OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \ + OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \ + OUT_BATCH_TABLE((data), h.scalars.count); \ + } while(0) + +#define OUT_SCL2(hdr, data) do { \ + drm_radeon_cmd_header_t h; \ + h.i = hdr; \ + OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \ + OUT_BATCH((h.scalars.offset + 0x100) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \ + OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \ + OUT_BATCH_TABLE((data), h.scalars.count); \ + } while(0) + +static void mtl_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = atom->cmd_size; + + dwords += 6; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_VEC(atom->cmd[MTL_CMD_0], (atom->cmd+1)); + OUT_SCL2(atom->cmd[MTL_CMD_1], (atom->cmd + 18)); + END_BATCH(); +} -/* Initialize the context's hardware state. - */ -void r200InitState( r200ContextPtr rmesa ) +static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom) { - GLcontext *ctx = rmesa->glCtx; - GLuint color_fmt, depth_fmt, i; - GLint drawPitch, drawOffset; + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = atom->cmd_size; + + dwords += 8; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1); + OUT_VEC(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1); + END_BATCH(); +} - switch ( rmesa->r200Screen->cpp ) { - case 2: - color_fmt = R200_COLOR_FORMAT_RGB565; - break; - case 4: - color_fmt = R200_COLOR_FORMAT_ARGB8888; - break; - default: - fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" ); - exit( -1 ); +static void ptp_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = atom->cmd_size; + + dwords += 8; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_VEC(atom->cmd[PTP_CMD_0], atom->cmd+1); + OUT_VEC(atom->cmd[PTP_CMD_1], atom->cmd+PTP_CMD_1+1); + END_BATCH(); +} + +static void veclinear_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = atom->cmd_size; + + dwords += 4; + OUT_VECLINEAR(atom->cmd[0], atom->cmd+1); +} + +static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = atom->cmd_size; + + dwords += 2; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_SCL(atom->cmd[0], atom->cmd+1); + END_BATCH(); +} + + +static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = atom->cmd_size; + + dwords += 4; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_VEC(atom->cmd[0], atom->cmd+1); + END_BATCH(); +} + +static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + struct radeon_renderbuffer *rrb; + uint32_t cbpitch; + uint32_t zbpitch, depth_fmt; + uint32_t dwords = atom->cmd_size; + + /* output the first 7 bytes of context */ + BEGIN_BATCH_NO_AUTOSTATE(dwords+2+2); + OUT_BATCH_TABLE(atom->cmd, 5); + + rrb = radeon_get_depthbuffer(&r200->radeon); + if (!rrb) { + OUT_BATCH(0); + OUT_BATCH(0); + } else { + zbpitch = (rrb->pitch / rrb->cpp); + if (r200->using_hyperz) + zbpitch |= RADEON_DEPTH_HYPERZ; + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH(zbpitch); + if (rrb->cpp == 4) + depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; + else + depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; + atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; + atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; + } + + OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]); + OUT_BATCH(atom->cmd[CTX_CMD_1]); + OUT_BATCH(atom->cmd[CTX_PP_CNTL]); + + rrb = radeon_get_colorbuffer(&r200->radeon); + if (!rrb || !rrb->bo) { + OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); + OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]); + } else { + atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); + if (rrb->cpp == 4) + atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; + else + atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; + + OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); } - rmesa->state.color.clear = 0x00000000; + OUT_BATCH(atom->cmd[CTX_CMD_2]); - switch ( ctx->Visual.depthBits ) { - case 16: - rmesa->state.depth.clear = 0x0000ffff; - rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff; - depth_fmt = R200_DEPTH_FORMAT_16BIT_INT_Z; - rmesa->state.stencil.clear = 0x00000000; - break; - case 24: - rmesa->state.depth.clear = 0x00ffffff; - rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff; - depth_fmt = R200_DEPTH_FORMAT_24BIT_INT_Z; - rmesa->state.stencil.clear = 0xffff0000; - break; - default: - fprintf( stderr, "Error: Unsupported depth %d... exiting\n", - ctx->Visual.depthBits ); - exit( -1 ); + if (!rrb || !rrb->bo) { + OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]); + } else { + cbpitch = (rrb->pitch / rrb->cpp); + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + cbpitch |= R200_COLOR_TILE_ENABLE; + OUT_BATCH(cbpitch); } - /* Only have hw stencil when depth buffer is 24 bits deep */ - rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 && - ctx->Visual.depthBits == 24 ); + if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) + OUT_BATCH_TABLE((atom->cmd + 14), 4); + + END_BATCH(); +} - rmesa->Fallback = 0; +static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + struct radeon_renderbuffer *rrb, *drb; + uint32_t cbpitch = 0; + uint32_t zbpitch = 0; + uint32_t dwords = atom->cmd_size; + uint32_t depth_fmt; + + rrb = radeon_get_colorbuffer(&r200->radeon); + if (!rrb || !rrb->bo) { + return; + } - if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) { - drawOffset = rmesa->r200Screen->backOffset; - drawPitch = rmesa->r200Screen->backPitch; - } else { - drawOffset = rmesa->r200Screen->frontOffset; - drawPitch = rmesa->r200Screen->frontPitch; + atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); + if (rrb->cpp == 4) + atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; + else + atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; + + cbpitch = (rrb->pitch / rrb->cpp); + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + cbpitch |= R200_COLOR_TILE_ENABLE; + + drb = radeon_get_depthbuffer(&r200->radeon); + if (drb) { + zbpitch = (drb->pitch / drb->cpp); + if (drb->cpp == 4) + depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; + else + depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; + atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; + atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; + } + + dwords = 10; + if (drb) + dwords += 6; + if (rrb) + dwords += 6; + if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) + dwords += 4; + + /* output the first 7 bytes of context */ + BEGIN_BATCH_NO_AUTOSTATE(dwords); + + /* In the CS case we need to split this up */ + OUT_BATCH(CP_PACKET0(packet[0].start, 3)); + OUT_BATCH_TABLE((atom->cmd + 1), 4); + + if (drb) { + OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0)); + OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + + OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0)); + OUT_BATCH(zbpitch); } -#if 000 - if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) { - rmesa->state.color.drawOffset = rmesa->r200Screen->backOffset; - rmesa->state.color.drawPitch = rmesa->r200Screen->backPitch; + + OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0)); + OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]); + OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1)); + OUT_BATCH(atom->cmd[CTX_PP_CNTL]); + OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); + + + if (rrb) { + OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0)); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + + OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); + OUT_BATCH(cbpitch); + } + + if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) { + OUT_BATCH_TABLE((atom->cmd + 14), 4); + } + + END_BATCH(); +} + +static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = atom->cmd_size; + int i = atom->idx; + radeonTexObj *t = r200->state.texture.unit[i].texobj; + radeon_mipmap_level *lvl; + + if (t && t->mt && !t->image_override) + dwords += 2; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + /* is this ok even with drm older than 1.18? */ + OUT_BATCH_TABLE(atom->cmd, 10); + + if (t && t->mt && !t->image_override) { + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + } else if (!t) { + /* workaround for old CS mechanism */ + OUT_BATCH(r200->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]); } else { - rmesa->state.color.drawOffset = rmesa->r200Screen->frontOffset; - rmesa->state.color.drawPitch = rmesa->r200Screen->frontPitch; + OUT_BATCH(t->override_offset); } - rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset; - rmesa->state.pixel.readPitch = rmesa->state.color.drawPitch; -#endif + END_BATCH(); +} + +static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = atom->cmd_size; + int i = atom->idx; + radeonTexObj *t = r200->state.texture.unit[i].texobj; + radeon_mipmap_level *lvl; + int hastexture = 1; + + if (!r200->state.texture.unit[i].unitneeded) + hastexture = 0; + if (!t) + hastexture = 0; + else { + if (!t->mt && !t->bo) + hastexture = 0; + } - rmesa->hw.max_state_size = 0; + if (hastexture) + dwords += 2; + else + dwords -= 2; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + + OUT_BATCH(CP_PACKET0(R200_PP_TXFILTER_0 + (32 * i), 7)); + OUT_BATCH_TABLE((atom->cmd + 1), 8); + + if (hastexture) { + OUT_BATCH(CP_PACKET0(R200_PP_TXOFFSET_0 + (24 * i), 0)); + if (t->mt && !t->image_override) { + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + } else { + if (t->bo) + OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + } + } + END_BATCH(); +} + + +static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = 3; + int i = atom->idx, j; + radeonTexObj *t = r200->state.texture.unit[i].texobj; + radeon_mipmap_level *lvl; + + BEGIN_BATCH_NO_AUTOSTATE(dwords + (3 * 5)); + /* XXX that size won't really match with image_override... */ + OUT_BATCH_TABLE(atom->cmd, 2); + + if (t && !t->image_override) { + lvl = &t->mt->levels[0]; + OUT_BATCH_TABLE((atom->cmd + 2), 1); + for (j = 1; j <= 5; j++) { + OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset, + RADEON_GEM_DOMAIN_VRAM, 0, 0); + } + } + END_BATCH(); +} + +static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r200ContextPtr r200 = R200_CONTEXT(ctx); + BATCH_LOCALS(&r200->radeon); + uint32_t dwords = 2; + int i = atom->idx, j; + radeonTexObj *t = r200->state.texture.unit[i].texobj; + radeon_mipmap_level *lvl; + + BEGIN_BATCH_NO_AUTOSTATE(dwords + (4 * 5)); + OUT_BATCH_TABLE(atom->cmd, 2); + + if (t && !t->image_override) { + lvl = &t->mt->levels[0]; + for (j = 1; j <= 5; j++) { + OUT_BATCH(CP_PACKET0(R200_PP_CUBIC_OFFSET_F1_0 + (24*i) + (4 * (j-1)), 0)); + OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset, + RADEON_GEM_DOMAIN_VRAM, 0, 0); + } + } + END_BATCH(); +} + +/* Initialize the context's hardware state. + */ +void r200InitState( r200ContextPtr rmesa ) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + GLuint i; + + rmesa->radeon.state.color.clear = 0x00000000; + + switch ( ctx->Visual.depthBits ) { + case 16: + rmesa->radeon.state.depth.clear = 0x0000ffff; + rmesa->radeon.state.stencil.clear = 0x00000000; + break; + case 24: + default: + rmesa->radeon.state.depth.clear = 0x00ffffff; + rmesa->radeon.state.stencil.clear = 0xffff0000; + break; + } + + rmesa->radeon.Fallback = 0; + + rmesa->radeon.hw.max_state_size = 0; #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX ) \ do { \ rmesa->hw.ATOM.cmd_size = SZ; \ - rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \ - rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \ + rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int)); \ + rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \ rmesa->hw.ATOM.name = NM; \ rmesa->hw.ATOM.idx = IDX; \ rmesa->hw.ATOM.check = check_##CHK; \ rmesa->hw.ATOM.dirty = GL_FALSE; \ - rmesa->hw.max_state_size += SZ * sizeof(int); \ + rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \ } while (0) /* Allocate state buffers: */ - if (rmesa->r200Screen->drmSupportsBlendColor) + if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) ALLOC_STATE( ctx, always, CTX_STATE_SIZE_NEWDRM, "CTX/context", 0 ); else ALLOC_STATE( ctx, always, CTX_STATE_SIZE_OLDDRM, "CTX/context", 0 ); + + if (rmesa->radeon.radeonScreen->kernel_mm) + rmesa->hw.ctx.emit = ctx_emit_cs; + else + rmesa->hw.ctx.emit = ctx_emit; ALLOC_STATE( set, always, SET_STATE_SIZE, "SET/setup", 0 ); ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 ); ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); @@ -282,8 +725,8 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( cst, always, CST_STATE_SIZE, "CST/constant", 0 ); ALLOC_STATE( zbs, always, ZBS_STATE_SIZE, "ZBS/zbias", 0 ); ALLOC_STATE( tf, tf, TF_STATE_SIZE, "TF/tfactor", 0 ); - if (rmesa->r200Screen->drmSupportsFragShader) { - if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) { + if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { /* make sure texture units 0/1 are emitted pair-wise for r200 t0 hang workaround */ ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-0", 0 ); ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_NEWDRM, "TEX/tex-1", 1 ); @@ -303,7 +746,7 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( afs[1], afs, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); } else { - if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) { + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { ALLOC_STATE( tex[0], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-0", 0 ); ALLOC_STATE( tex[1], tex_pair, TEX_STATE_SIZE_OLDDRM, "TEX/tex-1", 1 ); ALLOC_STATE( tam, tex_any, TAM_STATE_SIZE, "TAM/tam", 0 ); @@ -321,13 +764,24 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( afs[0], never, AFS_STATE_SIZE, "AFS/afsinst-0", 0 ); ALLOC_STATE( afs[1], never, AFS_STATE_SIZE, "AFS/afsinst-1", 1 ); } - if (rmesa->r200Screen->drmSupportsCubeMapsR200) { + + for (i = 0; i < 5; i++) + if (rmesa->radeon.radeonScreen->kernel_mm) + rmesa->hw.tex[i].emit = tex_emit_cs; + else + rmesa->hw.tex[i].emit = tex_emit; + if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR200) { ALLOC_STATE( cube[0], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-0", 0 ); ALLOC_STATE( cube[1], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-1", 1 ); ALLOC_STATE( cube[2], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-2", 2 ); ALLOC_STATE( cube[3], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-3", 3 ); ALLOC_STATE( cube[4], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-4", 4 ); ALLOC_STATE( cube[5], tex_cube, CUBE_STATE_SIZE, "CUBE/tex-5", 5 ); + for (i = 0; i < 5; i++) + if (rmesa->radeon.radeonScreen->kernel_mm) + rmesa->hw.cube[i].emit = cube_emit_cs; + else + rmesa->hw.cube[i].emit = cube_emit; } else { ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/tex-0", 0 ); @@ -337,7 +791,8 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( cube[4], never, CUBE_STATE_SIZE, "CUBE/tex-4", 4 ); ALLOC_STATE( cube[5], never, CUBE_STATE_SIZE, "CUBE/tex-5", 5 ); } - if (rmesa->r200Screen->drmSupportsVertexProgram) { + + if (rmesa->radeon.radeonScreen->drmSupportsVertexProgram) { ALLOC_STATE( pvs, tcl_vp, PVS_STATE_SIZE, "PVS/pvscntl", 0 ); ALLOC_STATE( vpi[0], tcl_vp, VPI_STATE_SIZE, "VP/vertexprog-0", 0 ); ALLOC_STATE( vpi[1], tcl_vp_size, VPI_STATE_SIZE, "VP/vertexprog-1", 1 ); @@ -390,13 +845,13 @@ void r200InitState( r200ContextPtr rmesa ) ALLOC_STATE( pix[3], texenv, PIX_STATE_SIZE, "PIX/pixstage-3", 3 ); ALLOC_STATE( pix[4], texenv, PIX_STATE_SIZE, "PIX/pixstage-4", 4 ); ALLOC_STATE( pix[5], texenv, PIX_STATE_SIZE, "PIX/pixstage-5", 5 ); - if (rmesa->r200Screen->drmSupportsTriPerf) { + if (rmesa->radeon.radeonScreen->drmSupportsTriPerf) { ALLOC_STATE( prf, always, PRF_STATE_SIZE, "PRF/performance-tri", 0 ); } else { ALLOC_STATE( prf, never, PRF_STATE_SIZE, "PRF/performance-tri", 0 ); } - if (rmesa->r200Screen->drmSupportsPointSprites) { + if (rmesa->radeon.radeonScreen->drmSupportsPointSprites) { ALLOC_STATE( spr, always, SPR_STATE_SIZE, "SPR/pointsprite", 0 ); ALLOC_STATE( ptp, tcl, PTP_STATE_SIZE, "PTP/pointparams", 0 ); } @@ -409,87 +864,115 @@ void r200InitState( r200ContextPtr rmesa ) /* Fill in the packet headers: */ - rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC); - rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL); - rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH); - if (rmesa->r200Screen->drmSupportsBlendColor) - rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(R200_EMIT_RB3D_BLENDCOLOR); - rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN); - rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH); - rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK); - rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE); - rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL); - rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC); - rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(R200_EMIT_PP_CNTL_X); - rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(R200_EMIT_RB3D_DEPTHXY_OFFSET); - rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(R200_EMIT_RE_AUX_SCISSOR_CNTL); - rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(R200_EMIT_RE_SCISSOR_TL_0); - rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(R200_EMIT_SE_VAP_CNTL_STATUS); - rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(R200_EMIT_RE_POINTSIZE); - rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0); - rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(R200_EMIT_PP_TAM_DEBUG3); - rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(R200_EMIT_TFACTOR_0); - if (rmesa->r200Screen->drmSupportsFragShader) { - rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(R200_EMIT_ATF_TFACTOR); - rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_0); - rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0); - rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_1); - rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1); - rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_2); - rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2); - rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_3); - rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3); - rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_4); - rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4); - rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCTLALL_5); - rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5); + rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC); + rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL); + rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH); + if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) + rmesa->hw.ctx.cmd[CTX_CMD_3] = cmdpkt(rmesa, R200_EMIT_RB3D_BLENDCOLOR); + rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN); + rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH); + rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK); + rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE); + rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL); + rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC); + rmesa->hw.cst.cmd[CST_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CNTL_X); + rmesa->hw.cst.cmd[CST_CMD_1] = cmdpkt(rmesa, R200_EMIT_RB3D_DEPTHXY_OFFSET); + rmesa->hw.cst.cmd[CST_CMD_2] = cmdpkt(rmesa, R200_EMIT_RE_AUX_SCISSOR_CNTL); + rmesa->hw.cst.cmd[CST_CMD_3] = cmdpkt(rmesa, R200_EMIT_RE_SCISSOR_TL_0); + rmesa->hw.cst.cmd[CST_CMD_4] = cmdpkt(rmesa, R200_EMIT_SE_VAP_CNTL_STATUS); + rmesa->hw.cst.cmd[CST_CMD_5] = cmdpkt(rmesa, R200_EMIT_RE_POINTSIZE); + rmesa->hw.cst.cmd[CST_CMD_6] = cmdpkt(rmesa, R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0); + rmesa->hw.tam.cmd[TAM_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TAM_DEBUG3); + rmesa->hw.tf.cmd[TF_CMD_0] = cmdpkt(rmesa, R200_EMIT_TFACTOR_0); + if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { + rmesa->hw.atf.cmd[ATF_CMD_0] = cmdpkt(rmesa, R200_EMIT_ATF_TFACTOR); + rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_0); + rmesa->hw.tex[0].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0); + rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_1); + rmesa->hw.tex[1].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1); + rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_2); + rmesa->hw.tex[2].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2); + rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_3); + rmesa->hw.tex[3].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3); + rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_4); + rmesa->hw.tex[4].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4); + rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCTLALL_5); + rmesa->hw.tex[5].cmd[TEX_CMD_1_NEWDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5); } else { - rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_0); - rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_0); - rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_1); - rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_1); - rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_2); - rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_2); - rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_3); - rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_3); - rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_4); - rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_4); - rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(R200_EMIT_PP_TXFILTER_5); - rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(R200_EMIT_PP_TXOFFSET_5); - } - rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_0); - rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(R200_EMIT_PP_AFS_1); - rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(R200_EMIT_VAP_PVS_CNTL); - rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_0); - rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_0); - rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_1); - rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_1); - rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_2); - rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_2); - rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_3); - rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_3); - rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_4); - rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_4); - rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(R200_EMIT_PP_CUBIC_FACES_5); - rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(R200_EMIT_PP_CUBIC_OFFSETS_5); - rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_0); - rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_1); - rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_2); - rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_3); - rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_4); - rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(R200_EMIT_PP_TXCBLEND_5); - rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR); - rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(R200_EMIT_TCL_LIGHT_MODEL_CTL_0); - rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(R200_EMIT_TCL_UCP_VERT_BLEND_CTL); - rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(R200_EMIT_TEX_PROC_CTL_2); - rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(R200_EMIT_MATRIX_SELECT_0); - rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(R200_EMIT_VAP_CTL); - rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(R200_EMIT_VTX_FMT_0); - rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(R200_EMIT_OUTPUT_VTX_COMP_SEL); - rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(R200_EMIT_SE_VTX_STATE_CNTL); - rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(R200_EMIT_VTE_CNTL); - rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(R200_EMIT_PP_TRI_PERF_CNTL); - rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(R200_EMIT_TCL_POINT_SPRITE_CNTL); + rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_0); + rmesa->hw.tex[0].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_0); + rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_1); + rmesa->hw.tex[1].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_1); + rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_2); + rmesa->hw.tex[2].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_2); + rmesa->hw.tex[3].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_3); + rmesa->hw.tex[3].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_3); + rmesa->hw.tex[4].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_4); + rmesa->hw.tex[4].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_4); + rmesa->hw.tex[5].cmd[TEX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXFILTER_5); + rmesa->hw.tex[5].cmd[TEX_CMD_1_OLDDRM] = cmdpkt(rmesa, R200_EMIT_PP_TXOFFSET_5); + } + rmesa->hw.afs[0].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_0); + rmesa->hw.afs[1].cmd[AFS_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_AFS_1); + rmesa->hw.pvs.cmd[PVS_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_PVS_CNTL); + rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_0); + rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_0); + rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_1); + rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_1); + rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_2); + rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_2); + rmesa->hw.cube[3].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_3); + rmesa->hw.cube[3].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_3); + rmesa->hw.cube[4].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_4); + rmesa->hw.cube[4].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_4); + rmesa->hw.cube[5].cmd[CUBE_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_FACES_5); + rmesa->hw.cube[5].cmd[CUBE_CMD_1] = cmdpkt(rmesa, R200_EMIT_PP_CUBIC_OFFSETS_5); + rmesa->hw.pix[0].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_0); + rmesa->hw.pix[1].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_1); + rmesa->hw.pix[2].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_2); + rmesa->hw.pix[3].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_3); + rmesa->hw.pix[4].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_4); + rmesa->hw.pix[5].cmd[PIX_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TXCBLEND_5); + rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR); + rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_LIGHT_MODEL_CTL_0); + rmesa->hw.tcl.cmd[TCL_CMD_1] = cmdpkt(rmesa, R200_EMIT_TCL_UCP_VERT_BLEND_CTL); + rmesa->hw.tcg.cmd[TCG_CMD_0] = cmdpkt(rmesa, R200_EMIT_TEX_PROC_CTL_2); + rmesa->hw.msl.cmd[MSL_CMD_0] = cmdpkt(rmesa, R200_EMIT_MATRIX_SELECT_0); + rmesa->hw.vap.cmd[VAP_CMD_0] = cmdpkt(rmesa, R200_EMIT_VAP_CTL); + rmesa->hw.vtx.cmd[VTX_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTX_FMT_0); + rmesa->hw.vtx.cmd[VTX_CMD_1] = cmdpkt(rmesa, R200_EMIT_OUTPUT_VTX_COMP_SEL); + rmesa->hw.vtx.cmd[VTX_CMD_2] = cmdpkt(rmesa, R200_EMIT_SE_VTX_STATE_CNTL); + rmesa->hw.vte.cmd[VTE_CMD_0] = cmdpkt(rmesa, R200_EMIT_VTE_CNTL); + rmesa->hw.prf.cmd[PRF_CMD_0] = cmdpkt(rmesa, R200_EMIT_PP_TRI_PERF_CNTL); + rmesa->hw.spr.cmd[SPR_CMD_0] = cmdpkt(rmesa, R200_EMIT_TCL_POINT_SPRITE_CNTL); + if (rmesa->radeon.radeonScreen->kernel_mm) { + rmesa->hw.mtl[0].emit = mtl_emit; + rmesa->hw.mtl[1].emit = mtl_emit; + + rmesa->hw.vpi[0].emit = veclinear_emit; + rmesa->hw.vpi[1].emit = veclinear_emit; + rmesa->hw.vpp[0].emit = veclinear_emit; + rmesa->hw.vpp[1].emit = veclinear_emit; + + rmesa->hw.grd.emit = scl_emit; + rmesa->hw.fog.emit = vec_emit; + rmesa->hw.glt.emit = vec_emit; + rmesa->hw.eye.emit = vec_emit; + + for (i = R200_MTX_MV; i <= R200_MTX_TEX5; i++) + rmesa->hw.mat[i].emit = vec_emit; + + for (i = 0; i < 8; i++) + rmesa->hw.lit[i].emit = lit_emit; + + for (i = 0; i < 6; i++) + rmesa->hw.ucp[i].emit = vec_emit; + + rmesa->hw.ptp.emit = ptp_emit; + } + + + rmesa->hw.mtl[0].cmd[MTL_CMD_0] = cmdvec( R200_VS_MAT_0_EMISS, 1, 16 ); rmesa->hw.mtl[0].cmd[MTL_CMD_1] = @@ -567,7 +1050,7 @@ void r200InitState( r200ContextPtr rmesa ) (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | (R200_BLEND_GL_ZERO << R200_DST_BLEND_SHIFT)); - if (rmesa->r200Screen->drmSupportsBlendColor) { + if (rmesa->radeon.radeonScreen->drmSupportsBlendColor) { rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCOLOR] = 0x00000000; rmesa->hw.ctx.cmd[CTX_RB3D_ABLENDCNTL] = (R200_COMB_FCN_ADD_CLAMP | (R200_BLEND_GL_ONE << R200_SRC_BLEND_SHIFT) | @@ -578,18 +1061,17 @@ void r200InitState( r200ContextPtr rmesa ) } rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] = - rmesa->r200Screen->depthOffset + rmesa->r200Screen->fbLocation; + rmesa->radeon.radeonScreen->depthOffset + rmesa->radeon.radeonScreen->fbLocation; rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = - ((rmesa->r200Screen->depthPitch & + ((rmesa->radeon.radeonScreen->depthPitch & R200_DEPTHPITCH_MASK) | R200_DEPTH_ENDIAN_NO_SWAP); if (rmesa->using_hyperz) rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= R200_DEPTH_HYPERZ; - rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt | - R200_Z_TEST_LESS | + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (R200_Z_TEST_LESS | R200_STENCIL_TEST_ALWAYS | R200_STENCIL_FAIL_KEEP | R200_STENCIL_ZPASS_KEEP | @@ -599,15 +1081,14 @@ void r200InitState( r200ContextPtr rmesa ) if (rmesa->using_hyperz) { rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= R200_Z_COMPRESSION_ENABLE | R200_Z_DECOMPRESSION_ENABLE; -/* if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) +/* if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/ } rmesa->hw.ctx.cmd[CTX_PP_CNTL] = (R200_ANTI_ALIAS_NONE | R200_TEX_BLEND_0_ENABLE); - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = color_fmt; - switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) { + switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) { case DRI_CONF_DITHER_XERRORDIFFRESET: rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_INIT; break; @@ -615,41 +1096,19 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_SCALE_DITHER_ENABLE; break; } - if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) == + if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) == DRI_CONF_ROUND_ROUND ) - rmesa->state.color.roundEnable = R200_ROUND_ENABLE; + rmesa->radeon.state.color.roundEnable = R200_ROUND_ENABLE; else - rmesa->state.color.roundEnable = 0; - if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) == + rmesa->radeon.state.color.roundEnable = 0; + if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) == DRI_CONF_COLOR_REDUCTION_DITHER ) rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= R200_DITHER_ENABLE; else - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; - -#if 000 - rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((rmesa->state.color.drawOffset + - rmesa->r200Screen->fbLocation) - & R200_COLOROFFSET_MASK); - - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((rmesa->state.color.drawPitch & - R200_COLORPITCH_MASK) | - R200_COLOR_ENDIAN_NO_SWAP); -#else - rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset + - rmesa->r200Screen->fbLocation) - & R200_COLOROFFSET_MASK); - - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch & - R200_COLORPITCH_MASK) | - R200_COLOR_ENDIAN_NO_SWAP); -#endif - /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */ - if (rmesa->sarea->tiling_enabled) { - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= R200_COLOR_TILE_ENABLE; - } + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable; rmesa->hw.prf.cmd[PRF_PP_TRI_PERF] = R200_TRI_CUTOFF_MASK - R200_TRI_CUTOFF_MASK * - driQueryOptionf (&rmesa->optionCache,"texture_blend_quality"); + driQueryOptionf (&rmesa->radeon.optionCache,"texture_blend_quality"); rmesa->hw.prf.cmd[PRF_PP_PERF_CNTL] = 0; rmesa->hw.set.cmd[SET_SE_CNTL] = (R200_FFACE_CULL_CCW | @@ -704,7 +1163,7 @@ void r200InitState( r200ContextPtr rmesa ) R200_VC_NO_SWAP; #endif - if (!(rmesa->r200Screen->chip_flags & RADEON_CHIPSET_TCL)) { + if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { /* Bypass TCL */ rmesa->hw.cst.cmd[CST_SE_VAP_CNTL_STATUS] |= (1<<8); } @@ -743,28 +1202,28 @@ void r200InitState( r200ContextPtr rmesa ) rmesa->hw.tex[i].cmd[TEX_PP_TXFORMAT_X] = (/* R200_TEXCOORD_PROJ | */ 0x100000); /* Small default bias */ - if (rmesa->r200Screen->drmSupportsFragShader) { + if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_NEWDRM] = - rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.tex[i].cmd[TEX_PP_CUBIC_FACES] = 0; rmesa->hw.tex[i].cmd[TEX_PP_TXMULTI_CTL] = 0; } else { rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET_OLDDRM] = - rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; } rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F1] = - rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F2] = - rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F3] = - rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F4] = - rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_F5] = - rmesa->r200Screen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.pix[i].cmd[PIX_PP_TXCBLEND] = (R200_TXC_ARG_A_ZERO | @@ -967,5 +1426,7 @@ void r200InitState( r200ContextPtr rmesa ) r200LightingSpaceChange( ctx ); - rmesa->hw.all_dirty = GL_TRUE; + rmesa->radeon.hw.all_dirty = GL_TRUE; + + rcommonInitCmdBuf(&rmesa->radeon); } diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index b25f028244..83e70b586d 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -55,27 +55,24 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_tcl.h" -static void flush_last_swtcl_prim( r200ContextPtr rmesa ); - - /*********************************************************************** - * Initialization + * Initialization ***********************************************************************/ #define EMIT_ATTR( ATTR, STYLE, F0 ) \ do { \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \ - rmesa->swtcl.vertex_attr_count++; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ fmt_0 |= F0; \ } while (0) #define EMIT_PAD( N ) \ do { \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \ - rmesa->swtcl.vertex_attr_count++; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ } while (0) static void r200SetVertexFormat( GLcontext *ctx ) @@ -100,7 +97,7 @@ static void r200SetVertexFormat( GLcontext *ctx ) } assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); - rmesa->swtcl.vertex_attr_count = 0; + rmesa->radeon.swtcl.vertex_attr_count = 0; /* EMIT_ATTR's must be in order as they tell t_vertex.c how to * build up a hardware vertex. @@ -121,7 +118,7 @@ static void r200SetVertexFormat( GLcontext *ctx ) } rmesa->swtcl.coloroffset = offset; -#if MESA_LITTLE_ENDIAN +#if MESA_LITTLE_ENDIAN EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) ); #else EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, (R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT) ); @@ -132,7 +129,7 @@ static void r200SetVertexFormat( GLcontext *ctx ) if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 ) || RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_FOG )) { -#if MESA_LITTLE_ENDIAN +#if MESA_LITTLE_ENDIAN if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { rmesa->swtcl.specoffset = offset; EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_3UB_3F_RGB, (R200_VTX_PK_RGBA << R200_VTX_COLOR_1_SHIFT) ); @@ -185,7 +182,7 @@ static void r200SetVertexFormat( GLcontext *ctx ) rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] |= R200_FOG_USE_SPEC_ALPHA; } - if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) || + if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) || (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) || (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) { R200_NEWPRIM(rmesa); @@ -193,26 +190,20 @@ static void r200SetVertexFormat( GLcontext *ctx ) rmesa->hw.vtx.cmd[VTX_VTXFMT_0] = fmt_0; rmesa->hw.vtx.cmd[VTX_VTXFMT_1] = fmt_1; - rmesa->swtcl.vertex_size = + rmesa->radeon.swtcl.vertex_size = _tnl_install_attrs( ctx, - rmesa->swtcl.vertex_attrs, - rmesa->swtcl.vertex_attr_count, + rmesa->radeon.swtcl.vertex_attrs, + rmesa->radeon.swtcl.vertex_attr_count, NULL, 0 ); - rmesa->swtcl.vertex_size /= 4; - RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); + rmesa->radeon.swtcl.vertex_size /= 4; + RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset ); } } static void r200RenderStart( GLcontext *ctx ) { - r200ContextPtr rmesa = R200_CONTEXT( ctx ); - r200SetVertexFormat( ctx ); - - if (rmesa->dma.flush != 0 && - rmesa->dma.flush != flush_last_swtcl_prim) - rmesa->dma.flush( rmesa ); } @@ -232,7 +223,7 @@ void r200ChooseVertexState( GLcontext *ctx ) * rasterization fallback. As this function will be called again when we * leave a rasterization fallback, we can just skip it for now. */ - if (rmesa->Fallback != 0) + if (rmesa->radeon.Fallback != 0) return; vte = rmesa->hw.vte.cmd[VTE_SE_VTE_CNTL]; @@ -273,78 +264,27 @@ void r200ChooseVertexState( GLcontext *ctx ) } } - -/* Flush vertices in the current dma region. - */ -static void flush_last_swtcl_prim( r200ContextPtr rmesa ) +void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset) { - if (R200_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); - - rmesa->dma.flush = NULL; - - if (rmesa->dma.current.buf) { - struct r200_dma_region *current = &rmesa->dma.current; - GLuint current_offset = (rmesa->r200Screen->gart_buffer_offset + - current->buf->buf->idx * RADEON_BUFFER_SIZE + - current->start); - - assert (!(rmesa->swtcl.hw_primitive & R200_VF_PRIM_WALK_IND)); - - assert (current->start + - rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == - current->ptr); - - if (rmesa->dma.current.start != rmesa->dma.current.ptr) { - r200EnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ + - rmesa->hw.max_state_size + VBUF_BUFSZ ); - r200EmitVertexAOS( rmesa, - rmesa->swtcl.vertex_size, - current_offset); - - r200EmitVbufPrim( rmesa, - rmesa->swtcl.hw_primitive, - rmesa->swtcl.numverts); - } - - rmesa->swtcl.numverts = 0; - current->start = current->ptr; - } -} - - -/* Alloc space in the current dma region. - */ -static INLINE void * -r200AllocDmaLowVerts( r200ContextPtr rmesa, int nverts, int vsize ) -{ - GLuint bytes = vsize * nverts; - - if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) - r200RefillCurrentDmaRegion( rmesa ); + r200ContextPtr rmesa = R200_CONTEXT(ctx); + rcommonEnsureCmdBufSpace(&rmesa->radeon, + rmesa->radeon.hw.max_state_size + (12*sizeof(int)), + __FUNCTION__); - if (!rmesa->dma.flush) { - rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; - rmesa->dma.flush = flush_last_swtcl_prim; - } - ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); - ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); - ASSERT( rmesa->dma.current.start + - rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == - rmesa->dma.current.ptr ); + radeonEmitState(&rmesa->radeon); + r200EmitVertexAOS( rmesa, + rmesa->radeon.swtcl.vertex_size, + rmesa->radeon.dma.current, + current_offset); - { - GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr); - rmesa->dma.current.ptr += bytes; - rmesa->swtcl.numverts += nverts; - return head; - } + r200EmitVbufPrim( rmesa, + rmesa->radeon.swtcl.hw_primitive, + rmesa->radeon.swtcl.numverts); } - /**************************************************************************/ @@ -392,13 +332,13 @@ static void r200ResetLineStipple( GLcontext *ctx ); #undef LOCAL_VARS #undef ALLOC_VERTS #define CTX_ARG r200ContextPtr rmesa -#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size -#define ALLOC_VERTS( n, size ) r200AllocDmaLowVerts( rmesa, n, size * 4 ) +#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size +#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ) #define LOCAL_VARS \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ - const char *r200verts = (char *)rmesa->swtcl.verts; -#define VERT(x) (r200Vertex *)(r200verts + ((x) * vertsize * sizeof(int))) -#define VERTEX r200Vertex + const char *r200verts = (char *)rmesa->radeon.swtcl.verts; +#define VERT(x) (radeonVertex *)(r200verts + ((x) * vertsize * sizeof(int))) +#define VERTEX radeonVertex #define DO_DEBUG_VERTS (1 && (R200_DEBUG & DEBUG_VERTS)) #undef TAG @@ -456,11 +396,11 @@ static struct { #define VERT_Y(_v) _v->v.y #define VERT_Z(_v) _v->v.z #define AREA_IS_CCW( a ) (a < 0) -#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int))) +#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int))) #define VERT_SET_RGBA( v, c ) \ do { \ - r200_color_t *color = (r200_color_t *)&((v)->ui[coloroffset]); \ + radeon_color_t *color = (radeon_color_t *)&((v)->ui[coloroffset]); \ UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \ UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \ UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \ @@ -472,7 +412,7 @@ do { \ #define VERT_SET_SPEC( v, c ) \ do { \ if (specoffset) { \ - r200_color_t *spec = (r200_color_t *)&((v)->ui[specoffset]); \ + radeon_color_t *spec = (radeon_color_t *)&((v)->ui[specoffset]); \ UNCLAMPED_FLOAT_TO_UBYTE(spec->red, (c)[0]); \ UNCLAMPED_FLOAT_TO_UBYTE(spec->green, (c)[1]); \ UNCLAMPED_FLOAT_TO_UBYTE(spec->blue, (c)[2]); \ @@ -481,8 +421,8 @@ do { \ #define VERT_COPY_SPEC( v0, v1 ) \ do { \ if (specoffset) { \ - r200_color_t *spec0 = (r200_color_t *)&((v0)->ui[specoffset]); \ - r200_color_t *spec1 = (r200_color_t *)&((v1)->ui[specoffset]); \ + radeon_color_t *spec0 = (radeon_color_t *)&((v0)->ui[specoffset]); \ + radeon_color_t *spec1 = (radeon_color_t *)&((v1)->ui[specoffset]); \ spec0->red = spec1->red; \ spec0->green = spec1->green; \ spec0->blue = spec1->blue; \ @@ -513,7 +453,7 @@ do { \ ***********************************************************************/ #define RASTERIZE(x) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, x) ) -#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive +#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive #undef TAG #define TAG(x) x #include "tnl_dd/t_dd_unfilled.h" @@ -569,8 +509,8 @@ static void init_rast_tab( void ) #undef LOCAL_VARS #define LOCAL_VARS \ r200ContextPtr rmesa = R200_CONTEXT(ctx); \ - const GLuint vertsize = rmesa->swtcl.vertex_size; \ - const char *r200verts = (char *)rmesa->swtcl.verts; \ + const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \ + const char *r200verts = (char *)rmesa->radeon.swtcl.verts; \ const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ const GLboolean stipple = ctx->Line.StippleFlag; \ (void) elt; (void) stipple; @@ -599,13 +539,13 @@ void r200ChooseRenderState( GLcontext *ctx ) GLuint index = 0; GLuint flags = ctx->_TriangleCaps; - if (!rmesa->TclFallback || rmesa->Fallback) + if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) return; if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R200_TWOSIDE_BIT; if (flags & DD_TRI_UNFILLED) index |= R200_UNFILLED_BIT; - if (index != rmesa->swtcl.RenderIndex) { + if (index != rmesa->radeon.swtcl.RenderIndex) { tnl->Driver.Render.Points = rast_tab[index].points; tnl->Driver.Render.Line = rast_tab[index].line; tnl->Driver.Render.ClippedLine = rast_tab[index].line; @@ -622,7 +562,7 @@ void r200ChooseRenderState( GLcontext *ctx ) tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; } - rmesa->swtcl.RenderIndex = index; + rmesa->radeon.swtcl.RenderIndex = index; } } @@ -636,7 +576,7 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - if (rmesa->swtcl.hw_primitive != hwprim) { + if (rmesa->radeon.swtcl.hw_primitive != hwprim) { /* need to disable perspective-correct texturing for point sprites */ if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) { if (rmesa->hw.set.cmd[SET_RE_CNTL] & R200_PERSPECTIVE_ENABLE) { @@ -649,15 +589,15 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim ) rmesa->hw.set.cmd[SET_RE_CNTL] |= R200_PERSPECTIVE_ENABLE; } R200_NEWPRIM( rmesa ); - rmesa->swtcl.hw_primitive = hwprim; + rmesa->radeon.swtcl.hw_primitive = hwprim; } } static void r200RenderPrimitive( GLcontext *ctx, GLenum prim ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - rmesa->swtcl.render_primitive = prim; - if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) + rmesa->radeon.swtcl.render_primitive = prim; + if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) r200RasterPrimitive( ctx, reduced_hw_prim(ctx, prim) ); } @@ -701,15 +641,15 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); - GLuint oldfallback = rmesa->Fallback; + GLuint oldfallback = rmesa->radeon.Fallback; if (mode) { - rmesa->Fallback |= bit; + rmesa->radeon.Fallback |= bit; if (oldfallback == 0) { - R200_FIREVERTICES( rmesa ); + radeon_firevertices(&rmesa->radeon); TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_TRUE ); _swsetup_Wakeup( ctx ); - rmesa->swtcl.RenderIndex = ~0; + rmesa->radeon.swtcl.RenderIndex = ~0; if (R200_DEBUG & DEBUG_FALLBACKS) { fprintf(stderr, "R200 begin rasterization fallback: 0x%x %s\n", bit, getFallbackString(bit)); @@ -717,7 +657,7 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ) } } else { - rmesa->Fallback &= ~bit; + rmesa->radeon.Fallback &= ~bit; if (oldfallback == bit) { _swrast_flush( ctx ); @@ -731,14 +671,14 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ) tnl->Driver.Render.ResetLineStipple = r200ResetLineStipple; TCL_FALLBACK( ctx, R200_TCL_FALLBACK_RASTER, GL_FALSE ); - if (rmesa->TclFallback) { - /* These are already done if rmesa->TclFallback goes to + if (rmesa->radeon.TclFallback) { + /* These are already done if rmesa->radeon.TclFallback goes to * zero above. But not if it doesn't (R200_NO_TCL for * example?) */ _tnl_invalidate_vertex_state( ctx, ~0 ); _tnl_invalidate_vertices( ctx, ~0 ); - RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); + RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset ); r200ChooseVertexState( ctx ); r200ChooseRenderState( ctx ); } @@ -755,7 +695,7 @@ void r200Fallback( GLcontext *ctx, GLuint bit, GLboolean mode ) /** * Cope with depth operations by drawing individual pixels as points. - * + * * \todo * The way the vertex state is set in this routine is hokey. It seems to * work, but it's very hackish. This whole routine is pretty hackish. If @@ -770,14 +710,14 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, const GLubyte *bitmap ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - const GLfloat *rc = ctx->Current.RasterColor; + const GLfloat *rc = ctx->Current.RasterColor; GLint row, col; - r200Vertex vert; + radeonVertex vert; GLuint orig_vte; GLuint h; - /* Turn off tcl. + /* Turn off tcl. */ TCL_FALLBACK( ctx, R200_TCL_FALLBACK_BITMAP, 1 ); @@ -794,7 +734,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, vte |= R200_VTX_W0_FMT; vap &= ~R200_VAP_FORCE_W_TO_ONE; - rmesa->swtcl.vertex_size = 5; + rmesa->radeon.swtcl.vertex_size = 5; if ( (rmesa->hw.vtx.cmd[VTX_VTXFMT_0] != fmt_0) || (rmesa->hw.vtx.cmd[VTX_VTXFMT_1] != fmt_1) ) { @@ -828,7 +768,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, R200_VPORT_Z_SCALE_ENA | R200_VPORT_X_OFFSET_ENA | R200_VPORT_Y_OFFSET_ENA | - R200_VPORT_Z_OFFSET_ENA); + R200_VPORT_Z_OFFSET_ENA); /* Turn off other stuff: Stipple?, texture?, blending?, etc. */ @@ -871,16 +811,16 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, /* Update window height */ - LOCK_HARDWARE( rmesa ); - UNLOCK_HARDWARE( rmesa ); - h = rmesa->dri.drawable->h + rmesa->dri.drawable->y; - px += rmesa->dri.drawable->x; + LOCK_HARDWARE( &rmesa->radeon ); + UNLOCK_HARDWARE( &rmesa->radeon ); + h = radeon_get_drawable(&rmesa->radeon)->h + radeon_get_drawable(&rmesa->radeon)->y; + px += radeon_get_drawable(&rmesa->radeon)->x; /* Clipping handled by existing mechansims in r200_ioctl.c? */ for (row=0; row<height; row++) { - const GLubyte *src = (const GLubyte *) - _mesa_image_address2d(unpack, bitmap, width, height, + const GLubyte *src = (const GLubyte *) + _mesa_image_address2d(unpack, bitmap, width, height, GL_COLOR_INDEX, GL_BITMAP, row, 0 ); if (unpack->LsbFirst) { @@ -929,7 +869,7 @@ r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, /* Need to restore vertexformat? */ - if (rmesa->TclFallback) + if (rmesa->radeon.TclFallback) r200ChooseVertexState( ctx ); } @@ -959,20 +899,12 @@ void r200InitSwtcl( GLcontext *ctx ) tnl->Driver.Render.Interp = _tnl_interp; /* FIXME: what are these numbers? */ - _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, + _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 36 * sizeof(GLfloat) ); - - rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; - rmesa->swtcl.RenderIndex = ~0; - rmesa->swtcl.render_primitive = GL_TRIANGLES; - rmesa->swtcl.hw_primitive = 0; -} - -void r200DestroySwtcl( GLcontext *ctx ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - - if (rmesa->swtcl.indexed_verts.buf) - r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, __FUNCTION__ ); + rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; + rmesa->radeon.swtcl.RenderIndex = ~0; + rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES; + rmesa->radeon.swtcl.hw_primitive = 0; } + diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.h b/src/mesa/drivers/dri/r200/r200_swtcl.h index 8c29fd0c99..b0905879d7 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.h +++ b/src/mesa/drivers/dri/r200/r200_swtcl.h @@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r200_context.h" extern void r200InitSwtcl( GLcontext *ctx ); -extern void r200DestroySwtcl( GLcontext *ctx ); extern void r200ChooseRenderState( GLcontext *ctx ); extern void r200ChooseVertexState( GLcontext *ctx ); @@ -52,15 +51,11 @@ extern void r200BuildVertices( GLcontext *ctx, GLuint start, GLuint count, extern void r200PrintSetupFlags(char *msg, GLuint flags ); -extern void r200_emit_indexed_verts( GLcontext *ctx, - GLuint start, - GLuint count ); - extern void r200_translate_vertex( GLcontext *ctx, - const r200Vertex *src, + const radeonVertex *src, SWvertex *dst ); -extern void r200_print_vertex( GLcontext *ctx, const r200Vertex *v ); +extern void r200_print_vertex( GLcontext *ctx, const radeonVertex *v ); extern void r200_import_float_colors( GLcontext *ctx ); extern void r200_import_float_spec_colors( GLcontext *ctx ); @@ -70,5 +65,5 @@ extern void r200PointsBitmap( GLcontext *ctx, GLint px, GLint py, const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap ); - +void r200_swtcl_flush(GLcontext *ctx, uint32_t current_offset); #endif diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index 99aecfe1e9..580370933e 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -123,7 +123,7 @@ static GLboolean discrete_prim[0x10] = { #define RESET_STIPPLE() do { \ R200_STATECHANGE( rmesa, lin ); \ - r200EmitState( rmesa ); \ + radeonEmitState(&rmesa->radeon); \ } while (0) #define AUTO_STIPPLE( mode ) do { \ @@ -134,7 +134,7 @@ static GLboolean discrete_prim[0x10] = { else \ rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \ ~R200_LINE_PATTERN_AUTO_RESET; \ - r200EmitState( rmesa ); \ + radeonEmitState(&rmesa->radeon); \ } while (0) @@ -142,26 +142,24 @@ static GLboolean discrete_prim[0x10] = { static GLushort *r200AllocElts( r200ContextPtr rmesa, GLuint nr ) { - if (rmesa->dma.flush == r200FlushElts && - rmesa->store.cmd_used + nr*2 < R200_CMD_BUF_SZ) { + if (rmesa->radeon.dma.flush == r200FlushElts && + rmesa->tcl.elt_used + nr*2 < R200_ELT_BUF_SZ) { - GLushort *dest = (GLushort *)(rmesa->store.cmd_buf + - rmesa->store.cmd_used); + GLushort *dest = (GLushort *)(rmesa->radeon.tcl.elt_dma_bo->ptr + + rmesa->tcl.elt_used); - rmesa->store.cmd_used += nr*2; + rmesa->tcl.elt_used += nr*2; return dest; } else { - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); + if (rmesa->radeon.dma.flush) + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); - r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + ELTS_BUFSZ(nr) ); + rcommonEnsureCmdBufSpace(&rmesa->radeon, AOS_BUFSZ(rmesa->radeon.tcl.aos_count), __FUNCTION__); r200EmitAOS( rmesa, - rmesa->tcl.aos_components, - rmesa->tcl.nr_aos_components, 0 ); + rmesa->radeon.tcl.aos_count, 0 ); return r200AllocEltsOpenEnded( rmesa, rmesa->tcl.hw_primitive, nr ); } @@ -188,13 +186,14 @@ static void r200EmitPrim( GLcontext *ctx, r200ContextPtr rmesa = R200_CONTEXT( ctx ); r200TclPrimitive( ctx, prim, hwprim ); - r200EnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + VBUF_BUFSZ ); + // fprintf(stderr,"Emit prim %d\n", rmesa->radeon.tcl.aos_count); + rcommonEnsureCmdBufSpace( &rmesa->radeon, + AOS_BUFSZ(rmesa->radeon.tcl.aos_count) + + rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ ); r200EmitAOS( rmesa, - rmesa->tcl.aos_components, - rmesa->tcl.nr_aos_components, - start ); + rmesa->radeon.tcl.aos_count, + start ); /* Why couldn't this packet have taken an offset param? */ @@ -394,7 +393,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, /* TODO: separate this from the swtnl pipeline */ - if (rmesa->TclFallback) + if (rmesa->radeon.TclFallback) return GL_TRUE; /* fallback to software t&l */ if (R200_DEBUG & DEBUG_PRIMS) @@ -405,8 +404,9 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, /* Validate state: */ - if (rmesa->NewGLState) - r200ValidateState( ctx ); + if (rmesa->radeon.NewGLState) + if (!r200ValidateState( ctx )) + return GL_TRUE; /* fallback to sw t&l */ if (!ctx->VertexProgram._Enabled) { /* NOTE: inputs != tnl->render_inputs - these are the untransformed @@ -481,7 +481,7 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, /* Do the actual work: */ - r200ReleaseArrays( ctx, ~0 /* stage->changed_inputs */ ); + radeonReleaseArrays( ctx, ~0 /* stage->changed_inputs */ ); r200EmitArrays( ctx, vimap_rev ); rmesa->tcl.Elts = VB->Elts; @@ -545,7 +545,7 @@ static void transition_to_swtnl( GLcontext *ctx ) tnl->Driver.NotifyMaterialChange = _mesa_validate_all_lighting_tables; - r200ReleaseArrays( ctx, ~0 ); + radeonReleaseArrays( ctx, ~0 ); /* Still using the D3D based hardware-rasterizer from the radeon; * need to put the card into D3D mode to make it work: @@ -565,15 +565,11 @@ static void transition_to_hwtnl( GLcontext *ctx ) tnl->Driver.NotifyMaterialChange = r200UpdateMaterial; - if ( rmesa->dma.flush ) - rmesa->dma.flush( rmesa ); + if ( rmesa->radeon.dma.flush ) + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); - rmesa->dma.flush = NULL; + rmesa->radeon.dma.flush = NULL; - if (rmesa->swtcl.indexed_verts.buf) - r200ReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, - __FUNCTION__ ); - R200_STATECHANGE( rmesa, vap ); rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] |= R200_VAP_TCL_ENABLE; rmesa->hw.vap.cmd[VAP_SE_VAP_CNTL] &= ~R200_VAP_FORCE_W_TO_ONE; @@ -631,10 +627,10 @@ static char *getFallbackString(GLuint bit) void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - GLuint oldfallback = rmesa->TclFallback; + GLuint oldfallback = rmesa->radeon.TclFallback; if (mode) { - rmesa->TclFallback |= bit; + rmesa->radeon.TclFallback |= bit; if (oldfallback == 0) { if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "R200 begin tcl fallback %s\n", @@ -643,7 +639,7 @@ void r200TclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) } } else { - rmesa->TclFallback &= ~bit; + rmesa->radeon.TclFallback &= ~bit; if (oldfallback == bit) { if (R200_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "R200 end tcl fallback %s\n", diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index 259f35a34c..9f79157915 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -43,8 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/teximage.h" #include "main/texobj.h" -#include "texmem.h" - +#include "radeon_mipmap_tree.h" #include "r200_context.h" #include "r200_state.h" #include "r200_ioctl.h" @@ -63,10 +62,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * \param twrap Wrap mode for the \a t texture coordinate */ -static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap ) +static void r200SetTexWrap( radeonTexObjPtr t, GLenum swrap, GLenum twrap, GLenum rwrap ) { GLboolean is_clamp = GL_FALSE; GLboolean is_clamp_to_border = GL_FALSE; + struct gl_texture_object *tObj = &t->base; t->pp_txfilter &= ~(R200_CLAMP_S_MASK | R200_CLAMP_T_MASK | R200_BORDER_MODE_D3D); @@ -103,7 +103,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum _mesa_problem(NULL, "bad S wrap mode in %s", __FUNCTION__); } - if (t->base.tObj->Target != GL_TEXTURE_1D) { + if (tObj->Target != GL_TEXTURE_1D) { switch ( twrap ) { case GL_REPEAT: t->pp_txfilter |= R200_CLAMP_T_WRAP; @@ -180,7 +180,7 @@ static void r200SetTexWrap( r200TexObjPtr t, GLenum swrap, GLenum twrap, GLenum t->border_fallback = (is_clamp && is_clamp_to_border); } -static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max ) +static void r200SetTexMaxAnisotropy( radeonTexObjPtr t, GLfloat max ) { t->pp_txfilter &= ~R200_MAX_ANISO_MASK; @@ -205,10 +205,13 @@ static void r200SetTexMaxAnisotropy( r200TexObjPtr t, GLfloat max ) * \param magf Texture magnification mode */ -static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf ) +static void r200SetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf ) { GLuint anisotropy = (t->pp_txfilter & R200_MAX_ANISO_MASK); + /* Force revalidation to account for switches from/to mipmapping. */ + t->validated = GL_FALSE; + t->pp_txfilter &= ~(R200_MIN_FILTER_MASK | R200_MAG_FILTER_MASK); t->pp_txformat_x &= ~R200_VOLUME_FILTER_MASK; @@ -267,701 +270,16 @@ static void r200SetTexFilter( r200TexObjPtr t, GLenum minf, GLenum magf ) } } -static void r200SetTexBorderColor( r200TexObjPtr t, const GLfloat color[4] ) +static void r200SetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] ) { GLubyte c[4]; CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); - t->pp_border_color = r200PackColor( 4, c[0], c[1], c[2], c[3] ); -} - - -/** - * Allocate space for and load the mesa images into the texture memory block. - * This will happen before drawing with a new texture, or drawing with a - * texture after it was swapped out or teximaged again. - */ - -static r200TexObjPtr r200AllocTexObj( struct gl_texture_object *texObj ) -{ - r200TexObjPtr t; - - t = CALLOC_STRUCT( r200_tex_obj ); - texObj->DriverData = t; - if ( t != NULL ) { - if ( R200_DEBUG & DEBUG_TEXTURE ) { - fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, - (void *)t ); - } - - /* Initialize non-image-dependent parts of the state: - */ - t->base.tObj = texObj; - t->border_fallback = GL_FALSE; - - make_empty_list( & t->base ); - - r200SetTexWrap( t, texObj->WrapS, texObj->WrapT, texObj->WrapR ); - r200SetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); - r200SetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); - r200SetTexBorderColor( t, texObj->BorderColor ); - } - - return t; -} - -/* try to find a format which will only need a memcopy */ -static const struct gl_texture_format * -r200Choose8888TexFormat( GLenum srcFormat, GLenum srcType ) -{ - const GLuint ui = 1; - const GLubyte littleEndian = *((const GLubyte *) &ui); - - if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) || - (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) || - (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || - (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) { - return &_mesa_texformat_rgba8888; - } - else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || - (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) || - (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) || - (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) { - return &_mesa_texformat_rgba8888_rev; - } - else return _dri_texformat_argb8888; -} - -static const struct gl_texture_format * -r200ChooseTextureFormat( GLcontext *ctx, GLint internalFormat, - GLenum format, GLenum type ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - const GLboolean do32bpt = - ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 ); - const GLboolean force16bpt = - ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 ); - (void) format; - - switch ( internalFormat ) { - case 4: - case GL_RGBA: - case GL_COMPRESSED_RGBA: - switch ( type ) { - case GL_UNSIGNED_INT_10_10_10_2: - case GL_UNSIGNED_INT_2_10_10_10_REV: - return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555; - case GL_UNSIGNED_SHORT_4_4_4_4: - case GL_UNSIGNED_SHORT_4_4_4_4_REV: - return _dri_texformat_argb4444; - case GL_UNSIGNED_SHORT_5_5_5_1: - case GL_UNSIGNED_SHORT_1_5_5_5_REV: - return _dri_texformat_argb1555; - default: - return do32bpt ? - r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444; - } - - case 3: - case GL_RGB: - case GL_COMPRESSED_RGB: - switch ( type ) { - case GL_UNSIGNED_SHORT_4_4_4_4: - case GL_UNSIGNED_SHORT_4_4_4_4_REV: - return _dri_texformat_argb4444; - case GL_UNSIGNED_SHORT_5_5_5_1: - case GL_UNSIGNED_SHORT_1_5_5_5_REV: - return _dri_texformat_argb1555; - case GL_UNSIGNED_SHORT_5_6_5: - case GL_UNSIGNED_SHORT_5_6_5_REV: - return _dri_texformat_rgb565; - default: - return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; - } - - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - return !force16bpt ? - r200Choose8888TexFormat(format, type) : _dri_texformat_argb4444; - - case GL_RGBA4: - case GL_RGBA2: - return _dri_texformat_argb4444; - - case GL_RGB5_A1: - return _dri_texformat_argb1555; - - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; - - case GL_RGB5: - case GL_RGB4: - case GL_R3_G3_B2: - return _dri_texformat_rgb565; - - case GL_ALPHA: - case GL_ALPHA4: - case GL_ALPHA8: - case GL_ALPHA12: - case GL_ALPHA16: - case GL_COMPRESSED_ALPHA: - /* can't use a8 format since interpreting hw I8 as a8 would result - in wrong rgb values (same as alpha value instead of 0). */ - return _dri_texformat_al88; - - case 1: - case GL_LUMINANCE: - case GL_LUMINANCE4: - case GL_LUMINANCE8: - case GL_LUMINANCE12: - case GL_LUMINANCE16: - case GL_COMPRESSED_LUMINANCE: - return _dri_texformat_l8; - - case 2: - case GL_LUMINANCE_ALPHA: - case GL_LUMINANCE4_ALPHA4: - case GL_LUMINANCE6_ALPHA2: - case GL_LUMINANCE8_ALPHA8: - case GL_LUMINANCE12_ALPHA4: - case GL_LUMINANCE12_ALPHA12: - case GL_LUMINANCE16_ALPHA16: - case GL_COMPRESSED_LUMINANCE_ALPHA: - return _dri_texformat_al88; - - case GL_INTENSITY: - case GL_INTENSITY4: - case GL_INTENSITY8: - case GL_INTENSITY12: - case GL_INTENSITY16: - case GL_COMPRESSED_INTENSITY: - return _dri_texformat_i8; - - case GL_YCBCR_MESA: - if (type == GL_UNSIGNED_SHORT_8_8_APPLE || - type == GL_UNSIGNED_BYTE) - return &_mesa_texformat_ycbcr; - else - return &_mesa_texformat_ycbcr_rev; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; - - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return &_mesa_texformat_rgba_dxt1; - - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - return &_mesa_texformat_rgba_dxt3; - - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return &_mesa_texformat_rgba_dxt5; - - default: - _mesa_problem(ctx, - "unexpected internalFormat 0x%x in r200ChooseTextureFormat", - (int) internalFormat); - return NULL; - } - - return NULL; /* never get here */ -} - - -static GLboolean -r200ValidateClientStorage( GLcontext *ctx, GLenum target, - GLint internalFormat, - GLint srcWidth, GLint srcHeight, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) - -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - - if ( R200_DEBUG & DEBUG_TEXTURE ) - fprintf(stderr, "intformat %s format %s type %s\n", - _mesa_lookup_enum_by_nr( internalFormat ), - _mesa_lookup_enum_by_nr( format ), - _mesa_lookup_enum_by_nr( type )); - - if (!ctx->Unpack.ClientStorage) - return 0; - - if (ctx->_ImageTransferState || - texImage->IsCompressed || - texObj->GenerateMipmap) - return 0; - - - /* This list is incomplete, may be different on ppc??? - */ - switch ( internalFormat ) { - case GL_RGBA: - if ( format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV ) { - texImage->TexFormat = _dri_texformat_argb8888; - } - else - return 0; - break; - - case GL_RGB: - if ( format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5 ) { - texImage->TexFormat = _dri_texformat_rgb565; - } - else - return 0; - break; - - case GL_YCBCR_MESA: - if ( format == GL_YCBCR_MESA && - type == GL_UNSIGNED_SHORT_8_8_REV_APPLE ) { - texImage->TexFormat = &_mesa_texformat_ycbcr_rev; - } - else if ( format == GL_YCBCR_MESA && - (type == GL_UNSIGNED_SHORT_8_8_APPLE || - type == GL_UNSIGNED_BYTE)) { - texImage->TexFormat = &_mesa_texformat_ycbcr; - } - else - return 0; - break; - - default: - return 0; - } - - /* Could deal with these packing issues, but currently don't: - */ - if (packing->SkipPixels || - packing->SkipRows || - packing->SwapBytes || - packing->LsbFirst) { - return 0; - } - - { - GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, - format, type); - - - if ( R200_DEBUG & DEBUG_TEXTURE ) - fprintf(stderr, "%s: srcRowStride %d/%x\n", - __FUNCTION__, srcRowStride, srcRowStride); - - /* Could check this later in upload, pitch restrictions could be - * relaxed, but would need to store the image pitch somewhere, - * as packing details might change before image is uploaded: - */ - if (!r200IsGartMemory( rmesa, pixels, srcHeight * srcRowStride ) || - (srcRowStride & 63)) - return 0; - - - /* Have validated that _mesa_transfer_teximage would be a straight - * memcpy at this point. NOTE: future calls to TexSubImage will - * overwrite the client data. This is explicitly mentioned in the - * extension spec. - */ - texImage->Data = (void *)pixels; - texImage->IsClientData = GL_TRUE; - texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes; - - return 1; - } -} - - -static void r200TexImage1D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - - if ( t ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) r200AllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); - return; - } - } - - /* Note, this will call ChooseTextureFormat */ - _mesa_store_teximage1d(ctx, target, level, internalFormat, - width, border, format, type, pixels, - &ctx->Unpack, texObj, texImage); - - t->dirty_images[0] |= (1 << level); -} - - -static void r200TexSubImage1D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, - GLsizei width, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - - assert( t ); /* this _should_ be true */ - if ( t ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) r200AllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); - return; - } - } - - _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, - format, type, pixels, packing, texObj, - texImage); - - t->dirty_images[0] |= (1 << level); -} - - -static void r200TexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - if ( t != NULL ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) r200AllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); - return; - } - } - - texImage->IsClientData = GL_FALSE; - - if (r200ValidateClientStorage( ctx, target, - internalFormat, - width, height, - format, type, pixels, - packing, texObj, texImage)) { - if (R200_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); - } - else { - if (R200_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); - - /* Normal path: copy (to cached memory) and eventually upload - * via another copy to GART memory and then a blit... Could - * eliminate one copy by going straight to (permanent) GART. - * - * Note, this will call r200ChooseTextureFormat. - */ - _mesa_store_teximage2d(ctx, target, level, internalFormat, - width, height, border, format, type, pixels, - &ctx->Unpack, texObj, texImage); - - t->dirty_images[face] |= (1 << level); - } -} - - -static void r200TexSubImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - assert( t ); /* this _should_ be true */ - if ( t ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) r200AllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); - return; - } - } - - _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, type, pixels, packing, texObj, - texImage); - - t->dirty_images[face] |= (1 << level); -} - - -static void r200CompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - if ( t != NULL ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) r200AllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D"); - return; - } - } - - texImage->IsClientData = GL_FALSE; -/* can't call this, different parameters. Would never evaluate to true anyway currently - if (r200ValidateClientStorage( ctx, target, - internalFormat, - width, height, - format, type, pixels, - packing, texObj, texImage)) { - if (R200_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); - } - else */{ - if (R200_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); - - /* Normal path: copy (to cached memory) and eventually upload - * via another copy to GART memory and then a blit... Could - * eliminate one copy by going straight to (permanent) GART. - * - * Note, this will call r200ChooseTextureFormat. - */ - _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width, - height, border, imageSize, data, texObj, texImage); - - t->dirty_images[face] |= (1 << level); - } + t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); } - -static void r200CompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - assert( t ); /* this _should_ be true */ - if ( t ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) r200AllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D"); - return; - } - } - - _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, imageSize, data, texObj, texImage); - - t->dirty_images[face] |= (1 << level); -} - - -#if ENABLE_HW_3D_TEXTURE -static void r200TexImage3D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint depth, - GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - - if ( t ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) r200AllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D"); - return; - } - } - - texImage->IsClientData = GL_FALSE; - -#if 0 - if (r200ValidateClientStorage( ctx, target, - internalFormat, - width, height, - format, type, pixels, - packing, texObj, texImage)) { - if (R200_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using client storage\n", __FUNCTION__); - } - else -#endif - { - if (R200_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using normal storage\n", __FUNCTION__); - - /* Normal path: copy (to cached memory) and eventually upload - * via another copy to GART memory and then a blit... Could - * eliminate one copy by going straight to (permanent) GART. - * - * Note, this will call r200ChooseTextureFormat. - */ - _mesa_store_teximage3d(ctx, target, level, internalFormat, - width, height, depth, border, - format, type, pixels, - &ctx->Unpack, texObj, texImage); - - t->dirty_images[0] |= (1 << level); - } -} -#endif - - -#if ENABLE_HW_3D_TEXTURE -static void -r200TexSubImage3D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - -/* fprintf(stderr, "%s\n", __FUNCTION__); */ - - assert( t ); /* this _should_ be true */ - if ( t ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) r200AllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D"); - return; - } - texObj->DriverData = t; - } - - _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing, texObj, texImage); - - t->dirty_images[0] |= (1 << level); -} -#endif - - - static void r200TexEnv( GLcontext *ctx, GLenum target, GLenum pname, const GLfloat *param ) { @@ -983,7 +301,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target, GLubyte c[4]; GLuint envColor; UNCLAMPED_FLOAT_TO_RGBA_CHAN( c, texUnit->EnvColor ); - envColor = r200PackColor( 4, c[0], c[1], c[2], c[3] ); + envColor = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); if ( rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] != envColor ) { R200_STATECHANGE( rmesa, tf ); rmesa->hw.tf.cmd[TF_TFACTOR_0 + unit] = envColor; @@ -1002,7 +320,7 @@ static void r200TexEnv( GLcontext *ctx, GLenum target, * NOTE: Add a small bias to the bias for conform mipsel.c test. */ bias = *param + .01; - min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ? + min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ? 0.0 : -16.0; bias = CLAMP( bias, min, 16.0 ); b = (int)(bias * fixed_one) & R200_LOD_BIAS_MASK; @@ -1039,7 +357,7 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, struct gl_texture_object *texObj, GLenum pname, const GLfloat *params ) { - r200TexObjPtr t = (r200TexObjPtr) texObj->DriverData; + radeonTexObj* t = radeon_tex_obj(texObj); if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { fprintf( stderr, "%s( %s )\n", __FUNCTION__, @@ -1073,59 +391,46 @@ static void r200TexParameter( GLcontext *ctx, GLenum target, * we just have to rely on loading the right subset of mipmap levels * to simulate a clamped LOD. */ - driSwapOutTextureObject( (driTextureObject *) t ); + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = 0; + t->validated = GL_FALSE; + } break; default: return; } - - /* Mark this texobj as dirty (one bit per tex unit) - */ - t->dirty_state = TEX_ALL; } - -static void r200BindTexture( GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj ) -{ - if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { - fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj, - ctx->Texture.CurrentUnit ); - } - - if ( (target == GL_TEXTURE_1D) - || (target == GL_TEXTURE_2D) -#if ENABLE_HW_3D_TEXTURE - || (target == GL_TEXTURE_3D) -#endif - || (target == GL_TEXTURE_CUBE_MAP) - || (target == GL_TEXTURE_RECTANGLE_NV) ) { - assert( texObj->DriverData != NULL ); - } -} - - -static void r200DeleteTexture( GLcontext *ctx, - struct gl_texture_object *texObj ) +static void r200DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - driTextureObject * t = (driTextureObject *) texObj->DriverData; - - if ( R200_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { - fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, - _mesa_lookup_enum_by_nr( texObj->Target ) ); + radeonTexObj* t = radeon_tex_obj(texObj); + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + (void *)texObj, + _mesa_lookup_enum_by_nr(texObj->Target)); + } + + if (rmesa) { + int i; + radeon_firevertices(&rmesa->radeon); + for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) { + if ( t == rmesa->state.texture.unit[i].texobj ) { + rmesa->state.texture.unit[i].texobj = NULL; + rmesa->hw.tex[i].dirty = GL_FALSE; + rmesa->hw.cube[i].dirty = GL_FALSE; + } + } } - - if ( t != NULL ) { - if ( rmesa ) { - R200_FIREVERTICES( rmesa ); - } - - driDestroyTextureObject( t ); + + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = 0; } - /* Free mipmap images and the texture object itself */ _mesa_delete_texture_object(ctx, texObj); } @@ -1155,46 +460,59 @@ static void r200TexGen( GLcontext *ctx, * Called via ctx->Driver.NewTextureObject. * Note: this function will be called during context creation to * allocate the default texture objects. - * Note: we could use containment here to 'derive' the driver-specific - * texture object from the core mesa gl_texture_object. Not done at this time. * Fixup MaxAnisotropy according to user preference. */ -static struct gl_texture_object * -r200NewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) +static struct gl_texture_object *r200NewTextureObject(GLcontext * ctx, + GLuint name, + GLenum target) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - struct gl_texture_object *obj; - obj = _mesa_new_texture_object(ctx, name, target); - if (!obj) - return NULL; - obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; - r200AllocTexObj( obj ); - return obj; + radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); + + + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + t, _mesa_lookup_enum_by_nr(target)); + } + + _mesa_initialize_texture_object(&t->base, name, target); + t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; + + /* Initialize hardware state */ + r200SetTexWrap( t, t->base.WrapS, t->base.WrapT, t->base.WrapR ); + r200SetTexMaxAnisotropy( t, t->base.MaxAnisotropy ); + r200SetTexFilter(t, t->base.MinFilter, t->base.MagFilter); + r200SetTexBorderColor(t, t->base.BorderColor); + + return &t->base; } + void r200InitTextureFuncs( struct dd_function_table *functions ) { /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. */ - functions->ChooseTextureFormat = r200ChooseTextureFormat; - functions->TexImage1D = r200TexImage1D; - functions->TexImage2D = r200TexImage2D; + functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa; + functions->TexImage1D = radeonTexImage1D; + functions->TexImage2D = radeonTexImage2D; #if ENABLE_HW_3D_TEXTURE - functions->TexImage3D = r200TexImage3D; + functions->TexImage3D = radeonTexImage3D; #else functions->TexImage3D = _mesa_store_teximage3d; #endif - functions->TexSubImage1D = r200TexSubImage1D; - functions->TexSubImage2D = r200TexSubImage2D; + functions->TexSubImage1D = radeonTexSubImage1D; + functions->TexSubImage2D = radeonTexSubImage2D; #if ENABLE_HW_3D_TEXTURE - functions->TexSubImage3D = r200TexSubImage3D; + functions->TexSubImage3D = radeonTexSubImage3D; #else functions->TexSubImage3D = _mesa_store_texsubimage3d; #endif + functions->GetTexImage = radeonGetTexImage; + functions->GetCompressedTexImage = radeonGetCompressedTexImage; functions->NewTextureObject = r200NewTextureObject; - functions->BindTexture = r200BindTexture; + // functions->BindTexture = r200BindTexture; functions->DeleteTexture = r200DeleteTexture; functions->IsTextureResident = driIsTextureResident; @@ -1202,22 +520,16 @@ void r200InitTextureFuncs( struct dd_function_table *functions ) functions->TexParameter = r200TexParameter; functions->TexGen = r200TexGen; - functions->CompressedTexImage2D = r200CompressedTexImage2D; - functions->CompressedTexSubImage2D = r200CompressedTexSubImage2D; + functions->CompressedTexImage2D = radeonCompressedTexImage2D; + functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; - driInitTextureFormats(); + functions->GenerateMipmap = radeonGenerateMipmap; -#if 000 - /* moved or obsolete code */ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - driInitTextureObjects( ctx, & rmesa->swapped, - DRI_TEXMGR_DO_TEXTURE_1D - | DRI_TEXMGR_DO_TEXTURE_2D ); + functions->NewTextureImage = radeonNewTextureImage; + functions->FreeTexImageData = radeonFreeTexImageData; + functions->MapTexture = radeonMapTexture; + functions->UnmapTexture = radeonUnmapTexture; + + driInitTextureFormats(); - /* Hack: r200NewTextureObject is not yet installed when the - * default textures are created. Therefore set MaxAnisotropy of the - * default 2D texture now. */ - ctx->Shared->Default2D->MaxAnisotropy = driQueryOptionf (&rmesa->optionCache, - "def_max_anisotropy"); -#endif } diff --git a/src/mesa/drivers/dri/r200/r200_tex.h b/src/mesa/drivers/dri/r200/r200_tex.h index 10ff8e8a66..e122de6e5e 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.h +++ b/src/mesa/drivers/dri/r200/r200_tex.h @@ -35,15 +35,18 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef __R200_TEX_H__ #define __R200_TEX_H__ +extern void r200SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv); +extern void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, + __DRIdrawable *dPriv); extern void r200SetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); extern void r200UpdateTextureState( GLcontext *ctx ); -extern int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face ); +extern int r200UploadTexImages( r200ContextPtr rmesa, radeonTexObjPtr t, GLuint face ); -extern void r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t ); +extern void r200DestroyTexObj( r200ContextPtr rmesa, radeonTexObjPtr t ); extern void r200InitTextureFuncs( struct dd_function_table *functions ); diff --git a/src/mesa/drivers/dri/r200/r200_texmem.c b/src/mesa/drivers/dri/r200/r200_texmem.c deleted file mode 100644 index 3b81ac0c80..0000000000 --- a/src/mesa/drivers/dri/r200/r200_texmem.c +++ /dev/null @@ -1,530 +0,0 @@ -/************************************************************************** - -Copyright (C) Tungsten Graphics 2002. All Rights Reserved. -The Weather Channel, Inc. funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 -license. This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation on the rights to use, copy, modify, merge, publish, -distribute, sub license, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR -SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Kevin E. Martin <martin@valinux.com> - * Gareth Hughes <gareth@valinux.com> - * - */ - -#include <errno.h> - -#include "main/glheader.h" -#include "main/imports.h" -#include "main/context.h" -#include "main/colormac.h" -#include "main/macros.h" -#include "r200_context.h" -#include "r200_ioctl.h" -#include "r200_tex.h" -#include "radeon_reg.h" - -#include <unistd.h> /* for usleep() */ - - -/** - * Destroy any device-dependent state associated with the texture. This may - * include NULLing out hardware state that points to the texture. - */ -void -r200DestroyTexObj( r200ContextPtr rmesa, r200TexObjPtr t ) -{ - if ( R200_DEBUG & DEBUG_TEXTURE ) { - fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, - (void *)t, (void *)t->base.tObj ); - } - - if ( rmesa != NULL ) { - unsigned i; - - - for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) { - if ( t == rmesa->state.texture.unit[i].texobj ) { - rmesa->state.texture.unit[i].texobj = NULL; - rmesa->hw.tex[i].dirty = GL_FALSE; - rmesa->hw.cube[i].dirty = GL_FALSE; - } - } - } -} - - -/* ------------------------------------------------------------ - * Texture image conversions - */ - - -static void r200UploadGARTClientSubImage( r200ContextPtr rmesa, - r200TexObjPtr t, - struct gl_texture_image *texImage, - GLint hwlevel, - GLint x, GLint y, - GLint width, GLint height ) -{ - const struct gl_texture_format *texFormat = texImage->TexFormat; - GLuint srcPitch, dstPitch; - int blit_format; - int srcOffset; - - /* - * XXX it appears that we always upload the full image, not a subimage. - * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever - * changed, the src pitch will have to change. - */ - switch ( texFormat->TexelBytes ) { - case 1: - blit_format = R200_CP_COLOR_FORMAT_CI8; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; - case 2: - blit_format = R200_CP_COLOR_FORMAT_RGB565; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; - case 4: - blit_format = R200_CP_COLOR_FORMAT_ARGB8888; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; - default: - return; - } - - t->image[0][hwlevel].data = texImage->Data; - srcOffset = r200GartOffsetFromVirtual( rmesa, texImage->Data ); - - assert( srcOffset != ~0 ); - - /* Don't currently need to cope with small pitches? - */ - width = texImage->Width; - height = texImage->Height; - - r200EmitWait( rmesa, RADEON_WAIT_3D ); - - r200EmitBlit( rmesa, blit_format, - srcPitch, - srcOffset, - dstPitch, - t->bufAddr, - x, - y, - t->image[0][hwlevel].x + x, - t->image[0][hwlevel].y + y, - width, - height ); - - r200EmitWait( rmesa, RADEON_WAIT_2D ); -} - -static void r200UploadRectSubImage( r200ContextPtr rmesa, - r200TexObjPtr t, - struct gl_texture_image *texImage, - GLint x, GLint y, - GLint width, GLint height ) -{ - const struct gl_texture_format *texFormat = texImage->TexFormat; - int blit_format, dstPitch, done; - - switch ( texFormat->TexelBytes ) { - case 1: - blit_format = R200_CP_COLOR_FORMAT_CI8; - break; - case 2: - blit_format = R200_CP_COLOR_FORMAT_RGB565; - break; - case 4: - blit_format = R200_CP_COLOR_FORMAT_ARGB8888; - break; - default: - return; - } - - t->image[0][0].data = texImage->Data; - - /* Currently don't need to cope with small pitches. - */ - width = texImage->Width; - height = texImage->Height; - dstPitch = t->pp_txpitch + 32; - - if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) { - /* In this case, could also use GART texturing. This is - * currently disabled, but has been tested & works. - */ - if ( !t->image_override ) - t->pp_txoffset = r200GartOffsetFromVirtual( rmesa, texImage->Data ); - t->pp_txpitch = texImage->RowStride * texFormat->TexelBytes - 32; - - if (R200_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "Using GART texturing for rectangular client texture\n"); - - /* Release FB memory allocated for this image: - */ - /* FIXME This may not be correct as driSwapOutTextureObject sets - * FIXME dirty_images. It may be fine, though. - */ - if ( t->base.memBlock ) { - driSwapOutTextureObject( (driTextureObject *) t ); - } - } - else if (texImage->IsClientData) { - /* Data already in GART memory, with usable pitch. - */ - GLuint srcPitch; - srcPitch = texImage->RowStride * texFormat->TexelBytes; - r200EmitBlit( rmesa, - blit_format, - srcPitch, - r200GartOffsetFromVirtual( rmesa, texImage->Data ), - dstPitch, t->bufAddr, - 0, 0, - 0, 0, - width, height ); - } - else { - /* Data not in GART memory, or bad pitch. - */ - for (done = 0; done < height ; ) { - struct r200_dma_region region; - int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch ); - int src_pitch; - char *tex; - - src_pitch = texImage->RowStride * texFormat->TexelBytes; - - tex = (char *)texImage->Data + done * src_pitch; - - memset(®ion, 0, sizeof(region)); - r200AllocDmaRegion( rmesa, ®ion, lines * dstPitch, 1024 ); - - /* Copy texdata to dma: - */ - if (0) - fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n", - __FUNCTION__, src_pitch, dstPitch); - - if (src_pitch == dstPitch) { - memcpy( region.address + region.start, tex, lines * src_pitch ); - } - else { - char *buf = region.address + region.start; - int i; - for (i = 0 ; i < lines ; i++) { - memcpy( buf, tex, src_pitch ); - buf += dstPitch; - tex += src_pitch; - } - } - - r200EmitWait( rmesa, RADEON_WAIT_3D ); - - /* Blit to framebuffer - */ - r200EmitBlit( rmesa, - blit_format, - dstPitch, GET_START( ®ion ), - dstPitch | (t->tile_bits >> 16), - t->bufAddr, - 0, 0, - 0, done, - width, lines ); - - r200EmitWait( rmesa, RADEON_WAIT_2D ); - - r200ReleaseDmaRegion( rmesa, ®ion, __FUNCTION__ ); - done += lines; - } - } -} - - -/** - * Upload the texture image associated with texture \a t at the specified - * level at the address relative to \a start. - */ -static void uploadSubImage( r200ContextPtr rmesa, r200TexObjPtr t, - GLint hwlevel, - GLint x, GLint y, GLint width, GLint height, - GLuint face ) -{ - struct gl_texture_image *texImage = NULL; - GLuint offset; - GLint imageWidth, imageHeight; - GLint ret; - drm_radeon_texture_t tex; - drm_radeon_tex_image_t tmp; - const int level = hwlevel + t->base.firstLevel; - - if ( R200_DEBUG & DEBUG_TEXTURE ) { - fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", - __FUNCTION__, (void *)t, (void *)t->base.tObj, - level, width, height, face ); - } - - ASSERT(face < 6); - - /* Ensure we have a valid texture to upload */ - if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) { - _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); - return; - } - - texImage = t->base.tObj->Image[face][level]; - - if ( !texImage ) { - if ( R200_DEBUG & DEBUG_TEXTURE ) - fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level ); - return; - } - if ( !texImage->Data ) { - if ( R200_DEBUG & DEBUG_TEXTURE ) - fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ ); - return; - } - - - if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - assert(level == 0); - assert(hwlevel == 0); - if ( R200_DEBUG & DEBUG_TEXTURE ) - fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__); - r200UploadRectSubImage( rmesa, t, texImage, x, y, width, height ); - return; - } - else if (texImage->IsClientData) { - if ( R200_DEBUG & DEBUG_TEXTURE ) - fprintf( stderr, "%s: image data is in GART client storage\n", - __FUNCTION__); - r200UploadGARTClientSubImage( rmesa, t, texImage, hwlevel, - x, y, width, height ); - return; - } - else if ( R200_DEBUG & DEBUG_TEXTURE ) - fprintf( stderr, "%s: image data is in normal memory\n", - __FUNCTION__); - - - imageWidth = texImage->Width; - imageHeight = texImage->Height; - - offset = t->bufAddr + t->base.totalSize / 6 * face; - - if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { - GLint imageX = 0; - GLint imageY = 0; - GLint blitX = t->image[face][hwlevel].x; - GLint blitY = t->image[face][hwlevel].y; - GLint blitWidth = t->image[face][hwlevel].width; - GLint blitHeight = t->image[face][hwlevel].height; - fprintf( stderr, " upload image: %d,%d at %d,%d\n", - imageWidth, imageHeight, imageX, imageY ); - fprintf( stderr, " upload blit: %d,%d at %d,%d\n", - blitWidth, blitHeight, blitX, blitY ); - fprintf( stderr, " blit ofs: 0x%07x level: %d/%d\n", - (GLuint)offset, hwlevel, level ); - } - - t->image[face][hwlevel].data = texImage->Data; - - /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. - * NOTE: we're always use a 1KB-wide blit and I8 texture format. - * We used to use 1, 2 and 4-byte texels and used to use the texture - * width to dictate the blit width - but that won't work for compressed - * textures. (Brian) - * NOTE: can't do that with texture tiling. (sroland) - */ - tex.offset = offset; - tex.image = &tmp; - /* copy (x,y,width,height,data) */ - memcpy( &tmp, &t->image[face][hwlevel], sizeof(tmp) ); - - if (texImage->TexFormat->TexelBytes) { - /* use multi-byte upload scheme */ - tex.height = imageHeight; - tex.width = imageWidth; - tex.format = t->pp_txformat & R200_TXFORMAT_FORMAT_MASK; - if (tex.format == R200_TXFORMAT_ABGR8888) { - /* drm will refuse abgr8888 textures. */ - tex.format = R200_TXFORMAT_ARGB8888; - } - tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1); - tex.offset += tmp.x & ~1023; - tmp.x = tmp.x % 1024; - if (t->tile_bits & R200_TXO_MICRO_TILE) { - /* need something like "tiled coordinates" ? */ - tmp.y = tmp.x / (tex.pitch * 128) * 2; - tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes; - tex.pitch |= RADEON_DST_TILE_MICRO >> 22; - } - else { - tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); - } - if ((t->tile_bits & R200_TXO_MACRO_TILE) && - (texImage->Width * texImage->TexFormat->TexelBytes >= 256) && - ((!(t->tile_bits & R200_TXO_MICRO_TILE) && (texImage->Height >= 8)) || - (texImage->Height >= 16))) { - /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, - OR if height is smaller than 8 automatically, but if micro tiling is active - the limit is height 16 instead ? */ - tex.pitch |= RADEON_DST_TILE_MACRO >> 22; - } - } - else { - /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is - needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ - /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed - so the kernel module reads the right amount of data. */ - tex.format = R200_TXFORMAT_I8; /* any 1-byte texel format */ - tex.pitch = (BLIT_WIDTH_BYTES / 64); - tex.height = (imageHeight + 3) / 4; - tex.width = (imageWidth + 3) / 4; - switch (t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) { - case R200_TXFORMAT_DXT1: - tex.width *= 8; - break; - case R200_TXFORMAT_DXT23: - case R200_TXFORMAT_DXT45: - tex.width *= 16; - break; - default: - fprintf(stderr, "unknown compressed tex format in uploadSubImage\n"); - } - } - - LOCK_HARDWARE( rmesa ); - do { - ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE, - &tex, sizeof(drm_radeon_texture_t) ); - if (ret) { - if (R200_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n"); - usleep(1); - } - } while ( ret == -EAGAIN ); - - UNLOCK_HARDWARE( rmesa ); - - if ( ret ) { - fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret ); - fprintf( stderr, " offset=0x%08x\n", - offset ); - fprintf( stderr, " image width=%d height=%d\n", - imageWidth, imageHeight ); - fprintf( stderr, " blit width=%d height=%d data=%p\n", - t->image[face][hwlevel].width, t->image[face][hwlevel].height, - t->image[face][hwlevel].data ); - exit( 1 ); - } -} - - -/** - * Upload the texture images associated with texture \a t. This might - * require the allocation of texture memory. - * - * \param rmesa Context pointer - * \param t Texture to be uploaded - * \param face Cube map face to be uploaded. Zero for non-cube maps. - */ - -int r200UploadTexImages( r200ContextPtr rmesa, r200TexObjPtr t, GLuint face ) -{ - const int numLevels = t->base.lastLevel - t->base.firstLevel + 1; - - if ( R200_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { - fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, - (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize, - t->base.firstLevel, t->base.lastLevel ); - } - - if ( !t || t->base.totalSize == 0 || t->image_override ) - return 0; - - if (R200_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); - r200Finish( rmesa->glCtx ); - } - - LOCK_HARDWARE( rmesa ); - - if ( t->base.memBlock == NULL ) { - int heap; - - heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps, - (driTextureObject *) t ); - if ( heap == -1 ) { - UNLOCK_HARDWARE( rmesa ); - return -1; - } - - /* Set the base offset of the texture image */ - t->bufAddr = rmesa->r200Screen->texOffset[heap] - + t->base.memBlock->ofs; - t->pp_txoffset = t->bufAddr; - - if (!(t->base.tObj->Image[0][0]->IsClientData)) { - /* hope it's safe to add that here... */ - t->pp_txoffset |= t->tile_bits; - } - - /* Mark this texobj as dirty on all units: - */ - t->dirty_state = TEX_ALL; - } - - /* Let the world know we've used this memory recently. - */ - driUpdateTextureLRU( (driTextureObject *) t ); - UNLOCK_HARDWARE( rmesa ); - - /* Upload any images that are new */ - if (t->base.dirty_images[face]) { - int i; - for ( i = 0 ; i < numLevels ; i++ ) { - if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) { - uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width, - t->image[face][i].height, face ); - } - } - t->base.dirty_images[face] = 0; - } - - - if (R200_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); - r200Finish( rmesa->glCtx ); - } - - return 0; -} diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 0ad5651cd4..ed1995e147 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -37,9 +37,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/macros.h" #include "main/texformat.h" +#include "main/teximage.h" #include "main/texobj.h" #include "main/enums.h" +#include "radeon_common.h" +#include "radeon_mipmap_tree.h" #include "r200_context.h" #include "r200_state.h" #include "r200_ioctl.h" @@ -139,257 +142,6 @@ static const struct tx_table tx_table_le[] = #undef _ALPHA #undef _INVALID -/** - * This function computes the number of bytes of storage needed for - * the given texture object (all mipmap levels, all cube faces). - * The \c image[face][level].x/y/width/height parameters for upload/blitting - * are computed here. \c pp_txfilter, \c pp_txformat, etc. will be set here - * too. - * - * \param rmesa Context pointer - * \param tObj GL texture object whose images are to be posted to - * hardware state. - */ -static void r200SetTexImages( r200ContextPtr rmesa, - struct gl_texture_object *tObj ) -{ - r200TexObjPtr t = (r200TexObjPtr)tObj->DriverData; - const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint curOffset, blitWidth; - GLint i, texelBytes; - GLint numLevels; - GLint log2Width, log2Height, log2Depth; - - /* Set the hardware texture format - */ - if ( !t->image_override ) { - if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) { - const struct tx_table *table = _mesa_little_endian() ? tx_table_le : - tx_table_be; - - t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK | - R200_TXFORMAT_ALPHA_IN_MAP); - t->pp_txfilter &= ~R200_YUV_TO_RGB; - - t->pp_txformat |= table[ baseImage->TexFormat->MesaFormat ].format; - t->pp_txfilter |= table[ baseImage->TexFormat->MesaFormat ].filter; - } - else { - _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); - return; - } - } - - texelBytes = baseImage->TexFormat->TexelBytes; - - /* Compute which mipmap levels we really want to send to the hardware. - */ - - driCalculateTextureFirstLastLevel( (driTextureObject *) t ); - log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; - log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; - log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; - - numLevels = t->base.lastLevel - t->base.firstLevel + 1; - - assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); - - /* Calculate mipmap offsets and dimensions for blitting (uploading) - * The idea is that we lay out the mipmap levels within a block of - * memory organized as a rectangle of width BLIT_WIDTH_BYTES. - */ - curOffset = 0; - blitWidth = BLIT_WIDTH_BYTES; - t->tile_bits = 0; - - /* figure out if this texture is suitable for tiling. */ - if (texelBytes) { - if (rmesa->texmicrotile && (tObj->Target != GL_TEXTURE_RECTANGLE_NV) && - /* texrect might be able to use micro tiling too in theory? */ - (baseImage->Height > 1)) { - /* allow 32 (bytes) x 1 mip (which will use two times the space - the non-tiled version would use) max if base texture is large enough */ - if ((numLevels == 1) || - (((baseImage->Width * texelBytes / baseImage->Height) <= 32) && - (baseImage->Width * texelBytes > 64)) || - ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) { - t->tile_bits |= R200_TXO_MICRO_TILE; - } - } - if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { - /* we can set macro tiling even for small textures, they will be untiled anyway */ - t->tile_bits |= R200_TXO_MACRO_TILE; - } - } - - for (i = 0; i < numLevels; i++) { - const struct gl_texture_image *texImage; - GLuint size; - - texImage = tObj->Image[0][i + t->base.firstLevel]; - if ( !texImage ) - break; - - /* find image size in bytes */ - if (texImage->IsCompressed) { - /* need to calculate the size AFTER padding even though the texture is - submitted without padding. - Only handle pot textures currently - don't know if npot is even possible, - size calculation would certainly need (trivial) adjustments. - Align (and later pad) to 32byte, not sure what that 64byte blit width is - good for? */ - if ((t->pp_txformat & R200_TXFORMAT_FORMAT_MASK) == R200_TXFORMAT_DXT1) { - /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */ - if ((texImage->Width + 3) < 8) /* width one block */ - size = texImage->CompressedSize * 4; - else if ((texImage->Width + 3) < 16) - size = texImage->CompressedSize * 2; - else size = texImage->CompressedSize; - } - else /* DXT3/5, 16 bytes per block */ - if ((texImage->Width + 3) < 8) - size = texImage->CompressedSize * 2; - else size = texImage->CompressedSize; - } - else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height; - } - else if (t->tile_bits & R200_TXO_MICRO_TILE) { - /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, - though the actual offset may be different (if texture is less than - 32 bytes width) to the untiled case */ - int w = (texImage->Width * texelBytes * 2 + 31) & ~31; - size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } - else { - int w = (texImage->Width * texelBytes + 31) & ~31; - size = w * texImage->Height * texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } - assert(size > 0); - - /* Align to 32-byte offset. It is faster to do this unconditionally - * (no branch penalty). - */ - - curOffset = (curOffset + 0x1f) & ~0x1f; - - if (texelBytes) { - t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */ - t->image[0][i].y = 0; - t->image[0][i].width = MIN2(size / texelBytes, blitWidth); - t->image[0][i].height = (size / texelBytes) / t->image[0][i].width; - } - else { - t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; - t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; - t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); - t->image[0][i].height = size / t->image[0][i].width; - } - -#if 0 - /* for debugging only and only applicable to non-rectangle targets */ - assert(size % t->image[0][i].width == 0); - assert(t->image[0][i].x == 0 - || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1)); -#endif - - if (0) - fprintf(stderr, - "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", - i, texImage->Width, texImage->Height, - t->image[0][i].x, t->image[0][i].y, - t->image[0][i].width, t->image[0][i].height, size, curOffset); - - curOffset += size; - - } - - /* Align the total size of texture memory block. - */ - t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; - - /* Setup remaining cube face blits, if needed */ - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - const GLuint faceSize = t->base.totalSize; - GLuint face; - /* reuse face 0 x/y/width/height - just update the offset when uploading */ - for (face = 1; face < 6; face++) { - for (i = 0; i < numLevels; i++) { - t->image[face][i].x = t->image[0][i].x; - t->image[face][i].y = t->image[0][i].y; - t->image[face][i].width = t->image[0][i].width; - t->image[face][i].height = t->image[0][i].height; - } - } - t->base.totalSize = 6 * faceSize; /* total texmem needed */ - } - - - /* Hardware state: - */ - t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK; - t->pp_txfilter |= (numLevels - 1) << R200_MAX_MIP_LEVEL_SHIFT; - - t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK | - R200_TXFORMAT_HEIGHT_MASK | - R200_TXFORMAT_CUBIC_MAP_ENABLE | - R200_TXFORMAT_F5_WIDTH_MASK | - R200_TXFORMAT_F5_HEIGHT_MASK); - t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) | - (log2Height << R200_TXFORMAT_HEIGHT_SHIFT)); - - t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK); - if (tObj->Target == GL_TEXTURE_3D) { - t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT); - t->pp_txformat_x |= R200_TEXCOORD_VOLUME; - } - else if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - ASSERT(log2Width == log2Height); - t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) | - (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) | -/* don't think we need this bit, if it exists at all - fglrx does not set it */ - (R200_TXFORMAT_CUBIC_MAP_ENABLE)); - t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV; - t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) | - (log2Height << R200_FACE_HEIGHT_1_SHIFT) | - (log2Width << R200_FACE_WIDTH_2_SHIFT) | - (log2Height << R200_FACE_HEIGHT_2_SHIFT) | - (log2Width << R200_FACE_WIDTH_3_SHIFT) | - (log2Height << R200_FACE_HEIGHT_3_SHIFT) | - (log2Width << R200_FACE_WIDTH_4_SHIFT) | - (log2Height << R200_FACE_HEIGHT_4_SHIFT)); - } - else { - /* If we don't in fact send enough texture coordinates, q will be 1, - * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?) - */ - t->pp_txformat_x |= R200_TEXCOORD_PROJ; - } - - t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) | - ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16)); - - /* Only need to round to nearest 32 for textures, but the blitter - * requires 64-byte aligned pitches, and we may/may not need the - * blitter. NPOT only! - */ - if ( !t->image_override ) { - if (baseImage->IsCompressed) - t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); - else - t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); - t->pp_txpitch -= 32; - } - - t->dirty_state = TEX_ALL; - - /* FYI: r200UploadTexImages( rmesa, t ) used to be called here */ -} - - - /* ================================================================ * Texture combine functions */ @@ -981,20 +733,19 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname, { r200ContextPtr rmesa = pDRICtx->driverPrivate; struct gl_texture_object *tObj = - _mesa_lookup_texture(rmesa->glCtx, texname); - r200TexObjPtr t; + _mesa_lookup_texture(rmesa->radeon.glCtx, texname); + radeonTexObjPtr t = radeon_tex_obj(tObj); if (!tObj) return; - t = (r200TexObjPtr) tObj->DriverData; - t->image_override = GL_TRUE; if (!offset) return; - t->pp_txoffset = offset; + t->bo = NULL; + t->override_offset = offset; t->pp_txpitch = pitch - 32; switch (depth) { @@ -1014,6 +765,125 @@ void r200SetTexOffset(__DRIcontext * pDRICtx, GLint texname, } } +void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, + __DRIdrawable *dPriv) +{ + struct gl_texture_unit *texUnit; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + struct radeon_renderbuffer *rb; + radeon_texture_image *rImage; + radeonContextPtr radeon; + r200ContextPtr rmesa; + struct radeon_framebuffer *rfb; + radeonTexObjPtr t; + uint32_t pitch_val; + uint32_t internalFormat, type, format; + + type = GL_BGRA; + format = GL_UNSIGNED_BYTE; + internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4); + + radeon = pDRICtx->driverPrivate; + rmesa = pDRICtx->driverPrivate; + + rfb = dPriv->driverPrivate; + texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; + texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target); + texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0); + + rImage = get_radeon_texture_image(texImage); + t = radeon_tex_obj(texObj); + if (t == NULL) { + return; + } + + radeon_update_renderbuffers(pDRICtx, dPriv); + /* back & depth buffer are useless free them right away */ + rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; + if (rb && rb->bo) { + radeon_bo_unref(rb->bo); + rb->bo = NULL; + } + rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; + if (rb && rb->bo) { + radeon_bo_unref(rb->bo); + rb->bo = NULL; + } + rb = rfb->color_rb[0]; + if (rb->bo == NULL) { + /* Failed to BO for the buffer */ + return; + } + + _mesa_lock_texture(radeon->glCtx, texObj); + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + if (rImage->bo) { + radeon_bo_unref(rImage->bo); + rImage->bo = NULL; + } + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = NULL; + } + if (rImage->mt) { + radeon_miptree_unreference(rImage->mt); + rImage->mt = NULL; + } + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, + rb->width, rb->height, 1, 0, rb->cpp); + texImage->RowStride = rb->pitch / rb->cpp; + texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx, + internalFormat, + type, format, 0); + rImage->bo = rb->bo; + radeon_bo_ref(rImage->bo); + t->bo = rb->bo; + radeon_bo_ref(t->bo); + t->tile_bits = 0; + t->image_override = GL_TRUE; + t->override_offset = 0; + t->pp_txpitch &= (1 << 13) -1; + pitch_val = rb->pitch; + switch (rb->cpp) { + case 4: + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format; + else + t->pp_txformat = tx_table_le[MESA_FORMAT_ARGB8888].format; + t->pp_txfilter |= tx_table_le[MESA_FORMAT_ARGB8888].filter; + break; + case 3: + default: + t->pp_txformat = tx_table_le[MESA_FORMAT_RGB888].format; + t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB888].filter; + break; + case 2: + t->pp_txformat = tx_table_le[MESA_FORMAT_RGB565].format; + t->pp_txfilter |= tx_table_le[MESA_FORMAT_RGB565].filter; + break; + } + t->pp_txsize = ((rb->width - 1) << RADEON_TEX_USIZE_SHIFT) + | ((rb->height - 1) << RADEON_TEX_VSIZE_SHIFT); + t->pp_txformat |= R200_TXFORMAT_NON_POWER2; + t->pp_txpitch = pitch_val; + t->pp_txpitch -= 32; + + t->validated = GL_TRUE; + _mesa_unlock_texture(radeon->glCtx, texObj); + return; +} + + +void r200SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + r200SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); +} + + #define REF_COLOR 1 #define REF_ALPHA 2 @@ -1207,12 +1077,41 @@ static GLboolean r200UpdateAllTexEnv( GLcontext *ctx ) R200_VOLUME_FILTER_MASK) +static void disable_tex_obj_state( r200ContextPtr rmesa, + int unit ) +{ + + R200_STATECHANGE( rmesa, vtx ); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3)); + + if (rmesa->radeon.TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) { + TCL_FALLBACK( rmesa->radeon.glCtx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE); + } + + /* Actually want to keep all units less than max active texture + * enabled, right? Fix this for >2 texunits. + */ + + { + GLuint tmp = rmesa->TexGenEnabled; + + rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit); + rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit); + rmesa->TexGenNeedNormals[unit] = GL_FALSE; + rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit); + + if (tmp != rmesa->TexGenEnabled) { + rmesa->recheck_texgen[unit] = GL_TRUE; + rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; + } + } +} static void import_tex_obj_state( r200ContextPtr rmesa, int unit, - r200TexObjPtr texobj ) + radeonTexObjPtr texobj ) { /* do not use RADEON_DB_STATE to avoid stale texture caches */ - int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; + GLuint *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; R200_STATECHANGE( rmesa, tex[unit] ); @@ -1225,36 +1124,21 @@ static void import_tex_obj_state( r200ContextPtr rmesa, cmd[TEX_PP_TXSIZE] = texobj->pp_txsize; /* NPOT only! */ cmd[TEX_PP_TXPITCH] = texobj->pp_txpitch; /* NPOT only! */ cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; - if (rmesa->r200Screen->drmSupportsFragShader) { - cmd[TEX_PP_TXOFFSET_NEWDRM] = texobj->pp_txoffset; - } - else { - cmd[TEX_PP_TXOFFSET_OLDDRM] = texobj->pp_txoffset; - } - if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) { - int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; - GLuint bytesPerFace = texobj->base.totalSize / 6; - ASSERT(texobj->base.totalSize % 6 == 0); + if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) { + GLuint *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; R200_STATECHANGE( rmesa, cube[unit] ); cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces; - if (rmesa->r200Screen->drmSupportsFragShader) { + if (rmesa->radeon.radeonScreen->drmSupportsFragShader) { /* that value is submitted twice. could change cube atom to not include that command when new drm is used */ cmd[TEX_PP_CUBIC_FACES] = texobj->pp_cubic_faces; } - cube_cmd[CUBE_PP_CUBIC_OFFSET_F1] = texobj->pp_txoffset + 1 * bytesPerFace; - cube_cmd[CUBE_PP_CUBIC_OFFSET_F2] = texobj->pp_txoffset + 2 * bytesPerFace; - cube_cmd[CUBE_PP_CUBIC_OFFSET_F3] = texobj->pp_txoffset + 3 * bytesPerFace; - cube_cmd[CUBE_PP_CUBIC_OFFSET_F4] = texobj->pp_txoffset + 4 * bytesPerFace; - cube_cmd[CUBE_PP_CUBIC_OFFSET_F5] = texobj->pp_txoffset + 5 * bytesPerFace; } - texobj->dirty_state &= ~(1<<unit); } - static void set_texgen_matrix( r200ContextPtr rmesa, GLuint unit, const GLfloat *s_plane, @@ -1377,7 +1261,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) } else { tgcm |= R200_TEXGEN_COMP_T << (unit * 4); } - if (texUnit->TexGenEnabled & R_BIT) { if (texUnit->GenR.Mode != mode) mixed_fallback = GL_TRUE; @@ -1517,52 +1400,6 @@ static GLboolean r200_validate_texgen( GLcontext *ctx, GLuint unit ) return GL_TRUE; } - -static void disable_tex( GLcontext *ctx, int unit ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - - if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit)) { - /* Texture unit disabled */ - if ( rmesa->state.texture.unit[unit].texobj != NULL ) { - /* The old texture is no longer bound to this texture unit. - * Mark it as such. - */ - - rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit); - rmesa->state.texture.unit[unit].texobj = NULL; - } - - R200_STATECHANGE( rmesa, ctx ); - rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~(R200_TEX_0_ENABLE << unit); - - R200_STATECHANGE( rmesa, vtx ); - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3)); - - if (rmesa->TclFallback & (R200_TCL_FALLBACK_TEXGEN_0<<unit)) { - TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE); - } - - /* Actually want to keep all units less than max active texture - * enabled, right? Fix this for >2 texunits. - */ - - { - GLuint tmp = rmesa->TexGenEnabled; - - rmesa->TexGenEnabled &= ~(R200_TEXGEN_TEXMAT_0_ENABLE<<unit); - rmesa->TexGenEnabled &= ~(R200_TEXMAT_0_ENABLE<<unit); - rmesa->TexGenNeedNormals[unit] = GL_FALSE; - rmesa->TexGenCompSel &= ~(R200_OUTPUT_TEX_0 << unit); - - if (tmp != rmesa->TexGenEnabled) { - rmesa->recheck_texgen[unit] = GL_TRUE; - rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; - } - } - } -} - void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); @@ -1579,237 +1416,169 @@ void set_re_cntl_d3d( GLcontext *ctx, int unit, GLboolean use_d3d ) } } -static GLboolean enable_tex_2d( GLcontext *ctx, int unit ) +/** + * Compute the cached hardware register values for the given texture object. + * + * \param rmesa Context pointer + * \param t the r300 texture object + */ +static void setup_hardware_state(r200ContextPtr rmesa, radeonTexObj *t) { - r200ContextPtr rmesa = R200_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; - - /* Need to load the 2d images associated with this unit. - */ - if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) { - t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2; - t->base.dirty_images[0] = ~0; - } - - ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); - - if ( t->base.dirty_images[0] ) { - R200_FIREVERTICES( rmesa ); - r200SetTexImages( rmesa, tObj ); - r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 ); - if ( !t->base.memBlock && !t->image_override ) - return GL_FALSE; + int firstlevel = t->mt ? t->mt->firstLevel : 0; + const struct gl_texture_image *firstImage = t->base.Image[0][firstlevel]; + GLint log2Width, log2Height, log2Depth, texelBytes; + + if ( t->bo ) { + return; } - set_re_cntl_d3d( ctx, unit, GL_FALSE ); + log2Width = firstImage->WidthLog2; + log2Height = firstImage->HeightLog2; + log2Depth = firstImage->DepthLog2; + texelBytes = firstImage->TexFormat->TexelBytes; - return GL_TRUE; -} -#if ENABLE_HW_3D_TEXTURE -static GLboolean enable_tex_3d( GLcontext *ctx, int unit ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; - - /* Need to load the 3d images associated with this unit. - */ - if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) { - t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2; - t->base.dirty_images[0] = ~0; + if (!t->image_override) { + if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) { + const struct tx_table *table = _mesa_little_endian() ? tx_table_le : + tx_table_be; + + t->pp_txformat &= ~(R200_TXFORMAT_FORMAT_MASK | + R200_TXFORMAT_ALPHA_IN_MAP); + t->pp_txfilter &= ~R200_YUV_TO_RGB; + + t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format; + t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter; + } else { + _mesa_problem(NULL, "unexpected texture format in %s", + __FUNCTION__); + return; + } } + + t->pp_txfilter &= ~R200_MAX_MIP_LEVEL_MASK; + t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << R200_MAX_MIP_LEVEL_SHIFT; + + t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK | + R200_TXFORMAT_HEIGHT_MASK | + R200_TXFORMAT_CUBIC_MAP_ENABLE | + R200_TXFORMAT_F5_WIDTH_MASK | + R200_TXFORMAT_F5_HEIGHT_MASK); + t->pp_txformat |= ((log2Width << R200_TXFORMAT_WIDTH_SHIFT) | + (log2Height << R200_TXFORMAT_HEIGHT_SHIFT)); + + t->tile_bits = 0; + + t->pp_txformat_x &= ~(R200_DEPTH_LOG2_MASK | R200_TEXCOORD_MASK); + if (t->base.Target == GL_TEXTURE_3D) { + t->pp_txformat_x |= (log2Depth << R200_DEPTH_LOG2_SHIFT); + t->pp_txformat_x |= R200_TEXCOORD_VOLUME; - ASSERT(tObj->Target == GL_TEXTURE_3D); - - /* R100 & R200 do not support mipmaps for 3D textures. - */ - if ( (tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR) ) { - return GL_FALSE; } - - if ( t->base.dirty_images[0] ) { - R200_FIREVERTICES( rmesa ); - r200SetTexImages( rmesa, tObj ); - r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 ); - if ( !t->base.memBlock ) - return GL_FALSE; + else if (t->base.Target == GL_TEXTURE_CUBE_MAP) { + ASSERT(log2Width == log2Height); + t->pp_txformat |= ((log2Width << R200_TXFORMAT_F5_WIDTH_SHIFT) | + (log2Height << R200_TXFORMAT_F5_HEIGHT_SHIFT) | + /* don't think we need this bit, if it exists at all - fglrx does not set it */ + (R200_TXFORMAT_CUBIC_MAP_ENABLE)); + t->pp_txformat_x |= R200_TEXCOORD_CUBIC_ENV; + t->pp_cubic_faces = ((log2Width << R200_FACE_WIDTH_1_SHIFT) | + (log2Height << R200_FACE_HEIGHT_1_SHIFT) | + (log2Width << R200_FACE_WIDTH_2_SHIFT) | + (log2Height << R200_FACE_HEIGHT_2_SHIFT) | + (log2Width << R200_FACE_WIDTH_3_SHIFT) | + (log2Height << R200_FACE_HEIGHT_3_SHIFT) | + (log2Width << R200_FACE_WIDTH_4_SHIFT) | + (log2Height << R200_FACE_HEIGHT_4_SHIFT)); } - - set_re_cntl_d3d( ctx, unit, GL_TRUE ); - - return GL_TRUE; -} -#endif - -static GLboolean enable_tex_cube( GLcontext *ctx, int unit ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; - GLuint face; - - /* Need to load the 2d images associated with this unit. - */ - if (t->pp_txformat & R200_TXFORMAT_NON_POWER2) { - t->pp_txformat &= ~R200_TXFORMAT_NON_POWER2; - for (face = 0; face < 6; face++) - t->base.dirty_images[face] = ~0; + else { + /* If we don't in fact send enough texture coordinates, q will be 1, + * making TEXCOORD_PROJ act like TEXCOORD_NONPROJ (Right?) + */ + t->pp_txformat_x |= R200_TEXCOORD_PROJ; } - ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); + t->pp_txsize = (((firstImage->Width - 1) << R200_PP_TX_WIDTHMASK_SHIFT) + | ((firstImage->Height - 1) << R200_PP_TX_HEIGHTMASK_SHIFT)); - if ( t->base.dirty_images[0] || t->base.dirty_images[1] || - t->base.dirty_images[2] || t->base.dirty_images[3] || - t->base.dirty_images[4] || t->base.dirty_images[5] ) { - /* flush */ - R200_FIREVERTICES( rmesa ); - /* layout memory space, once for all faces */ - r200SetTexImages( rmesa, tObj ); - } - - /* upload (per face) */ - for (face = 0; face < 6; face++) { - if (t->base.dirty_images[face]) { - r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, face ); - } - } - - if ( !t->base.memBlock ) { - /* texmem alloc failed, use s/w fallback */ - return GL_FALSE; + if ( !t->image_override ) { + if (firstImage->IsCompressed) + t->pp_txpitch = (firstImage->Width + 63) & ~(63); + else + t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63); + t->pp_txpitch -= 32; } - set_re_cntl_d3d( ctx, unit, GL_TRUE ); - - return GL_TRUE; -} - -static GLboolean enable_tex_rect( GLcontext *ctx, int unit ) -{ - r200ContextPtr rmesa = R200_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; - - if (!(t->pp_txformat & R200_TXFORMAT_NON_POWER2)) { + if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) { t->pp_txformat |= R200_TXFORMAT_NON_POWER2; - t->base.dirty_images[0] = ~0; - } - - ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); - - if ( t->base.dirty_images[0] ) { - R200_FIREVERTICES( rmesa ); - r200SetTexImages( rmesa, tObj ); - r200UploadTexImages( rmesa, (r200TexObjPtr) tObj->DriverData, 0 ); - if ( !t->base.memBlock && - !t->image_override && - !rmesa->prefer_gart_client_texturing ) - return GL_FALSE; } - set_re_cntl_d3d( ctx, unit, GL_FALSE ); - - return GL_TRUE; } - -static GLboolean update_tex_common( GLcontext *ctx, int unit ) +static GLboolean r200_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - r200TexObjPtr t = (r200TexObjPtr) tObj->DriverData; - - /* Fallback if there's a texture border */ - if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) - return GL_FALSE; - - /* Update state if this is a different texture object to last - * time. - */ - if ( rmesa->state.texture.unit[unit].texobj != t ) { - if ( rmesa->state.texture.unit[unit].texobj != NULL ) { - /* The old texture is no longer bound to this texture unit. - * Mark it as such. - */ + radeonTexObj *t = radeon_tex_obj(texObj); - rmesa->state.texture.unit[unit].texobj->base.bound &= - ~(1UL << unit); - } - - rmesa->state.texture.unit[unit].texobj = t; - t->base.bound |= (1UL << unit); - t->dirty_state |= 1<<unit; - driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */ - } - - - /* Newly enabled? - */ - if ( 1|| !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (R200_TEX_0_ENABLE<<unit))) { - R200_STATECHANGE( rmesa, ctx ); - rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit; - - R200_STATECHANGE( rmesa, vtx ); - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3)); - rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3); + if (!radeon_validate_texture_miptree(ctx, texObj)) + return GL_FALSE; - rmesa->recheck_texgen[unit] = GL_TRUE; - } + r200_validate_texgen(ctx, unit); + /* Configure the hardware registers (more precisely, the cached version + * of the hardware registers). */ + setup_hardware_state(rmesa, t); + + if (texObj->Target == GL_TEXTURE_RECTANGLE_NV || + texObj->Target == GL_TEXTURE_2D || + texObj->Target == GL_TEXTURE_1D) + set_re_cntl_d3d( ctx, unit, GL_FALSE ); + else + set_re_cntl_d3d( ctx, unit, GL_TRUE ); + R200_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_0_ENABLE << unit; + + R200_STATECHANGE( rmesa, vtx ); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] &= ~(7 << (unit * 3)); + rmesa->hw.vtx.cmd[VTX_TCL_OUTPUT_VTXFMT_1] |= 4 << (unit * 3); - if (t->dirty_state & (1<<unit)) { - import_tex_obj_state( rmesa, unit, t ); - } + rmesa->recheck_texgen[unit] = GL_TRUE; + import_tex_obj_state( rmesa, unit, t ); if (rmesa->recheck_texgen[unit]) { GLboolean fallback = !r200_validate_texgen( ctx, unit ); TCL_FALLBACK( ctx, (R200_TCL_FALLBACK_TEXGEN_0<<unit), fallback); rmesa->recheck_texgen[unit] = 0; - rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; + rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; } - FALLBACK( rmesa, R200_FALLBACK_BORDER_MODE, t->border_fallback ); - return !t->border_fallback; -} + t->validated = GL_TRUE; + FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback ); + return !t->border_fallback; +} -static GLboolean r200UpdateTextureUnit( GLcontext *ctx, int unit ) +static GLboolean r200UpdateTextureUnit(GLcontext *ctx, int unit) { r200ContextPtr rmesa = R200_CONTEXT(ctx); GLuint unitneeded = rmesa->state.texture.unit[unit].unitneeded; - if ( unitneeded & (TEXTURE_RECT_BIT) ) { - return (enable_tex_rect( ctx, unit ) && - update_tex_common( ctx, unit )); - } - else if ( unitneeded & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) { - return (enable_tex_2d( ctx, unit ) && - update_tex_common( ctx, unit )); - } -#if ENABLE_HW_3D_TEXTURE - else if ( unitneeded & (TEXTURE_3D_BIT) ) { - return (enable_tex_3d( ctx, unit ) && - update_tex_common( ctx, unit )); - } -#endif - else if ( unitneeded & (TEXTURE_CUBE_BIT) ) { - return (enable_tex_cube( ctx, unit ) && - update_tex_common( ctx, unit )); - } - else if ( unitneeded ) { - return GL_FALSE; - } - else { - disable_tex( ctx, unit ); - return GL_TRUE; + if (!unitneeded) { + /* disable the unit */ + disable_tex_obj_state(rmesa, unit); + return GL_TRUE; } + + if (!r200_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) { + _mesa_warning(ctx, + "failed to validate texture for unit %d.\n", + unit); + rmesa->state.texture.unit[unit].texobj = NULL; + return GL_FALSE; + } + + rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current); + return GL_TRUE; } @@ -1850,11 +1619,11 @@ void r200UpdateTextureState( GLcontext *ctx ) FALLBACK( rmesa, R200_FALLBACK_TEXTURE, !ok ); - if (rmesa->TclFallback) + if (rmesa->radeon.TclFallback) r200ChooseVertexState( ctx ); - if (rmesa->r200Screen->chip_family == CHIP_FAMILY_R200) { + if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R200) { /* * T0 hang workaround ------------- @@ -1867,7 +1636,7 @@ void r200UpdateTextureState( GLcontext *ctx ) R200_STATECHANGE(rmesa, tex[1]); rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= R200_TEX_1_ENABLE; if (!(rmesa->hw.cst.cmd[CST_PP_CNTL_X] & R200_PPX_TEX_1_ENABLE)) - rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; + rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; rmesa->hw.tex[1].cmd[TEX_PP_TXFORMAT] |= R200_TXFORMAT_LOOKUP_DISABLE; } else if (!ctx->ATIFragmentShader._Enabled) { diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 4ce93b5145..620f29b5c6 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -1110,9 +1110,9 @@ void r200SetupVertexProg( GLcontext *ctx ) { } /* could optimize setting up vertex progs away for non-tcl hw */ fallback = !(vp->native && r200VertexProgUpdateParams(ctx, vp) && - rmesa->r200Screen->drmSupportsVertexProgram); + rmesa->radeon.radeonScreen->drmSupportsVertexProgram); TCL_FALLBACK(ctx, R200_TCL_FALLBACK_VERTEX_PROGRAM, fallback); - if (rmesa->TclFallback) return; + if (rmesa->radeon.TclFallback) return; R200_STATECHANGE( rmesa, vap ); /* FIXME: fglrx sets R200_VAP_SINGLE_BUF_STATE_ENABLE too. Do we need it? diff --git a/src/mesa/drivers/dri/r300/.gitignore b/src/mesa/drivers/dri/r300/.gitignore index 3689a6a78e..2f9cd1a987 100644 --- a/src/mesa/drivers/dri/r300/.gitignore +++ b/src/mesa/drivers/dri/r300/.gitignore @@ -1,4 +1,15 @@ +radeon_bocs_wrapper.h +radeon_bo_legacy.[ch] radeon_chipset.h +radeon_cmdbuf.h +radeon_common.[ch] +radeon_common_context.[ch] +radeon_cs_legacy.[ch] +radeon_dma.[ch] +radeon_fbo.c +radeon_lock.[ch] +radeon_mipmap_tree.[ch] radeon_screen.[ch] -radeon_span.h +radeon_span.[ch] +radeon_texture.[ch] server diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 6ca934204f..bdb09624be 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -3,6 +3,8 @@ TOP = ../../../../.. include $(TOP)/configs/current +CFLAGS += $(RADEON_CFLAGS) + LIBNAME = r300_dri.so MINIGLX_SOURCES = server/radeon_dri.c @@ -20,20 +22,26 @@ COMMON_SOURCES = \ ../common/xmlconfig.c \ ../common/dri_util.c +RADEON_COMMON_SOURCES = \ + radeon_texture.c \ + radeon_common_context.c \ + radeon_common.c \ + radeon_dma.c \ + radeon_lock.c \ + radeon_bo_legacy.c \ + radeon_cs_legacy.c \ + radeon_mipmap_tree.c \ + radeon_span.c \ + radeon_fbo.c + DRIVER_SOURCES = \ radeon_screen.c \ - radeon_context.c \ - radeon_ioctl.c \ - radeon_lock.c \ - radeon_span.c \ - radeon_state.c \ - r300_mem.c \ r300_context.c \ + r300_draw.c \ r300_ioctl.c \ r300_cmdbuf.c \ r300_state.c \ r300_render.c \ - r300_texmem.c \ r300_tex.c \ r300_texstate.c \ radeon_program.c \ @@ -41,6 +49,7 @@ DRIVER_SOURCES = \ radeon_program_pair.c \ radeon_nqssadce.c \ r300_vertprog.c \ + r300_fragprog_common.c \ r300_fragprog.c \ r300_fragprog_swizzle.c \ r300_fragprog_emit.c \ @@ -49,12 +58,15 @@ DRIVER_SOURCES = \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ + $(RADEON_COMMON_SOURCES) \ $(EGL_SOURCES) C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES) DRIVER_DEFINES = -DCOMPILE_R300 -DR200_MERGED=0 \ - -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 + -DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R300 \ +# -DRADEON_BO_TRACK \ + -Wall SYMLINKS = \ server/radeon_dri.c \ @@ -68,7 +80,29 @@ COMMON_SYMLINKS = \ radeon_chipset.h \ radeon_screen.c \ radeon_screen.h \ - radeon_span.h + radeon_span.h \ + radeon_span.c \ + radeon_bo_legacy.c \ + radeon_cs_legacy.c \ + radeon_bo_legacy.h \ + radeon_cs_legacy.h \ + radeon_bocs_wrapper.h \ + radeon_lock.c \ + radeon_lock.h \ + radeon_common.c \ + radeon_common.h \ + radeon_common_context.c \ + radeon_common_context.h \ + radeon_cmdbuf.h \ + radeon_dma.c \ + radeon_dma.h \ + radeon_mipmap_tree.c \ + radeon_mipmap_tree.h \ + radeon_texture.c \ + radeon_texture.h \ + radeon_fbo.c + +DRI_LIB_DEPS += $(RADEON_LDFLAGS) ##### TARGETS ##### diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index f4472756f1..b5c6bd1835 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -44,245 +44,417 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drm.h" #include "radeon_drm.h" -#include "radeon_ioctl.h" #include "r300_context.h" #include "r300_ioctl.h" #include "radeon_reg.h" #include "r300_reg.h" #include "r300_cmdbuf.h" #include "r300_emit.h" +#include "radeon_bocs_wrapper.h" +#include "radeon_mipmap_tree.h" #include "r300_state.h" +#include "radeon_reg.h" -// Set this to 1 for extremely verbose debugging of command buffers -#define DEBUG_CMDBUF 0 - -/** - * Send the current command buffer via ioctl to the hardware. +/** # of dwords reserved for additional instructions that may need to be written + * during flushing. */ -int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller) +#define SPACE_FOR_FLUSHING 4 + +static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt) { - int ret; - int i; - drm_radeon_cmd_buffer_t cmd; - int start; - - if (r300->radeon.lost_context) { - start = 0; - r300->radeon.lost_context = GL_FALSE; - } else - start = r300->cmdbuf.count_reemit; - - if (RADEON_DEBUG & DEBUG_IOCTL) { - fprintf(stderr, "%s from %s - %i cliprects\n", - __FUNCTION__, caller, r300->radeon.numClipRects); - - if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_VERBOSE) - for (i = start; i < r300->cmdbuf.count_used; ++i) - fprintf(stderr, "%d: %08x\n", i, - r300->cmdbuf.cmd_buf[i]); - } + if (r300->radeon.radeonScreen->kernel_mm) { + return ((((*pkt) >> 16) & 0x3FFF) + 1); + } else { + drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt; + return t->packet0.count; + } +} - cmd.buf = (char *)(r300->cmdbuf.cmd_buf + start); - cmd.bufsz = (r300->cmdbuf.count_used - start) * 4; +#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) +#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) - if (r300->radeon.state.scissor.enabled) { - cmd.nbox = r300->radeon.state.scissor.numClipRects; - cmd.boxes = - (drm_clip_rect_t *) r300->radeon.state.scissor.pClipRects; - } else { - cmd.nbox = r300->radeon.numClipRects; - cmd.boxes = (drm_clip_rect_t *) r300->radeon.pClipRects; +void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + drm_r300_cmd_header_t cmd; + uint32_t addr, ndw, i; + + if (!r300->radeon.radeonScreen->kernel_mm) { + uint32_t dwords; + dwords = (*atom->check) (ctx, atom); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(atom->cmd, dwords); + END_BATCH(); + return; } - ret = drmCommandWrite(r300->radeon.dri.fd, - DRM_RADEON_CMDBUF, &cmd, sizeof(cmd)); - - if (RADEON_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "Syncing in %s (from %s)\n\n", - __FUNCTION__, caller); - radeonWaitForIdleLocked(&r300->radeon); + cmd.u = atom->cmd[0]; + addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo; + ndw = cmd.vpu.count * 4; + if (ndw) { + + if (r300->vap_flush_needed) { + BEGIN_BATCH_NO_AUTOSTATE(15 + ndw); + + /* flush processing vertices */ + OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0); + OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN); + OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff); + OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + r300->vap_flush_needed = GL_FALSE; + } else { + BEGIN_BATCH_NO_AUTOSTATE(5 + ndw); + } + OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr); + OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR); + for (i = 0; i < ndw; i++) { + OUT_BATCH(atom->cmd[i+1]); + } + OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + END_BATCH(); } - - r300->dma.nr_released_bufs = 0; - r300->cmdbuf.count_used = 0; - r300->cmdbuf.count_reemit = 0; - - return ret; } -int r300FlushCmdBuf(r300ContextPtr r300, const char *caller) +void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom) { - int ret; + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + drm_r300_cmd_header_t cmd; + uint32_t addr, ndw, i, sz; + int type, clamp, stride; + + if (!r300->radeon.radeonScreen->kernel_mm) { + uint32_t dwords; + dwords = (*atom->check) (ctx, atom); + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(atom->cmd, dwords); + END_BATCH(); + return; + } - LOCK_HARDWARE(&r300->radeon); + cmd.u = atom->cmd[0]; + sz = cmd.r500fp.count; + addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo; + type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE); + clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP); - ret = r300FlushCmdBufLocked(r300, caller); + addr |= (type << 16); + addr |= (clamp << 17); - UNLOCK_HARDWARE(&r300->radeon); + stride = type ? 4 : 6; - if (ret) { - fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret); - _mesa_exit(ret); - } + ndw = sz * stride; + if (ndw) { - return ret; + BEGIN_BATCH_NO_AUTOSTATE(3 + ndw); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0)); + OUT_BATCH(addr); + OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR); + for (i = 0; i < ndw; i++) { + OUT_BATCH(atom->cmd[i+1]); + } + END_BATCH(); + } } -static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state) +static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom) { - int i, j, reg; - int dwords = (*state->check) (r300, state); - drm_r300_cmd_header_t cmd; + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd); + int notexture = 0; - fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, - state->cmd_size); - - if (RADEON_DEBUG & DEBUG_VERBOSE) { - for (i = 0; i < dwords;) { - cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]); - reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo; - fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n", - state->name, i, reg, cmd.packet0.count); - ++i; - for (j = 0; j < cmd.packet0.count; j++) { - fprintf(stderr, " %s[%d]: 0x%04x = %08x\n", - state->name, i, reg, state->cmd[i]); - reg += 4; - ++i; - } + if (numtmus) { + int i; + + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + + if (!t) + notexture = 1; + } + + if (r300->radeon.radeonScreen->kernel_mm && notexture) { + return; + } + for(i = 0; i < numtmus; ++i) { + radeonTexObj *t = r300->hw.textures[i]; + if (t && !t->image_override) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!t) { + /* Texture unit hasn't a texture bound nothings to do */ + } else { /* override cases */ + if (t->bo) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + END_BATCH(); + } else if (!r300->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1); + OUT_BATCH(t->override_offset); + END_BATCH(); + } else { + /* Texture unit hasn't a texture bound nothings to do */ + } + } } } } -/** - * Emit all atoms with a dirty field equal to dirty. - * - * The caller must have ensured that there is enough space in the command - * buffer. - */ -static INLINE void r300EmitAtoms(r300ContextPtr r300, GLboolean dirty) +void r300_emit_scissor(GLcontext *ctx) { - struct r300_state_atom *atom; - uint32_t *dest; - int dwords; - - dest = r300->cmdbuf.cmd_buf + r300->cmdbuf.count_used; - - /* Emit WAIT */ - *dest = cmdwait(R300_WAIT_3D | R300_WAIT_3D_CLEAN); - dest++; - r300->cmdbuf.count_used++; - - /* Emit cache flush */ - *dest = cmdpacket0(R300_TX_INVALTAGS, 1); - dest++; - r300->cmdbuf.count_used++; - - *dest = R300_TX_FLUSH; - dest++; - r300->cmdbuf.count_used++; - - /* Emit END3D */ - *dest = cmdpacify(); - dest++; - r300->cmdbuf.count_used++; - - /* Emit actual atoms */ - - foreach(atom, &r300->hw.atomlist) { - if ((atom->dirty || r300->hw.all_dirty) == dirty) { - dwords = (*atom->check) (r300, atom); - if (dwords) { - if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { - r300PrintStateAtom(r300, atom); - } - memcpy(dest, atom->cmd, dwords * 4); - dest += dwords; - r300->cmdbuf.count_used += dwords; - atom->dirty = GL_FALSE; - } else { - if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { - fprintf(stderr, " skip state %s\n", - atom->name); - } - } - } + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + unsigned x1, y1, x2, y2; + struct radeon_renderbuffer *rrb; + + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + return; + } + rrb = radeon_get_colorbuffer(&r300->radeon); + if (!rrb || !rrb->bo) { + fprintf(stderr, "no rrb\n"); + return; } + if (r300->radeon.state.scissor.enabled) { + x1 = r300->radeon.state.scissor.rect.x1; + y1 = r300->radeon.state.scissor.rect.y1; + x2 = r300->radeon.state.scissor.rect.x2 - 1; + y2 = r300->radeon.state.scissor.rect.y2 - 1; + } else { + x1 = 0; + y1 = 0; + x2 = rrb->width - 1; + y2 = rrb->height - 1; + } + if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { + x1 += R300_SCISSORS_OFFSET; + y1 += R300_SCISSORS_OFFSET; + x2 += R300_SCISSORS_OFFSET; + y2 += R300_SCISSORS_OFFSET; + } + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT)|(y1 << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); } -/** - * Copy dirty hardware state atoms into the command buffer. - * - * We also copy out clean state if we're at the start of a buffer. That makes - * it easy to recover from lost contexts. - */ -void r300EmitState(r300ContextPtr r300) +static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom) { - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_PRIMS)) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (r300->cmdbuf.count_used && !r300->hw.is_dirty - && !r300->hw.all_dirty) + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + struct radeon_renderbuffer *rrb; + uint32_t cbpitch; + uint32_t offset = r300->radeon.state.color.draw_offset; + uint32_t dw = 6; + int i; + + rrb = radeon_get_colorbuffer(&r300->radeon); + if (!rrb || !rrb->bo) { + fprintf(stderr, "no rrb\n"); return; + } + + cbpitch = (rrb->pitch / rrb->cpp); + if (rrb->cpp == 4) + cbpitch |= R300_COLOR_FORMAT_ARGB8888; + else + cbpitch |= R300_COLOR_FORMAT_RGB565; + + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + cbpitch |= R300_COLOR_TILE_ENABLE; + + if (r300->radeon.radeonScreen->kernel_mm) + dw += 2; + BEGIN_BATCH_NO_AUTOSTATE(dw); + OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_BATCH_RELOC(offset, rrb->bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1); + if (!r300->radeon.radeonScreen->kernel_mm) + OUT_BATCH(cbpitch); + else + OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + if (r300->radeon.radeonScreen->driScreen->dri2.enabled) { + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH(0); + OUT_BATCH(((rrb->width - 1) << R300_SCISSORS_X_SHIFT) | + ((rrb->height - 1) << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(16); + for (i = 0; i < 4; i++) { + OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); + OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT)); + OUT_BATCH(((rrb->width - 1) << R300_CLIPRECT_X_SHIFT) | ((rrb->height - 1) << R300_CLIPRECT_Y_SHIFT)); + } + OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); + OUT_BATCH(0xAAAA); + OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); + OUT_BATCH(0xffffff); + END_BATCH(); + } else { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2); + OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) | + (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT)); + OUT_BATCH(((rrb->width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) | + ((rrb->height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT)); + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(16); + for (i = 0; i < 4; i++) { + OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2); + OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT)); + OUT_BATCH(((R300_SCISSORS_OFFSET + rrb->width - 1) << R300_CLIPRECT_X_SHIFT) | + ((R300_SCISSORS_OFFSET + rrb->height - 1) << R300_CLIPRECT_Y_SHIFT)); + } + OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1); + OUT_BATCH(0xAAAA); + OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1); + OUT_BATCH(0xffffff); + END_BATCH(); + } + } +} - /* To avoid going across the entire set of states multiple times, just check - * for enough space for the case of emitting all state, and inline the - * r300AllocCmdBuf code here without all the checks. - */ - r300EnsureCmdBufSpace(r300, r300->hw.max_state_size, __FUNCTION__); +static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + struct radeon_renderbuffer *rrb; + uint32_t zbpitch; - if (!r300->cmdbuf.count_used) { - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "Begin reemit state\n"); + rrb = radeon_get_depthbuffer(&r300->radeon); + if (!rrb) + return; - r300EmitAtoms(r300, GL_FALSE); - r300->cmdbuf.count_reemit = r300->cmdbuf.count_used; + zbpitch = (rrb->pitch / rrb->cpp); + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { + zbpitch |= R300_DEPTHMACROTILE_ENABLE; + } + if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ + zbpitch |= R300_DEPTHMICROTILE_TILED; } - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "Begin dirty state\n"); + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, zbpitch); + END_BATCH(); +} - r300EmitAtoms(r300, GL_TRUE); +static void emit_gb_misc(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH(atom->cmd[0]); + OUT_BATCH(atom->cmd[1]); + OUT_BATCH(atom->cmd[2]); + OUT_BATCH(atom->cmd[3]); + END_BATCH(); + } +} - assert(r300->cmdbuf.count_used < r300->cmdbuf.size); +static void emit_threshold_misc(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + BEGIN_BATCH_NO_AUTOSTATE(3); + OUT_BATCH(atom->cmd[0]); + OUT_BATCH(atom->cmd[1]); + OUT_BATCH(atom->cmd[2]); + END_BATCH(); + } +} - r300->hw.is_dirty = GL_FALSE; - r300->hw.all_dirty = GL_FALSE; +static void emit_shade_misc(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH(atom->cmd[0]); + OUT_BATCH(atom->cmd[1]); + END_BATCH(); + } } -#define packet0_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->packet0.count) -#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count) -#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count) +static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + BATCH_LOCALS(&r300->radeon); + struct radeon_renderbuffer *rrb; + uint32_t format = 0; + + rrb = radeon_get_depthbuffer(&r300->radeon); + if (!rrb) + format = 0; + else { + if (rrb->cpp == 2) + format = R300_DEPTHFORMAT_16BIT_INT_Z; + else if (rrb->cpp == 4) + format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; + } + + OUT_BATCH(atom->cmd[0]); + atom->cmd[1] &= ~0xf; + atom->cmd[1] |= format; + OUT_BATCH(atom->cmd[1]); + OUT_BATCH(atom->cmd[2]); + OUT_BATCH(atom->cmd[3]); + OUT_BATCH(atom->cmd[4]); +} -static int check_always(r300ContextPtr r300, struct r300_state_atom *atom) +static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) { return atom->cmd_size; } -static int check_variable(r300ContextPtr r300, struct r300_state_atom *atom) +static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom) { + r300ContextPtr r300 = R300_CONTEXT(ctx); int cnt; - cnt = packet0_count(atom->cmd); + if (atom->cmd[0] == CP_PACKET2) { + return 0; + } + cnt = packet0_count(r300, atom->cmd); return cnt ? cnt + 1 : 0; } -static int check_vpu(r300ContextPtr r300, struct r300_state_atom *atom) +int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; + cnt = vpu_count(atom->cmd); return cnt ? (cnt * 4) + 1 : 0; } -static int check_r500fp(r300ContextPtr r300, struct r300_state_atom *atom) +int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; + cnt = r500fp_count(atom->cmd); return cnt ? (cnt * 6) + 1 : 0; } -static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) +int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom) { int cnt; + cnt = r500fp_count(atom->cmd); return cnt ? (cnt * 4) + 1 : 0; } @@ -295,8 +467,8 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) r300->hw.ATOM.idx = (IDX); \ r300->hw.ATOM.check = check_##CHK; \ r300->hw.ATOM.dirty = GL_FALSE; \ - r300->hw.max_state_size += (SZ); \ - insert_at_tail(&r300->hw.atomlist, &r300->hw.ATOM); \ + r300->radeon.hw.max_state_size += (SZ); \ + insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM); \ } while (0) /** * Allocate memory for the command buffer and initialize the state atom @@ -304,18 +476,16 @@ static int check_r500fp_const(r300ContextPtr r300, struct r300_state_atom *atom) */ void r300InitCmdBuf(r300ContextPtr r300) { - int size, mtu; - int has_tcl = 1; + int mtu; + int has_tcl; int is_r500 = 0; - int i; - if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - has_tcl = 0; + has_tcl = r300->options.hw_tcl_enabled; if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) is_r500 = 1; - r300->hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ + r300->radeon.hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */ mtu = r300->radeon.glCtx->Const.MaxTextureUnits; if (RADEON_DEBUG & DEBUG_TEXTURE) { @@ -323,232 +493,248 @@ void r300InitCmdBuf(r300ContextPtr r300) } /* Setup the atom linked list */ - make_empty_list(&r300->hw.atomlist); - r300->hw.atomlist.name = "atom-list"; + make_empty_list(&r300->radeon.hw.atomlist); + r300->radeon.hw.atomlist.name = "atom-list"; /* Initialize state atoms */ ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0); - r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(R300_SE_VPORT_XSCALE, 6); + r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6); ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0); - r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(R300_VAP_PVS_STATE_FLUSH_REG, 1); + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1); r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0; - r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(R300_VAP_CNTL, 1); - if (is_r500) { + r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1); + if (is_r500 && !r300->radeon.radeonScreen->kernel_mm) { ALLOC_STATE(vap_index_offset, always, 2, 0); - r300->hw.vap_index_offset.cmd[0] = cmdpacket0(R500_VAP_INDEX_OFFSET, 1); + r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1); r300->hw.vap_index_offset.cmd[1] = 0; } ALLOC_STATE(vte, always, 3, 0); - r300->hw.vte.cmd[0] = cmdpacket0(R300_SE_VTE_CNTL, 2); + r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2); ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0); - r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(R300_VAP_VF_MAX_VTX_INDX, 2); + r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2); ALLOC_STATE(vap_cntl_status, always, 2, 0); - r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(R300_VAP_CNTL_STATUS, 1); + r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1); ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0); r300->hw.vir[0].cmd[R300_VIR_CMD_0] = - cmdpacket0(R300_VAP_PROG_STREAM_CNTL_0, 1); + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1); ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1); r300->hw.vir[1].cmd[R300_VIR_CMD_0] = - cmdpacket0(R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1); ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0); - r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(R300_VAP_VTX_STATE_CNTL, 2); + r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2); ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0); - r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE); + r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE); if (has_tcl) { ALLOC_STATE(vap_clip_cntl, always, 2, 0); - r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(R300_VAP_CLIP_CNTL, 1); + r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1); ALLOC_STATE(vap_clip, always, 5, 0); - r300->hw.vap_clip.cmd[0] = cmdpacket0(R300_VAP_GB_VERT_CLIP_ADJ, 4); + r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4); ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0); - r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(VAP_PVS_VTX_TIMEOUT_REG, 1); + r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1); } ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0); r300->hw.vof.cmd[R300_VOF_CMD_0] = - cmdpacket0(R300_VAP_OUTPUT_VTX_FMT_0, 2); + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2); if (has_tcl) { ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0); r300->hw.pvs.cmd[R300_PVS_CMD_0] = - cmdpacket0(R300_VAP_PVS_CODE_CNTL_0, 3); + cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3); } ALLOC_STATE(gb_enable, always, 2, 0); - r300->hw.gb_enable.cmd[0] = cmdpacket0(R300_GB_ENABLE, 1); + r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1); ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0); - r300->hw.gb_misc.cmd[0] = cmdpacket0(R300_GB_MSPOS0, 5); + r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 3); + r300->hw.gb_misc.emit = emit_gb_misc; + ALLOC_STATE(gb_misc2, always, R300_GB_MISC2_CMDSIZE, 0); + r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2); ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0); - r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(R300_TX_ENABLE, 1); + r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1); ALLOC_STATE(ga_point_s0, always, 5, 0); - r300->hw.ga_point_s0.cmd[0] = cmdpacket0(R300_GA_POINT_S0, 4); + r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4); ALLOC_STATE(ga_triangle_stipple, always, 2, 0); - r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(R300_GA_TRIANGLE_STIPPLE, 1); + r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1); ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0); - r300->hw.ps.cmd[0] = cmdpacket0(R300_GA_POINT_SIZE, 1); + r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1); ALLOC_STATE(ga_point_minmax, always, 4, 0); - r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(R300_GA_POINT_MINMAX, 3); + r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3); ALLOC_STATE(lcntl, always, 2, 0); - r300->hw.lcntl.cmd[0] = cmdpacket0(R300_GA_LINE_CNTL, 1); + r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1); ALLOC_STATE(ga_line_stipple, always, 4, 0); - r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(R300_GA_LINE_STIPPLE_VALUE, 3); - ALLOC_STATE(shade, always, 5, 0); - r300->hw.shade.cmd[0] = cmdpacket0(R300_GA_ENHANCE, 4); + r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3); + ALLOC_STATE(shade, always, 2, 0); + r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 1); + r300->hw.shade.emit = emit_shade_misc; + ALLOC_STATE(shade2, always, 4, 0); + r300->hw.shade2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4278, 3); ALLOC_STATE(polygon_mode, always, 4, 0); - r300->hw.polygon_mode.cmd[0] = cmdpacket0(R300_GA_POLY_MODE, 3); + r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3); ALLOC_STATE(fogp, always, 3, 0); - r300->hw.fogp.cmd[0] = cmdpacket0(R300_GA_FOG_SCALE, 2); + r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2); ALLOC_STATE(zbias_cntl, always, 2, 0); - r300->hw.zbias_cntl.cmd[0] = cmdpacket0(R300_SU_TEX_WRAP, 1); + r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1); ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0); r300->hw.zbs.cmd[R300_ZBS_CMD_0] = - cmdpacket0(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); + cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4); ALLOC_STATE(occlusion_cntl, always, 2, 0); - r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(R300_SU_POLY_OFFSET_ENABLE, 1); + r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1); ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0); - r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(R300_SU_CULL_MODE, 1); + r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1); ALLOC_STATE(su_depth_scale, always, 3, 0); - r300->hw.su_depth_scale.cmd[0] = cmdpacket0(R300_SU_DEPTH_SCALE, 2); + r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2); ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0); - r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(R300_RS_COUNT, 2); + r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2); if (is_r500) { - ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0); - r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R500_RS_IP_0, 16); - for (i = 0; i < 8; i++) { - r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] = - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); - } + ALLOC_STATE(ri, variable, R500_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16); ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, 1); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1); } else { - ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0); - r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(R300_RS_IP_0, 8); + ALLOC_STATE(ri, variable, R300_RI_CMDSIZE, 0); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8); ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, 1); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1); } ALLOC_STATE(sc_hyperz, always, 3, 0); - r300->hw.sc_hyperz.cmd[0] = cmdpacket0(R300_SC_HYPERZ, 2); + r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2); ALLOC_STATE(sc_screendoor, always, 2, 0); - r300->hw.sc_screendoor.cmd[0] = cmdpacket0(R300_SC_SCREENDOOR, 1); + r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1); ALLOC_STATE(us_out_fmt, always, 6, 0); - r300->hw.us_out_fmt.cmd[0] = cmdpacket0(R300_US_OUT_FMT, 5); + r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5); if (is_r500) { ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0); - r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(R500_US_CONFIG, 2); + r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2); r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO; - r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(R500_US_CODE_ADDR, 3); - r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(R500_US_FC_CTRL, 1); + r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3); + r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1); r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */ ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0); - r300->hw.r500fp.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 0, 0); + r300->hw.r500fp.cmd[R300_FPI_CMD_0] = + cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0); + r300->hw.r500fp.emit = emit_r500fp; ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0); - r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = cmdr500fp(0, 0, 1, 0); + r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] = + cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0); + r300->hw.r500fp_const.emit = emit_r500fp; } else { ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0); - r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(R300_US_CONFIG, 3); - r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(R300_US_CODE_ADDR_0, 4); + r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3); + r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4); + ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0); - r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(R300_US_TEX_INST_0, 0); + r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0); ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0); - r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, 1); + r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1); ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1); - r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, 1); + r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1); ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2); - r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, 1); + r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1); ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3); - r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, 1); + r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1); ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0); - r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, 0); + r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0); } ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0); - r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(R300_FG_FOG_BLEND, 1); + r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1); ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0); - r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(R300_FG_FOG_COLOR_R, 3); + r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3); ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0); - r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(R300_FG_ALPHA_FUNC, 2); + r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2); ALLOC_STATE(fg_depth_src, always, 2, 0); - r300->hw.fg_depth_src.cmd[0] = cmdpacket0(R300_FG_DEPTH_SRC, 1); + r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1); ALLOC_STATE(rb3d_cctl, always, 2, 0); - r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(R300_RB3D_CCTL, 1); + r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1); ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0); - r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(R300_RB3D_CBLEND, 2); + r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2); ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0); - r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(RB3D_COLOR_CHANNEL_MASK, 1); + r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1); if (is_r500) { ALLOC_STATE(blend_color, always, 3, 0); - r300->hw.blend_color.cmd[0] = cmdpacket0(R500_RB3D_CONSTANT_COLOR_AR, 2); + r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2); } else { ALLOC_STATE(blend_color, always, 2, 0); - r300->hw.blend_color.cmd[0] = cmdpacket0(R300_RB3D_BLEND_COLOR, 1); + r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1); } ALLOC_STATE(rop, always, 2, 0); - r300->hw.rop.cmd[0] = cmdpacket0(R300_RB3D_ROPCNTL, 1); + r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1); ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0); - r300->hw.cb.cmd[R300_CB_CMD_0] = cmdpacket0(R300_RB3D_COLOROFFSET0, 1); - r300->hw.cb.cmd[R300_CB_CMD_1] = cmdpacket0(R300_RB3D_COLORPITCH0, 1); + r300->hw.cb.emit = &emit_cb_offset; ALLOC_STATE(rb3d_dither_ctl, always, 10, 0); - r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(R300_RB3D_DITHER_CTL, 9); + r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9); ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0); - r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(R300_RB3D_AARESOLVE_CTL, 1); - ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); + r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1); + ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0); + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2); + r300->hw.rb3d_discard_src_pixel_lte_threshold.emit = emit_threshold_misc; ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0); r300->hw.zs.cmd[R300_ZS_CMD_0] = - cmdpacket0(R300_ZB_CNTL, 3); + cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3); + ALLOC_STATE(zstencil_format, always, 5, 0); r300->hw.zstencil_format.cmd[0] = - cmdpacket0(R300_ZB_FORMAT, 4); + cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4); + r300->hw.zstencil_format.emit = emit_zstencil_format; + ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0); - r300->hw.zb.cmd[R300_ZB_CMD_0] = cmdpacket0(R300_ZB_DEPTHOFFSET, 2); + r300->hw.zb.emit = emit_zb_offset; ALLOC_STATE(zb_depthclearvalue, always, 2, 0); - r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(R300_ZB_DEPTHCLEARVALUE, 1); - ALLOC_STATE(unk4F30, always, 3, 0); - r300->hw.unk4F30.cmd[0] = cmdpacket0(0x4F30, 2); + r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1); + ALLOC_STATE(zb_zmask, always, 3, 0); + r300->hw.zb_zmask.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZMASK_OFFSET, 2); ALLOC_STATE(zb_hiz_offset, always, 2, 0); - r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(R300_ZB_HIZ_OFFSET, 1); + r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1); ALLOC_STATE(zb_hiz_pitch, always, 2, 0); - r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(R300_ZB_HIZ_PITCH, 1); + r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1); /* VPU only on TCL */ if (has_tcl) { int i; ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0); - r300->hw.vpi.cmd[R300_VPI_CMD_0] = - cmdvpu(R300_PVS_CODE_START, 0); + r300->hw.vpi.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0); + r300->hw.vpi.emit = emit_vpu; if (is_r500) { ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[R300_VPP_CMD_0] = - cmdvpu(R500_PVS_CONST_START, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0); + r300->hw.vpp.emit = emit_vpu; ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[R300_VPS_CMD_0] = - cmdvpu(R500_POINT_VPORT_SCALE_OFFSET, 1); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1); + r300->hw.vps.emit = emit_vpu; for (i = 0; i < 6; i++) { - ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); - r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = - cmdvpu(R500_PVS_UCP_START + i, 1); + ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); + r300->hw.vpucp[i].cmd[0] = + cmdvpu(r300->radeon.radeonScreen, + R500_PVS_UCP_START + i, 1); + r300->hw.vpucp[i].emit = emit_vpu; } } else { ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0); - r300->hw.vpp.cmd[R300_VPP_CMD_0] = - cmdvpu(R300_PVS_CONST_START, 0); + r300->hw.vpp.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0); + r300->hw.vpp.emit = emit_vpu; ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0); - r300->hw.vps.cmd[R300_VPS_CMD_0] = - cmdvpu(R300_POINT_VPORT_SCALE_OFFSET, 1); + r300->hw.vps.cmd[0] = + cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1); + r300->hw.vps.emit = emit_vpu; for (i = 0; i < 6; i++) { ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0); - r300->hw.vpucp[i].cmd[R300_VPUCP_CMD_0] = - cmdvpu(R300_PVS_UCP_START + i, 1); + r300->hw.vpucp[i].cmd[0] = + cmdvpu(r300->radeon.radeonScreen, + R300_PVS_UCP_START + i, 1); + r300->hw.vpucp[i].emit = emit_vpu; } } } @@ -556,130 +742,37 @@ void r300InitCmdBuf(r300ContextPtr r300) /* Textures */ ALLOC_STATE(tex.filter, variable, mtu + 1, 0); r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_FILTER0_0, 0); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0); ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0); r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_FILTER1_0, 0); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0); ALLOC_STATE(tex.size, variable, mtu + 1, 0); - r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_SIZE_0, 0); + r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0); ALLOC_STATE(tex.format, variable, mtu + 1, 0); r300->hw.tex.format.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_FORMAT_0, 0); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0); ALLOC_STATE(tex.pitch, variable, mtu + 1, 0); - r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(R300_TX_FORMAT2_0, 0); + r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0); - ALLOC_STATE(tex.offset, variable, mtu + 1, 0); + ALLOC_STATE(tex.offset, variable, 1, 0); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_OFFSET_0, 0); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0); + r300->hw.tex.offset.emit = &emit_tex_offsets; ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0); r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_CHROMA_KEY_0, 0); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0); ALLOC_STATE(tex.border_color, variable, mtu + 1, 0); r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_BORDER_COLOR_0, 0); - - r300->hw.is_dirty = GL_TRUE; - r300->hw.all_dirty = GL_TRUE; - - /* Initialize command buffer */ - size = - 256 * driQueryOptioni(&r300->radeon.optionCache, - "command_buffer_size"); - if (size < 2 * r300->hw.max_state_size) { - size = 2 * r300->hw.max_state_size + 65535; - } - if (size > 64 * 256) - size = 64 * 256; - - if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) { - fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n", - sizeof(drm_r300_cmd_header_t)); - fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n", - sizeof(drm_radeon_cmd_buffer_t)); - fprintf(stderr, - "Allocating %d bytes command buffer (max state is %d bytes)\n", - size * 4, r300->hw.max_state_size * 4); - } - - r300->cmdbuf.size = size; - r300->cmdbuf.cmd_buf = (uint32_t *) CALLOC(size * 4); - r300->cmdbuf.count_used = 0; - r300->cmdbuf.count_reemit = 0; -} - -/** - * Destroy the command buffer and state atoms. - */ -void r300DestroyCmdBuf(r300ContextPtr r300) -{ - struct r300_state_atom *atom; - - FREE(r300->cmdbuf.cmd_buf); - - foreach(atom, &r300->hw.atomlist) { - FREE(atom->cmd); - } -} - -void r300EmitBlit(r300ContextPtr rmesa, - GLuint color_fmt, - GLuint src_pitch, - GLuint src_offset, - GLuint dst_pitch, - GLuint dst_offset, - GLint srcx, GLint srcy, - GLint dstx, GLint dsty, GLuint w, GLuint h) -{ - drm_r300_cmd_header_t *cmd; - - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, - "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", - __FUNCTION__, src_pitch, src_offset, srcx, srcy, - dst_pitch, dst_offset, dstx, dsty, w, h); - - assert((src_pitch & 63) == 0); - assert((dst_pitch & 63) == 0); - assert((src_offset & 1023) == 0); - assert((dst_offset & 1023) == 0); - assert(w < (1 << 16)); - assert(h < (1 << 16)); - - cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 8, __FUNCTION__); - - cmd[0].header.cmd_type = R300_CMD_PACKET3; - cmd[0].header.pad0 = R300_CMD_PACKET3_RAW; - cmd[1].u = R300_CP_CMD_BITBLT_MULTI | (5 << 16); - cmd[2].u = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | - RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_NONE | - (color_fmt << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP3_S | - RADEON_DP_SRC_SOURCE_MEMORY | - RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS); - - cmd[3].u = ((src_pitch / 64) << 22) | (src_offset >> 10); - cmd[4].u = ((dst_pitch / 64) << 22) | (dst_offset >> 10); - cmd[5].u = (srcx << 16) | srcy; - cmd[6].u = (dstx << 16) | dsty; /* dst */ - cmd[7].u = (w << 16) | h; -} - -void r300EmitWait(r300ContextPtr rmesa, GLuint flags) -{ - drm_r300_cmd_header_t *cmd; + cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0); - assert(!(flags & ~(R300_WAIT_2D | R300_WAIT_3D))); + r300->radeon.hw.is_dirty = GL_TRUE; + r300->radeon.hw.all_dirty = GL_TRUE; - cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); - cmd[0].u = 0; - cmd[0].wait.cmd_type = R300_CMD_WAIT; - cmd[0].wait.flags = flags; + rcommonInitCmdBuf(&r300->radeon); } diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h index a8eaa580bd..53bcc0eeb4 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h @@ -38,79 +38,14 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" -extern int r300FlushCmdBufLocked(r300ContextPtr r300, const char *caller); -extern int r300FlushCmdBuf(r300ContextPtr r300, const char *caller); - -extern void r300EmitState(r300ContextPtr r300); - extern void r300InitCmdBuf(r300ContextPtr r300); -extern void r300DestroyCmdBuf(r300ContextPtr r300); - -/** - * Make sure that enough space is available in the command buffer - * by flushing if necessary. - * - * \param dwords The number of dwords we need to be free on the command buffer - */ -static INLINE void r300EnsureCmdBufSpace(r300ContextPtr r300, - int dwords, const char *caller) -{ - assert(dwords < r300->cmdbuf.size); - - if (r300->cmdbuf.count_used + dwords > r300->cmdbuf.size) - r300FlushCmdBuf(r300, caller); -} - -/** - * Allocate the given number of dwords in the command buffer and return - * a pointer to the allocated area. - * When necessary, these functions cause a flush. r300AllocCmdBuf() also - * causes state reemission after a flush. This is necessary to ensure - * correct hardware state after an unlock. - */ -static INLINE uint32_t *r300RawAllocCmdBuf(r300ContextPtr r300, - int dwords, const char *caller) -{ - uint32_t *ptr; - - r300EnsureCmdBufSpace(r300, dwords, caller); - - ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; - r300->cmdbuf.count_used += dwords; - return ptr; -} - -static INLINE uint32_t *r300AllocCmdBuf(r300ContextPtr r300, - int dwords, const char *caller) -{ - uint32_t *ptr; - - r300EnsureCmdBufSpace(r300, dwords, caller); - - if (!r300->cmdbuf.count_used) { - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, - "Reemit state after flush (from %s)\n", caller); - r300EmitState(r300); - } - - ptr = &r300->cmdbuf.cmd_buf[r300->cmdbuf.count_used]; - r300->cmdbuf.count_used += dwords; - return ptr; -} +void r300_emit_scissor(GLcontext *ctx); -extern void r300EmitBlit(r300ContextPtr rmesa, - GLuint color_fmt, - GLuint src_pitch, - GLuint src_offset, - GLuint dst_pitch, - GLuint dst_offset, - GLint srcx, GLint srcy, - GLint dstx, GLint dsty, GLuint w, GLuint h); +void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom); +int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom); -extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags); -extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start); -extern void r300EmitVertexShader(r300ContextPtr rmesa); -extern void r300EmitPixelShader(r300ContextPtr rmesa); +void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom); +int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom); +int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom); #endif /* __R300_CMDBUF_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 7d6705058f..b7d75426c5 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -43,8 +43,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/matrix.h" #include "main/extensions.h" #include "main/state.h" -#include "main/texobj.h" #include "main/bufferobj.h" +#include "main/texobj.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" @@ -56,42 +56,40 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drivers/common/driverfuncs.h" -#include "radeon_ioctl.h" -#include "radeon_span.h" #include "r300_context.h" +#include "radeon_context.h" +#include "radeon_span.h" #include "r300_cmdbuf.h" #include "r300_state.h" #include "r300_ioctl.h" #include "r300_tex.h" #include "r300_emit.h" +#include "r300_render.h" #include "r300_swtcl.h" +#include "radeon_bocs_wrapper.h" -#ifdef USER_BUFFERS -#include "r300_mem.h" -#endif #include "vblank.h" #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ -/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */ -int future_hw_tcl_on = 1; -int hw_tcl_on = 1; - #define need_GL_VERSION_2_0 #define need_GL_ARB_point_parameters #define need_GL_ARB_vertex_program #define need_GL_EXT_blend_equation_separate #define need_GL_EXT_blend_func_separate #define need_GL_EXT_blend_minmax +#define need_GL_EXT_framebuffer_object #define need_GL_EXT_fog_coord #define need_GL_EXT_gpu_program_parameters #define need_GL_EXT_secondary_color #define need_GL_EXT_stencil_two_side #define need_GL_ATI_separate_stencil #define need_GL_NV_vertex_program + #include "extension_helper.h" + const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ {"GL_ARB_depth_texture", NULL}, @@ -112,6 +110,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, {"GL_EXT_blend_subtract", NULL}, + {"GL_EXT_packed_depth_stencil", NULL}, {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, @@ -125,6 +124,8 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_texture_lod_bias", NULL}, {"GL_EXT_texture_mirror_clamp", NULL}, {"GL_EXT_texture_rectangle", NULL}, + {"GL_EXT_texture_sRGB", NULL}, + {"GL_EXT_vertex_array_bgra", NULL}, {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions}, {"GL_ATI_texture_env_combine3", NULL}, {"GL_ATI_texture_mirror_once", NULL}, @@ -139,6 +140,11 @@ const struct dri_extension card_extensions[] = { }; +const struct dri_extension mm_extensions[] = { + { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, + { NULL, NULL } +}; + /** * The GL 2.0 functions are needed to make display lists work with * functions added by GL_ATI_separate_stencil. @@ -147,16 +153,8 @@ const struct dri_extension gl_20_extension[] = { {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, }; - -extern struct tnl_pipeline_stage _r300_render_stage; -extern const struct tnl_pipeline_stage _r300_tcl_stage; - static const struct tnl_pipeline_stage *r300_pipeline[] = { - /* Try and go straight to t&l - */ - &_r300_tcl_stage, - /* Catch any t&l fallbacks */ &_tnl_vertex_transform_stage, @@ -165,6 +163,7 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { &_tnl_fog_coordinate_stage, &_tnl_texgen_stage, &_tnl_texture_transform_stage, + &_tnl_point_attenuation_stage, &_tnl_vertex_program_stage, /* Try again to go to tcl? @@ -184,6 +183,192 @@ static const struct tnl_pipeline_stage *r300_pipeline[] = { 0, }; +static void r300_get_lock(radeonContextPtr rmesa) +{ + drm_radeon_sarea_t *sarea = rmesa->sarea; + + if (sarea->ctx_owner != rmesa->dri.hwContext) { + sarea->ctx_owner = rmesa->dri.hwContext; + if (!rmesa->radeonScreen->kernel_mm) + radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom); + } +} + +static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) +{ + /* please flush pipe do all pending work */ + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_SC_SCREENDOOR, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_SC_SCREENDOOR, 1)); + radeon_cs_write_dword(cs, 0x00FFFFFF); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_SC_HYPERZ, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_US_CONFIG, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_ZB_CNTL, 1)); + radeon_cs_write_dword(cs, 0x0); + radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D)); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_RB3D_DSTCACHE_CTLSTAT, 1)); + radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen, + R300_ZB_ZCACHE_CTLSTAT, 1)); + radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE); + radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, + R300_WAIT_3D | R300_WAIT_3D_CLEAN)); +} + +static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon) +{ + r300ContextPtr r300 = (r300ContextPtr)radeon; + BATCH_LOCALS(radeon); + + r300->vap_flush_needed = GL_TRUE; + + cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH); + END_BATCH(); + end_3d(radeon); +} + +static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + if (mode) + r300->radeon.Fallback |= bit; + else + r300->radeon.Fallback &= ~bit; +} + +static void r300_init_vtbl(radeonContextPtr radeon) +{ + radeon->vtbl.get_lock = r300_get_lock; + radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset; + radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header; + radeon->vtbl.swtcl_flush = r300_swtcl_flush; + radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms; + radeon->vtbl.fallback = r300_fallback; +} + +static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + ctx->Const.MaxTextureImageUnits = + driQueryOptioni(&r300->radeon.optionCache, "texture_image_units"); + ctx->Const.MaxTextureCoordUnits = + driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units"); + ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureImageUnits, + ctx->Const.MaxTextureCoordUnits); + + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + ctx->Const.MaxTextureLodBias = 16.0; + + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ctx->Const.MaxTextureLevels = 13; + ctx->Const.MaxCubeTextureLevels = 13; + ctx->Const.MaxTextureRectSize = 4096; + } + else { + ctx->Const.MaxTextureLevels = 12; + ctx->Const.MaxCubeTextureLevels = 12; + ctx->Const.MaxTextureRectSize = 2048; + } + + ctx->Const.MinPointSize = 1.0; + ctx->Const.MinPointSizeAA = 1.0; + ctx->Const.MaxPointSize = R300_POINTSIZE_MAX; + ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX; + + ctx->Const.MinLineWidth = 1.0; + ctx->Const.MinLineWidthAA = 1.0; + ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; + ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; + + ctx->Const.MaxDrawBuffers = 1; + + /* currently bogus data */ + if (r300->options.hw_tcl_enabled) { + ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeInstructions = + VSF_MAX_FRAGMENT_LENGTH / 4; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ + ctx->Const.VertexProgram.MaxTemps = 32; + ctx->Const.VertexProgram.MaxNativeTemps = + /*VSF_MAX_FRAGMENT_TEMPS */ 32; + ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + } + + if (screen->chip_family >= CHIP_FAMILY_RV515) { + ctx->Const.FragmentProgram.MaxNativeTemps = R500_PFS_NUM_TEMP_REGS; + ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ + ctx->Const.FragmentProgram.MaxNativeParameters = R500_PFS_NUM_CONST_REGS; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = R500_PFS_MAX_INST; + ctx->Const.FragmentProgram.MaxNativeTexInstructions = R500_PFS_MAX_INST; + ctx->Const.FragmentProgram.MaxNativeInstructions = R500_PFS_MAX_INST; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = R500_PFS_MAX_INST; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; + } else { + ctx->Const.FragmentProgram.MaxNativeTemps = R300_PFS_NUM_TEMP_REGS; + ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ + ctx->Const.FragmentProgram.MaxNativeParameters = R300_PFS_NUM_CONST_REGS; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = R300_PFS_MAX_ALU_INST; + ctx->Const.FragmentProgram.MaxNativeTexInstructions = R300_PFS_MAX_TEX_INST; + ctx->Const.FragmentProgram.MaxNativeInstructions = R300_PFS_MAX_ALU_INST + R300_PFS_MAX_TEX_INST; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; + } +} + +static void r300ParseOptions(r300ContextPtr r300, radeonScreenPtr screen) +{ + struct r300_options options = { 0 }; + + driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache, + screen->driScreen->myNum, "r300"); + + r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, "def_max_anisotropy"); + + options.stencil_two_side_disabled = driQueryOptionb(&r300->radeon.optionCache, "disable_stencil_two_side"); + options.s3tc_force_enabled = driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable"); + options.s3tc_force_disabled = driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc"); + + if (!(screen->chip_flags & RADEON_CHIPSET_TCL) || driQueryOptioni(&r300->radeon.optionCache, "tcl_mode") == DRI_CONF_TCL_SW) + options.hw_tcl_enabled = 0; + else + options.hw_tcl_enabled = 1; + + options.conformance_mode = !driQueryOptionb(&r300->radeon.optionCache, "disable_lowimpact_fallback"); + + r300->options = options; +} + +static void r300InitGLExtensions(GLcontext *ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + + driInitExtensions(ctx, card_extensions, GL_TRUE); + if (r300->radeon.radeonScreen->kernel_mm) + driInitExtensions(ctx, mm_extensions, GL_FALSE); + + if (r300->options.stencil_two_side_disabled) + _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); + + if (ctx->Mesa_DXTn && !r300->options.s3tc_force_enabled) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + _mesa_enable_extension(ctx, "GL_S3_s3tc"); + } else if (r300->options.s3tc_force_disabled) { + _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + } +} + /* Create the device specific rendering context. */ GLboolean r300CreateContext(const __GLcontextModes * glVisual, @@ -195,42 +380,25 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, struct dd_function_table functions; r300ContextPtr r300; GLcontext *ctx; - int tcl_mode, i; assert(glVisual); assert(driContextPriv); assert(screen); - /* Allocate the R300 context */ r300 = (r300ContextPtr) CALLOC(sizeof(*r300)); if (!r300) return GL_FALSE; - if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) - hw_tcl_on = future_hw_tcl_on = 0; + r300ParseOptions(r300, screen); - /* Parse configuration files. - * Do this here so that initialMaxAnisotropy is set before we create - * the default textures. - */ - driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache, - screen->driScreen->myNum, "r300"); - r300->initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, - "def_max_anisotropy"); + r300_init_vtbl(&r300->radeon); - /* Init default driver functions then plug in our R300-specific functions - * (the texture functions are especially important) - */ _mesa_init_driver_functions(&functions); r300InitIoctlFuncs(&functions); r300InitStateFuncs(&functions); r300InitTextureFuncs(&functions); r300InitShaderFuncs(&functions); -#ifdef USER_BUFFERS - r300_mem_init(r300); -#endif - if (!radeonInitContext(&r300->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { @@ -238,94 +406,17 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, return GL_FALSE; } - /* Init r300 context data */ - r300->dma.buf0_address = - r300->radeon.radeonScreen->buffers->list[0].address; - - (void)memset(r300->texture_heaps, 0, sizeof(r300->texture_heaps)); - make_empty_list(&r300->swapped); - - r300->nr_heaps = 1 /* screen->numTexHeaps */ ; - assert(r300->nr_heaps < RADEON_NR_TEX_HEAPS); - for (i = 0; i < r300->nr_heaps; i++) { - /* *INDENT-OFF* */ - r300->texture_heaps[i] = driCreateTextureHeap(i, r300, - screen-> - texSize[i], 12, - RADEON_NR_TEX_REGIONS, - (drmTextureRegionPtr) - r300->radeon.sarea-> - tex_list[i], - &r300->radeon.sarea-> - tex_age[i], - &r300->swapped, - sizeof - (r300TexObj), - (destroy_texture_object_t - *) - r300DestroyTexObj); - /* *INDENT-ON* */ - } - r300->texture_depth = driQueryOptioni(&r300->radeon.optionCache, - "texture_depth"); - if (r300->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) - r300->texture_depth = (screen->cpp == 4) ? - DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; - - /* Set the maximum texture size small enough that we can guarentee that - * all texture units can bind a maximal texture and have them both in - * texturable memory at once. - */ - ctx = r300->radeon.glCtx; - ctx->Const.MaxTextureImageUnits = - driQueryOptioni(&r300->radeon.optionCache, "texture_image_units"); - ctx->Const.MaxTextureCoordUnits = - driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units"); - ctx->Const.MaxTextureUnits = - MIN2(ctx->Const.MaxTextureImageUnits, - ctx->Const.MaxTextureCoordUnits); - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - ctx->Const.MaxTextureLodBias = 16.0; + r300->fallback = 0; + if (r300->options.hw_tcl_enabled) + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - if (screen->chip_family >= CHIP_FAMILY_RV515) - ctx->Const.MaxTextureLevels = 13; - else - ctx->Const.MaxTextureLevels = 12; - - driCalculateMaxTextureLevels( r300->texture_heaps, - r300->nr_heaps, - & ctx->Const, - 4, - ctx->Const.MaxTextureLevels - 1, - MIN2(ctx->Const.MaxTextureLevels, - MAX_3D_TEXTURE_LEVELS) - 1, - ctx->Const.MaxTextureLevels - 1, - ctx->Const.MaxTextureLevels - 1, - ctx->Const.MaxTextureLevels - 1, - GL_FALSE, - 2 ); - - ctx->Const.MinPointSize = 1.0; - ctx->Const.MinPointSizeAA = 1.0; - ctx->Const.MaxPointSize = R300_POINTSIZE_MAX; - ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX; - - ctx->Const.MinLineWidth = 1.0; - ctx->Const.MinLineWidthAA = 1.0; - ctx->Const.MaxLineWidth = R300_LINESIZE_MAX; - ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX; + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; -#ifdef USER_BUFFERS - /* Needs further modifications */ -#if 0 - ctx->Const.MaxArrayLockSize = - ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); -#endif -#endif + r300InitConstValues(ctx, screen); - ctx->Const.MaxDrawBuffers = 1; + _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); @@ -336,16 +427,12 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _tnl_CreateContext(ctx); _swsetup_CreateContext(ctx); _swsetup_Wakeup(ctx); - _ae_create_context(ctx); /* Install the customized pipeline: */ _tnl_destroy_pipeline(ctx); _tnl_install_pipeline(ctx, r300_pipeline); - - /* Try and keep materials and vertices separate: - */ -/* _tnl_isolate_materials(ctx, GL_TRUE); */ + TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; /* Configure swrast and TNL to match hardware characteristics: */ @@ -354,226 +441,25 @@ GLboolean r300CreateContext(const __GLcontextModes * glVisual, _tnl_allow_pixel_fog(ctx, GL_FALSE); _tnl_allow_vertex_fog(ctx, GL_TRUE); - /* currently bogus data */ - if (screen->chip_flags & RADEON_CHIPSET_TCL) { - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + if (r300->options.hw_tcl_enabled) { + r300InitDraw(ctx); + } else { + r300InitSwtcl(ctx); } - ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; - ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ - ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS; - ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST; - ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeInstructions = - PFS_MAX_ALU_INST + PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeTexIndirections = - PFS_MAX_TEX_INDIRECT; - ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ - ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - - driInitExtensions(ctx, card_extensions, GL_TRUE); - - if (driQueryOptionb - (&r300->radeon.optionCache, "disable_stencil_two_side")) - _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); - - if (r300->radeon.glCtx->Mesa_DXTn - && !driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc")) { - _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); - _mesa_enable_extension(ctx, "GL_S3_s3tc"); - } else - if (driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable")) - { - _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); - } - - r300->disable_lowimpact_fallback = - driQueryOptionb(&r300->radeon.optionCache, - "disable_lowimpact_fallback"); - - radeonInitSpanFuncs(ctx); + radeon_fbo_init(&r300->radeon); + radeonInitSpanFuncs( ctx ); r300InitCmdBuf(r300); r300InitState(r300); - if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) - r300InitSwtcl(ctx); - - TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + r300InitShaderFunctions(r300); - tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode"); - if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) { - fprintf(stderr, "disabling 3D acceleration\n"); -#if R200_MERGED - FALLBACK(&r300->radeon, RADEON_FALLBACK_DISABLE, 1); -#endif - } - if (tcl_mode == DRI_CONF_TCL_SW || - !(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { - if (r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { - r300->radeon.radeonScreen->chip_flags &= - ~RADEON_CHIPSET_TCL; - fprintf(stderr, "Disabling HW TCL support\n"); - } - TCL_FALLBACK(r300->radeon.glCtx, - RADEON_TCL_FALLBACK_TCL_DISABLE, 1); + if (screen->chip_family == CHIP_FAMILY_RS600 || screen->chip_family == CHIP_FAMILY_RS690 || + screen->chip_family == CHIP_FAMILY_RS740) { + r300->radeon.texture_row_align = 64; } - return GL_TRUE; -} - -static void r300FreeGartAllocations(r300ContextPtr r300) -{ - int i, ret, tries = 0, done_age, in_use = 0; - drm_radeon_mem_free_t memfree; + r300InitGLExtensions(ctx); - memfree.region = RADEON_MEM_REGION_GART; - -#ifdef USER_BUFFERS - for (i = r300->rmm->u_last; i > 0; i--) { - if (r300->rmm->u_list[i].ptr == NULL) { - continue; - } - - /* check whether this buffer is still in use */ - if (r300->rmm->u_list[i].pending) { - in_use++; - } - } - /* Cannot flush/lock if no context exists. */ - if (in_use) - r300FlushCmdBuf(r300, __FUNCTION__); - - done_age = radeonGetAge((radeonContextPtr) r300); - - for (i = r300->rmm->u_last; i > 0; i--) { - if (r300->rmm->u_list[i].ptr == NULL) { - continue; - } - - /* check whether this buffer is still in use */ - if (!r300->rmm->u_list[i].pending) { - continue; - } - - assert(r300->rmm->u_list[i].h_pending == 0); - - tries = 0; - while (r300->rmm->u_list[i].age > done_age && tries++ < 1000) { - usleep(10); - done_age = radeonGetAge((radeonContextPtr) r300); - } - if (tries >= 1000) { - WARN_ONCE("Failed to idle region!"); - } - - memfree.region_offset = (char *)r300->rmm->u_list[i].ptr - - (char *)r300->radeon.radeonScreen->gartTextures.map; - - ret = drmCommandWrite(r300->radeon.radeonScreen->driScreen->fd, - DRM_RADEON_FREE, &memfree, - sizeof(memfree)); - if (ret) { - fprintf(stderr, "Failed to free at %p\nret = %s\n", - r300->rmm->u_list[i].ptr, strerror(-ret)); - } else { - if (i == r300->rmm->u_last) - r300->rmm->u_last--; - - r300->rmm->u_list[i].pending = 0; - r300->rmm->u_list[i].ptr = NULL; - } - } - r300->rmm->u_head = i; -#endif /* USER_BUFFERS */ + return GL_TRUE; } -/* Destroy the device specific context. - */ -void r300DestroyContext(__DRIcontextPrivate * driContextPriv) -{ - GET_CURRENT_CONTEXT(ctx); - r300ContextPtr r300 = (r300ContextPtr) driContextPriv->driverPrivate; - radeonContextPtr radeon = (radeonContextPtr) r300; - radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL; - int i; - - if (RADEON_DEBUG & DEBUG_DRI) { - fprintf(stderr, "Destroying context !\n"); - } - - /* check if we're deleting the currently bound context */ - if (&r300->radeon == current) { - radeonFlush(r300->radeon.glCtx); - _mesa_make_current(NULL, NULL, NULL); - } - - /* Free r300 context resources */ - assert(r300); /* should never be null */ - - if (r300) { - GLboolean release_texture_heaps; - - release_texture_heaps = - (r300->radeon.glCtx->Shared->RefCount == 1); - _swsetup_DestroyContext(r300->radeon.glCtx); - _tnl_DestroyContext(r300->radeon.glCtx); - _vbo_DestroyContext(r300->radeon.glCtx); - _swrast_DestroyContext(r300->radeon.glCtx); - - if (r300->dma.current.buf) { - r300ReleaseDmaRegion(r300, &r300->dma.current, - __FUNCTION__); -#ifndef USER_BUFFERS - r300FlushCmdBuf(r300, __FUNCTION__); -#endif - } - r300FreeGartAllocations(r300); - r300DestroyCmdBuf(r300); - - if (radeon->state.scissor.pClipRects) { - FREE(radeon->state.scissor.pClipRects); - radeon->state.scissor.pClipRects = NULL; - } - - if (release_texture_heaps) { - /* This share group is about to go away, free our private - * texture object data. - */ - int i; - - for (i = 0; i < r300->nr_heaps; i++) { - driDestroyTextureHeap(r300->texture_heaps[i]); - r300->texture_heaps[i] = NULL; - } - - assert(is_empty_list(&r300->swapped)); - } - - /* Drop texture object references from current hardware state */ - for (i = 0; i < 8; i++) { - _mesa_reference_texobj(&r300->state.texture.unit[i].texobj, NULL); - } - - radeonCleanupContext(&r300->radeon); - -#ifdef USER_BUFFERS - /* the memory manager might be accessed when Mesa frees the shared - * state, so don't destroy it earlier - */ - r300_mem_destroy(r300); -#endif - - /* free the option cache */ - driDestroyOptionCache(&r300->radeon.optionCache); - - FREE(r300); - } -} diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 96a3205f1a..026c33c67c 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -37,26 +37,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef __R300_CONTEXT_H__ #define __R300_CONTEXT_H__ -#include "tnl/t_vertex.h" #include "drm.h" #include "radeon_drm.h" #include "dri_util.h" -#include "texmem.h" +#include "radeon_common.h" -#include "main/macros.h" #include "main/mtypes.h" -#include "main/colormac.h" - -#define USER_BUFFERS +#include "shader/prog_instruction.h" struct r300_context; typedef struct r300_context r300ContextRec; typedef struct r300_context *r300ContextPtr; -#include "radeon_lock.h" -#include "main/mm.h" -/* From http://gcc.gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . +/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html . I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble with other compilers ... GLUE! */ @@ -73,180 +67,14 @@ typedef struct r300_context *r300ContextPtr; } #include "r300_vertprog.h" -#include "r500_fragprog.h" - -/** - * This function takes a float and packs it into a uint32_t - */ -static INLINE uint32_t r300PackFloat32(float fl) -{ - union { - float fl; - uint32_t u; - } u; - - u.fl = fl; - return u.u; -} - -/* This is probably wrong for some values, I need to test this - * some more. Range checking would be a good idea also.. - * - * But it works for most things. I'll fix it later if someone - * else with a better clue doesn't - */ -static INLINE uint32_t r300PackFloat24(float f) -{ - float mantissa; - int exponent; - uint32_t float24 = 0; - - if (f == 0.0) - return 0; - - mantissa = frexpf(f, &exponent); - - /* Handle -ve */ - if (mantissa < 0) { - float24 |= (1 << 23); - mantissa = mantissa * -1.0; - } - /* Handle exponent, bias of 63 */ - exponent += 62; - float24 |= (exponent << 16); - /* Kill 7 LSB of mantissa */ - float24 |= (r300PackFloat32(mantissa) & 0x7FFFFF) >> 7; - - return float24; -} - -/************ DMA BUFFERS **************/ - -/* Need refcounting on dma buffers: - */ -struct r300_dma_buffer { - int refcount; /**< the number of retained regions in buf */ - drmBufPtr buf; - int id; -}; -#undef GET_START -#ifdef USER_BUFFERS -#define GET_START(rvb) (r300GartOffsetFromVirtual(rmesa, (rvb)->address+(rvb)->start)) -#else -#define GET_START(rvb) (rmesa->radeon.radeonScreen->gart_buffer_offset + \ - (rvb)->address - rmesa->dma.buf0_address + \ - (rvb)->start) -#endif -/* A retained region, eg vertices for indexed vertices. - */ -struct r300_dma_region { - struct r300_dma_buffer *buf; - char *address; /* == buf->address */ - int start, end, ptr; /* offsets from start of buf */ - - int aos_offset; /* address in GART memory */ - int aos_stride; /* distance between elements, in dwords */ - int aos_size; /* number of components (1-4) */ -}; - -struct r300_dma { - /* Active dma region. Allocations for vertices and retained - * regions come from here. Also used for emitting random vertices, - * these may be flushed by calling flush_current(); - */ - struct r300_dma_region current; - void (*flush) (r300ContextPtr); - - char *buf0_address; /* start of buf[0], for index calcs */ - - /* Number of "in-flight" DMA buffers, i.e. the number of buffers - * for which a DISCARD command is currently queued in the command buffer. - */ - GLuint nr_released_bufs; -}; - - /* Texture related */ - -typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr; - -/* Maximum number of mipmap levels supported by any supported GPU - */ -#define R300_MAX_TEXTURE_LEVELS 13 - -/* Texture object in locally shared texture space. - */ -struct r300_tex_obj { - driTextureObject base; - - GLuint bufAddr; /* Offset to start of locally - shared texture block */ - - drm_radeon_tex_image_t image[6][R300_MAX_TEXTURE_LEVELS]; - /* Six, for the cube faces */ - - GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ - - GLuint pitch; /* this isn't sent to hardware just used in calculations */ - /* hardware register values */ - /* Note that R200 has 8 registers per texture and R300 only 7 */ - GLuint filter; - GLuint filter_1; - GLuint pitch_reg; - GLuint size; /* npot only */ - GLuint format; - GLuint offset; /* Image location in the card's address space. - All cube faces follow. */ - GLuint unknown4; - GLuint unknown5; - /* end hardware registers */ - - /* registers computed by r200 code - keep them here to - compare against what is actually written. - - to be removed later.. */ - GLuint pp_border_color; - GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ - GLuint format_x; - - GLboolean border_fallback; - - GLuint tile_bits; /* hw texture tile bits used on this texture */ -}; - -struct r300_texture_env_state { - struct gl_texture_object *texobj; - GLenum format; - GLenum envMode; -}; /* The blit width for texture uploads */ #define R300_BLIT_WIDTH_BYTES 1024 #define R300_MAX_TEXTURE_UNITS 8 -struct r300_texture_state { - struct r300_texture_env_state unit[R300_MAX_TEXTURE_UNITS]; - int tc_count; /* number of incoming texture coordinates from VAP */ -}; -/** - * A block of hardware state. - * - * When check returns non-zero, the returned number of dwords must be - * copied verbatim into the command buffer in order to update a state atom - * when it is dirty. - */ -struct r300_state_atom { - struct r300_state_atom *next, *prev; - const char *name; /* for debug */ - int cmd_size; /* maximum size in dwords */ - GLuint idx; /* index in an array (e.g. textures) */ - uint32_t *cmd; - GLboolean dirty; - - int (*check) (r300ContextPtr, struct r300_state_atom * atom); -}; #define R300_VPT_CMD_0 0 #define R300_VPT_XSCALE 1 @@ -288,9 +116,11 @@ struct r300_state_atom { #define R300_GB_MISC_MSPOS_0 1 #define R300_GB_MISC_MSPOS_1 2 #define R300_GB_MISC_TILE_CONFIG 3 -#define R300_GB_MISC_SELECT 4 -#define R300_GB_MISC_AA_CONFIG 5 -#define R300_GB_MISC_CMDSIZE 6 +#define R300_GB_MISC_CMDSIZE 4 +#define R300_GB_MISC2_CMD_0 0 +#define R300_GB_MISC2_SELECT 1 +#define R300_GB_MISC2_AA_CONFIG 2 +#define R300_GB_MISC2_CMDSIZE 3 #define R300_TXE_CMD_0 0 #define R300_TXE_ENABLE 1 @@ -463,124 +293,100 @@ struct r300_state_atom { * Cache for hardware register state. */ struct r300_hw_state { - struct r300_state_atom atomlist; - - GLboolean is_dirty; - GLboolean all_dirty; - int max_state_size; /* in dwords */ - - struct r300_state_atom vpt; /* viewport (1D98) */ - struct r300_state_atom vap_cntl; - struct r300_state_atom vap_index_offset; /* 0x208c r5xx only */ - struct r300_state_atom vof; /* VAP output format register 0x2090 */ - struct r300_state_atom vte; /* (20B0) */ - struct r300_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */ - struct r300_state_atom vap_cntl_status; - struct r300_state_atom vir[2]; /* vap input route (2150/21E0) */ - struct r300_state_atom vic; /* vap input control (2180) */ - struct r300_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */ - struct r300_state_atom vap_clip_cntl; - struct r300_state_atom vap_clip; - struct r300_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */ - struct r300_state_atom pvs; /* pvs_cntl (22D0) */ - struct r300_state_atom gb_enable; /* (4008) */ - struct r300_state_atom gb_misc; /* Multisampling position shifts ? (4010) */ - struct r300_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */ - struct r300_state_atom ga_triangle_stipple; /* (4214) */ - struct r300_state_atom ps; /* pointsize (421C) */ - struct r300_state_atom ga_point_minmax; /* (4230) */ - struct r300_state_atom lcntl; /* line control */ - struct r300_state_atom ga_line_stipple; /* (4260) */ - struct r300_state_atom shade; - struct r300_state_atom polygon_mode; - struct r300_state_atom fogp; /* fog parameters (4294) */ - struct r300_state_atom ga_soft_reset; /* (429C) */ - struct r300_state_atom zbias_cntl; - struct r300_state_atom zbs; /* zbias (42A4) */ - struct r300_state_atom occlusion_cntl; - struct r300_state_atom cul; /* cull cntl (42B8) */ - struct r300_state_atom su_depth_scale; /* (42C0) */ - struct r300_state_atom rc; /* rs control (4300) */ - struct r300_state_atom ri; /* rs interpolators (4310) */ - struct r300_state_atom rr; /* rs route (4330) */ - struct r300_state_atom sc_hyperz; /* (43A4) */ - struct r300_state_atom sc_screendoor; /* (43E8) */ - struct r300_state_atom fp; /* fragment program cntl + nodes (4600) */ - struct r300_state_atom fpt; /* texi - (4620) */ - struct r300_state_atom us_out_fmt; /* (46A4) */ - struct r300_state_atom r500fp; /* r500 fp instructions */ - struct r300_state_atom r500fp_const; /* r500 fp constants */ - struct r300_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ - struct r300_state_atom fogs; /* fog state (4BC0) */ - struct r300_state_atom fogc; /* fog color (4BC8) */ - struct r300_state_atom at; /* alpha test (4BD4) */ - struct r300_state_atom fg_depth_src; /* (4BD8) */ - struct r300_state_atom fpp; /* 0x4C00 and following */ - struct r300_state_atom rb3d_cctl; /* (4E00) */ - struct r300_state_atom bld; /* blending (4E04) */ - struct r300_state_atom cmk; /* colormask (4E0C) */ - struct r300_state_atom blend_color; /* constant blend color */ - struct r300_state_atom rop; /* ropcntl */ - struct r300_state_atom cb; /* colorbuffer (4E28) */ - struct r300_state_atom rb3d_dither_ctl; /* (4E50) */ - struct r300_state_atom rb3d_aaresolve_ctl; /* (4E88) */ - struct r300_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */ - struct r300_state_atom zs; /* zstencil control (4F00) */ - struct r300_state_atom zstencil_format; - struct r300_state_atom zb; /* z buffer (4F20) */ - struct r300_state_atom zb_depthclearvalue; /* (4F28) */ - struct r300_state_atom unk4F30; /* (4F30) */ - struct r300_state_atom zb_hiz_offset; /* (4F44) */ - struct r300_state_atom zb_hiz_pitch; /* (4F54) */ - - struct r300_state_atom vpi; /* vp instructions */ - struct r300_state_atom vpp; /* vp parameters */ - struct r300_state_atom vps; /* vertex point size (?) */ - struct r300_state_atom vpucp[6]; /* vp user clip plane - 6 */ + struct radeon_state_atom vpt; /* viewport (1D98) */ + struct radeon_state_atom vap_cntl; + struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */ + struct radeon_state_atom vof; /* VAP output format register 0x2090 */ + struct radeon_state_atom vte; /* (20B0) */ + struct radeon_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */ + struct radeon_state_atom vap_cntl_status; + struct radeon_state_atom vir[2]; /* vap input route (2150/21E0) */ + struct radeon_state_atom vic; /* vap input control (2180) */ + struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */ + struct radeon_state_atom vap_clip_cntl; + struct radeon_state_atom vap_clip; + struct radeon_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */ + struct radeon_state_atom pvs; /* pvs_cntl (22D0) */ + struct radeon_state_atom gb_enable; /* (4008) */ + struct radeon_state_atom gb_misc; /* Multisampling position shifts ? (4010) */ + struct radeon_state_atom gb_misc2; /* Multisampling position shifts ? (4010) */ + struct radeon_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */ + struct radeon_state_atom ga_triangle_stipple; /* (4214) */ + struct radeon_state_atom ps; /* pointsize (421C) */ + struct radeon_state_atom ga_point_minmax; /* (4230) */ + struct radeon_state_atom lcntl; /* line control */ + struct radeon_state_atom ga_line_stipple; /* (4260) */ + struct radeon_state_atom shade; + struct radeon_state_atom shade2; + struct radeon_state_atom polygon_mode; + struct radeon_state_atom fogp; /* fog parameters (4294) */ + struct radeon_state_atom ga_soft_reset; /* (429C) */ + struct radeon_state_atom zbias_cntl; + struct radeon_state_atom zbs; /* zbias (42A4) */ + struct radeon_state_atom occlusion_cntl; + struct radeon_state_atom cul; /* cull cntl (42B8) */ + struct radeon_state_atom su_depth_scale; /* (42C0) */ + struct radeon_state_atom rc; /* rs control (4300) */ + struct radeon_state_atom ri; /* rs interpolators (4310) */ + struct radeon_state_atom rr; /* rs route (4330) */ + struct radeon_state_atom sc_hyperz; /* (43A4) */ + struct radeon_state_atom sc_screendoor; /* (43E8) */ + struct radeon_state_atom fp; /* fragment program cntl + nodes (4600) */ + struct radeon_state_atom fpt; /* texi - (4620) */ + struct radeon_state_atom us_out_fmt; /* (46A4) */ + struct radeon_state_atom r500fp; /* r500 fp instructions */ + struct radeon_state_atom r500fp_const; /* r500 fp constants */ + struct radeon_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */ + struct radeon_state_atom fogs; /* fog state (4BC0) */ + struct radeon_state_atom fogc; /* fog color (4BC8) */ + struct radeon_state_atom at; /* alpha test (4BD4) */ + struct radeon_state_atom fg_depth_src; /* (4BD8) */ + struct radeon_state_atom fpp; /* 0x4C00 and following */ + struct radeon_state_atom rb3d_cctl; /* (4E00) */ + struct radeon_state_atom bld; /* blending (4E04) */ + struct radeon_state_atom cmk; /* colormask (4E0C) */ + struct radeon_state_atom blend_color; /* constant blend color */ + struct radeon_state_atom rop; /* ropcntl */ + struct radeon_state_atom cb; /* colorbuffer (4E28) */ + struct radeon_state_atom rb3d_dither_ctl; /* (4E50) */ + struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */ + struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */ + struct radeon_state_atom zs; /* zstencil control (4F00) */ + struct radeon_state_atom zstencil_format; + struct radeon_state_atom zb; /* z buffer (4F20) */ + struct radeon_state_atom zb_depthclearvalue; /* (4F28) */ + struct radeon_state_atom zb_zmask; /* (4F30) */ + struct radeon_state_atom zb_hiz_offset; /* (4F44) */ + struct radeon_state_atom zb_hiz_pitch; /* (4F54) */ + + struct radeon_state_atom vpi; /* vp instructions */ + struct radeon_state_atom vpp; /* vp parameters */ + struct radeon_state_atom vps; /* vertex point size (?) */ + struct radeon_state_atom vpucp[6]; /* vp user clip plane - 6 */ /* 8 texture units */ /* the state is grouped by function and not by texture unit. This makes single unit updates really awkward - we are much better off updating the whole thing at once */ struct { - struct r300_state_atom filter; - struct r300_state_atom filter_1; - struct r300_state_atom size; - struct r300_state_atom format; - struct r300_state_atom pitch; - struct r300_state_atom offset; - struct r300_state_atom chroma_key; - struct r300_state_atom border_color; + struct radeon_state_atom filter; + struct radeon_state_atom filter_1; + struct radeon_state_atom size; + struct radeon_state_atom format; + struct radeon_state_atom pitch; + struct radeon_state_atom offset; + struct radeon_state_atom chroma_key; + struct radeon_state_atom border_color; } tex; - struct r300_state_atom txe; /* tex enable (4104) */ -}; + struct radeon_state_atom txe; /* tex enable (4104) */ -/** - * This structure holds the command buffer while it is being constructed. - * - * The first batch of commands in the buffer is always the state that needs - * to be re-emitted when the context is lost. This batch can be skipped - * otherwise. - */ -struct r300_cmdbuf { - int size; /* DWORDs allocated for buffer */ - uint32_t *cmd_buf; - int count_used; /* DWORDs filled so far */ - int count_reemit; /* size of re-emission batch */ + radeonTexObj *textures[R300_MAX_TEXTURE_UNITS]; }; /** * State cache */ -struct r300_depthbuffer_state { - GLfloat scale; -}; - -struct r300_stencilbuffer_state { - GLboolean hw_stencil; -}; - /* Vertex shader state */ /* Perhaps more if we store programs in vmem? */ @@ -593,73 +399,55 @@ struct r300_stencilbuffer_state { #define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) #define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) -struct r300_vertex_shader_fragment { - int length; - union { - GLuint d[VSF_MAX_FRAGMENT_LENGTH]; - float f[VSF_MAX_FRAGMENT_LENGTH]; - GLuint i[VSF_MAX_FRAGMENT_LENGTH]; - } body; -}; - -struct r300_vertex_shader_state { - struct r300_vertex_shader_fragment program; -}; - -extern int hw_tcl_on; - #define COLOR_IS_RGBA #define TAG(x) r300##x #include "tnl_dd/t_dd_vertex.h" #undef TAG -//#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current) -#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp) - -/* Should but doesnt work */ -//#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp) - -/* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday. - * Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly. - */ - -struct r300_vertex_program_key { - GLuint InputsRead; - GLuint OutputsWritten; - GLuint OutputsAdded; -}; - struct r300_vertex_program { struct r300_vertex_program *next; - struct r300_vertex_program_key key; - int translated; - struct r300_vertex_shader_fragment program; + struct r300_vertex_program_key { + GLuint InputsRead; + GLuint OutputsWritten; + GLuint OutputsAdded; + } key; + + struct r300_vertex_shader_hw_code { + int length; + union { + GLuint d[VSF_MAX_FRAGMENT_LENGTH]; + float f[VSF_MAX_FRAGMENT_LENGTH]; + } body; + } hw_code; + + GLboolean translated; + GLboolean error; int pos_end; int num_temporaries; /* Number of temp vars used by program */ int wpos_idx; int inputs[VERT_ATTRIB_MAX]; int outputs[VERT_RESULT_MAX]; - int native; - int ref_count; - int use_ref_count; }; struct r300_vertex_program_cont { struct gl_vertex_program mesa_program; /* Must be first */ - struct r300_vertex_shader_fragment params; struct r300_vertex_program *progs; }; -#define PFS_MAX_ALU_INST 64 -#define PFS_MAX_TEX_INST 64 -#define PFS_MAX_TEX_INDIRECT 4 -#define PFS_NUM_TEMP_REGS 32 -#define PFS_NUM_CONST_REGS 16 +#define R300_PFS_MAX_ALU_INST 64 +#define R300_PFS_MAX_TEX_INST 32 +#define R300_PFS_MAX_TEX_INDIRECT 4 +#define R300_PFS_NUM_TEMP_REGS 32 +#define R300_PFS_NUM_CONST_REGS 32 -struct r300_pfs_compile_state; +#define R500_PFS_MAX_INST 512 +#define R500_PFS_NUM_TEMP_REGS 128 +#define R500_PFS_NUM_CONST_REGS 256 +struct r300_pfs_compile_state; +struct r500_pfs_compile_state; /** * Stores state that influences the compilation of a fragment program. @@ -702,7 +490,7 @@ struct r300_fragment_program_node { struct r300_fragment_program_code { struct { int length; /**< total # of texture instructions used */ - GLuint inst[PFS_MAX_TEX_INST]; + GLuint inst[R300_PFS_MAX_TEX_INST]; } tex; struct { @@ -712,7 +500,7 @@ struct r300_fragment_program_code { GLuint inst1; GLuint inst2; GLuint inst3; - } inst[PFS_MAX_ALU_INST]; + } inst[R300_PFS_MAX_ALU_INST]; } alu; struct r300_fragment_program_node node[4]; @@ -723,53 +511,12 @@ struct r300_fragment_program_code { * Remember which program register a given hardware constant * belongs to. */ - struct prog_src_register constant[PFS_NUM_CONST_REGS]; + struct prog_src_register constant[R300_PFS_NUM_CONST_REGS]; int const_nr; int max_temp_idx; }; -/** - * Store everything about a fragment program that is needed - * to render with that program. - */ -struct r300_fragment_program { - struct gl_fragment_program mesa_program; - - GLboolean translated; - GLboolean error; - - struct r300_fragment_program_external_state state; - struct r300_fragment_program_code code; - - GLboolean WritesDepth; - GLuint optimization; -}; - -struct r500_pfs_compile_state; - -struct r500_fragment_program_external_state { - struct { - /** - * If the sampler is used as a shadow sampler, - * this field is: - * 0 - GL_LUMINANCE - * 1 - GL_INTENSITY - * 2 - GL_ALPHA - * depending on the depth texture mode. - */ - GLuint depth_texture_mode : 2; - - /** - * If the sampler is used as a shadow sampler, - * this field is (texture_compare_func - GL_NEVER). - * [e.g. if compare function is GL_LEQUAL, this field is 3] - * - * Otherwise, this field is 0. - */ - GLuint texture_compare_func : 3; - } unit[16]; -}; struct r500_fragment_program_code { struct { @@ -779,7 +526,7 @@ struct r500_fragment_program_code { GLuint inst3; GLuint inst4; GLuint inst5; - } inst[512]; + } inst[R500_PFS_MAX_INST]; int inst_offset; int inst_end; @@ -788,94 +535,46 @@ struct r500_fragment_program_code { * Remember which program register a given hardware constant * belongs to. */ - struct prog_src_register constant[PFS_NUM_CONST_REGS]; + struct prog_src_register constant[R500_PFS_NUM_CONST_REGS]; int const_nr; int max_temp_idx; }; -struct r500_fragment_program { - struct gl_fragment_program mesa_program; +/** +* Store everything about a fragment program that is needed +* to render with that program. +*/ +struct r300_fragment_program { + struct gl_fragment_program Base; - GLcontext *ctx; GLboolean translated; GLboolean error; - struct r500_fragment_program_external_state state; - struct r500_fragment_program_code code; + struct r300_fragment_program_external_state state; + union rX00_fragment_program_code { + struct r300_fragment_program_code r300; + struct r500_fragment_program_code r500; + } code; GLboolean writes_depth; - GLuint optimization; }; -#define R300_MAX_AOS_ARRAYS 16 - -#define REG_COORDS 0 -#define REG_COLOR0 1 -#define REG_TEX0 2 - -struct r300_state { - struct r300_depthbuffer_state depth; - struct r300_texture_state texture; - int sw_tcl_inputs[VERT_ATTRIB_MAX]; - struct r300_vertex_shader_state vertex_shader; - struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; - int aos_count; - - GLuint *Elts; - struct r300_dma_region elt_dma; - - struct r300_dma_region swtcl_dma; - DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for. - They are the same as tnl->render_inputs for fixed pipeline */ - - struct r300_stencilbuffer_state stencil; - +struct r300_fragment_program_compiler { + r300ContextPtr r300; + struct r300_fragment_program *fp; + union rX00_fragment_program_code *code; + struct gl_program *program; }; -#define R300_FALLBACK_NONE 0 -#define R300_FALLBACK_TCL 1 -#define R300_FALLBACK_RAST 2 +#define R300_MAX_AOS_ARRAYS 16 + /* r300_swtcl.c */ struct r300_swtcl_info { - GLuint RenderIndex; - - /** - * Size of a hardware vertex. This is calculated when \c ::vertex_attrs is - * installed in the Mesa state vector. - */ - GLuint vertex_size; - - /** - * Attributes instructing the Mesa TCL pipeline where / how to put vertex - * data in the hardware buffer. - */ - struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; - - /** - * Number of elements of \c ::vertex_attrs that are actually used. - */ - GLuint vertex_attr_count; - - /** - * Cached pointer to the buffer where Mesa will store vertex data. - */ - GLubyte *verts; - - /* Fallback rasterization functions - */ - // r200_point_func draw_point; - // r200_line_func draw_line; - // r200_tri_func draw_tri; - - GLuint hw_primitive; - GLenum render_primitive; - GLuint numverts; - - /** + /* * Offset of the 4UB color data within a hardware (swtcl) vertex. */ GLuint coloroffset; @@ -884,15 +583,44 @@ struct r300_swtcl_info { * Offset of the 3UB specular color data within a hardware (swtcl) vertex. */ GLuint specoffset; +}; - /** - * Should Mesa project vertex data or will the hardware do it? - */ - GLboolean needproj; +struct r300_vtable { + void (* SetupRSUnit)(GLcontext *ctx); + void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings); + GLboolean (* BuildFragmentProgramHwCode)(struct r300_fragment_program_compiler *compiler); + void (* FragmentProgramDump)(union rX00_fragment_program_code *code); + void (* SetupPixelShader)(GLcontext *ctx); +}; - struct r300_dma_region indexed_verts; +struct r300_vertex_buffer { + struct vertex_attribute { + /* generic */ + GLubyte element; + GLvoid *data; + GLboolean free_needed; + GLuint stride; + GLuint dwords; + GLubyte size; /* number of components */ + + /* hw specific */ + uint32_t data_type:4; + uint32_t dst_loc:5; + uint32_t _signed:1; + uint32_t normalize:1; + uint32_t swizzle:12; + uint32_t write_mask:4; + } attribs[VERT_ATTRIB_MAX]; + + GLubyte num_attribs; }; +struct r300_index_buffer { + GLvoid *ptr; + GLboolean is_32bit; + GLboolean free_needed; + GLuint count; +}; /** * \brief R300 context structure. @@ -900,46 +628,33 @@ struct r300_swtcl_info { struct r300_context { struct radeon_context radeon; /* parent class, must be first */ + struct r300_vtable vtbl; + struct r300_hw_state hw; - struct r300_cmdbuf cmdbuf; - struct r300_state state; - struct gl_vertex_program *curr_vp; + struct r300_vertex_program *selected_vp; /* Vertex buffers */ - struct r300_dma dma; - GLboolean save_on_next_unlock; - GLuint NewGLState; - - /* Texture object bookkeeping - */ - unsigned nr_heaps; - driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS]; - driTextureObject swapped; - int texture_depth; - float initialMaxAnisotropy; - - /* Clientdata textures; - */ - GLuint prefer_gart_client_texturing; - -#ifdef USER_BUFFERS - struct r300_memory_manager *rmm; -#endif - GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; - GLboolean disable_lowimpact_fallback; - - DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ + struct r300_options { + uint32_t conformance_mode:1; + uint32_t hw_tcl_enabled:1; + uint32_t s3tc_force_enabled:1; + uint32_t s3tc_force_disabled:1; + uint32_t stencil_two_side_disabled:1; + } options; + struct r300_swtcl_info swtcl; -}; + struct r300_vertex_buffer vbuf; + struct r300_index_buffer ind_buf; + GLboolean vap_flush_needed; + + uint32_t fallback; -struct r300_buffer_object { - struct gl_buffer_object mesa_obj; - int id; + DECLARE_RENDERINPUTS(render_inputs_bitset); }; #define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx)) @@ -955,9 +670,11 @@ extern int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program_cont *vp, float *dst); -#define RADEON_D_CAPTURE 0 -#define RADEON_D_PLAYBACK 1 -#define RADEON_D_PLAYBACK_RAW 2 -#define RADEON_D_T 3 +extern void r300InitShaderFunctions(r300ContextPtr r300); + +extern void r300InitDraw(GLcontext *ctx); + +#define r300PackFloat32 radeonPackFloat32 +#define r300PackFloat24 radeonPackFloat24 #endif /* __R300_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c new file mode 100644 index 0000000000..92bb0ee338 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_draw.c @@ -0,0 +1,484 @@ +/************************************************************************** + * + * Copyright 2009 Maciej Cencora + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL THE AUTHOR(S) AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdlib.h> + +#include "main/glheader.h" +#include "main/context.h" +#include "main/state.h" +#include "main/api_validate.h" +#include "main/enums.h" + +#include "r300_reg.h" +#include "r300_context.h" +#include "r300_emit.h" +#include "r300_render.h" +#include "r300_state.h" +#include "r300_tex.h" + +#include "tnl/tnl.h" +#include "tnl/t_vp_build.h" +#include "vbo/vbo_context.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" + +static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf, struct gl_buffer_object **bo, GLuint *nr_bo) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_index_buffer *ind_buf = &r300->ind_buf; + GLvoid *src_ptr; + + if (!mesa_ind_buf) { + ind_buf->ptr = NULL; + return; + } + + ind_buf->count = mesa_ind_buf->count; + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { + bo[*nr_bo] = mesa_ind_buf->obj; + (*nr_bo)++; + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + } + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) { + GLubyte *in = (GLubyte *)src_ptr; + GLuint *out = _mesa_malloc(sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1)); + int i; + + ind_buf->ptr = out; + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) { + *out++ = in[i]; + } + + ind_buf->free_needed = GL_TRUE; + ind_buf->is_32bit = GL_FALSE; + } else if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) { +#if MESA_BIG_ENDIAN + GLushort *in = (GLushort *)src_ptr; + GLuint *out = _mesa_malloc(sizeof(GLushort) * + ((mesa_ind_buf->count + 1) & ~1)); + int i; + + ind_buf->ptr = out; + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) { + *out++ = in[i]; + } + + ind_buf->free_needed = GL_TRUE; +#else + ind_buf->ptr = src_ptr; + ind_buf->free_needed = GL_FALSE; +#endif + ind_buf->is_32bit = GL_FALSE; + } else { + ind_buf->ptr = src_ptr; + ind_buf->free_needed = GL_FALSE; + ind_buf->is_32bit = GL_TRUE; + } +} + +static int getTypeSize(GLenum type) +{ + switch (type) { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j, sz; \ + sz = input->Size; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = MACRO(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = (GLfloat)(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } \ +} while (0) + +static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input, struct gl_buffer_object **bo, GLuint *nr_bo) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + struct vertex_attribute r300_attr; + const void *src_ptr; + GLenum type; + GLuint stride; + + if (input->BufferObj->Name) { + if (!input->BufferObj->Pointer) { + bo[*nr_bo] = input->BufferObj; + (*nr_bo)++; + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + assert(input->BufferObj->Pointer != NULL); + } + + src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + } else + src_ptr = input->Ptr; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; + + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input->Type) != 4 || +#endif + stride < 4) { + if (RADEON_DEBUG & DEBUG_FALLBACKS) { + fprintf(stderr, "%s: Converting vertex attributes, attribute data format %x,", __FUNCTION__, input->Type); + fprintf(stderr, "stride %d, components %d\n", stride, input->Size); + } + + GLfloat *dst_ptr, *tmp; + tmp = dst_ptr = _mesa_malloc(sizeof(GLfloat) * input->Size * count); + + switch (input->Type) { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + type = GL_FLOAT; + r300_attr.free_needed = GL_TRUE; + r300_attr.data = tmp; + r300_attr.stride = sizeof(GLfloat) * input->Size; + r300_attr.dwords = input->Size; + } else { + type = input->Type; + r300_attr.free_needed = GL_FALSE; + r300_attr.data = (GLvoid *)src_ptr; + r300_attr.stride = stride; + r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4; + } + + r300_attr.size = input->Size; + r300_attr.element = attr; + r300_attr.dst_loc = vbuf->num_attribs; + + switch (type) { + case GL_FLOAT: + switch (input->Size) { + case 1: r300_attr.data_type = R300_DATA_TYPE_FLOAT_1; break; + case 2: r300_attr.data_type = R300_DATA_TYPE_FLOAT_2; break; + case 3: r300_attr.data_type = R300_DATA_TYPE_FLOAT_3; break; + case 4: r300_attr.data_type = R300_DATA_TYPE_FLOAT_4; break; + } + r300_attr._signed = 0; + r300_attr.normalize = 0; + break; + case GL_SHORT: + r300_attr._signed = 1; + r300_attr.normalize = input->Normalized; + switch (input->Size) { + case 1: + case 2: + r300_attr.data_type = R300_DATA_TYPE_SHORT_2; + break; + case 3: + case 4: + r300_attr.data_type = R300_DATA_TYPE_SHORT_4; + break; + } + break; + case GL_BYTE: + r300_attr._signed = 1; + r300_attr.normalize = input->Normalized; + r300_attr.data_type = R300_DATA_TYPE_BYTE; + break; + case GL_UNSIGNED_SHORT: + r300_attr._signed = 0; + r300_attr.normalize = input->Normalized; + switch (input->Size) { + case 1: + case 2: + r300_attr.data_type = R300_DATA_TYPE_SHORT_2; + break; + case 3: + case 4: + r300_attr.data_type = R300_DATA_TYPE_SHORT_4; + break; + } + break; + case GL_UNSIGNED_BYTE: + r300_attr._signed = 0; + r300_attr.normalize = input->Normalized; + if (input->Format == GL_BGRA) + r300_attr.data_type = R300_DATA_TYPE_D3DCOLOR; + else + r300_attr.data_type = R300_DATA_TYPE_BYTE; + break; + + default: + case GL_DOUBLE: + case GL_INT: + case GL_UNSIGNED_INT: + assert(0); + break; + } + + switch (input->Size) { + case 4: + r300_attr.swizzle = SWIZZLE_XYZW; + break; + case 3: + r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + break; + case 2: + r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + case 1: + r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + } + + r300_attr.write_mask = MASK_XYZW; + + vbuf->attribs[vbuf->num_attribs] = r300_attr; + ++vbuf->num_attribs; +} + +static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count, struct gl_buffer_object **bo, GLuint *nr_bo) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + + { + int i, tmp; + + tmp = r300->selected_vp->key.InputsRead; + i = 0; + vbuf->num_attribs = 0; + while (tmp) { + /* find first enabled bit */ + while (!(tmp & 1)) { + tmp >>= 1; + ++i; + } + + r300TranslateAttrib(ctx, i, count, arrays[i], bo, nr_bo); + + tmp >>= 1; + ++i; + } + } + + r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS); + if (r300->fallback) + return; + + { + int i; + + for (i = 0; i < vbuf->num_attribs; i++) { + rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], + vbuf->attribs[i].data, vbuf->attribs[i].dwords, + vbuf->attribs[i].stride, count); + } + + r300->radeon.tcl.aos_count = vbuf->num_attribs; + } +} + +static void r300FreeData(GLcontext *ctx, struct gl_buffer_object **bo, GLuint nr_bo) +{ + { + struct r300_vertex_buffer *vbuf = &R300_CONTEXT(ctx)->vbuf; + int i; + + for (i = 0; i < vbuf->num_attribs; i++) { + if (vbuf->attribs[i].free_needed) + _mesa_free(vbuf->attribs[i].data); + } + } + + { + struct r300_index_buffer *ind_buf = &R300_CONTEXT(ctx)->ind_buf; + if (ind_buf->free_needed) + _mesa_free(ind_buf->ptr); + } + + { + int i; + + for (i = 0; i < nr_bo; ++i) { + ctx->Driver.UnmapBuffer(ctx, 0, bo[i]); + } + } +} + +static GLboolean r300TryDrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) +{ + struct r300_context *r300 = R300_CONTEXT(ctx); + struct gl_buffer_object *bo[VERT_ATTRIB_MAX+1]; + GLuint i, nr_bo = 0; + + if (ctx->NewState) + _mesa_update_state( ctx ); + + if (r300->options.hw_tcl_enabled) + _tnl_UpdateFixedFunctionProgram(ctx); + + r300UpdateShaders(r300); + + r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx)); + + r300FixupIndexBuffer(ctx, ib, bo, &nr_bo); + + r300SetVertexFormat(ctx, arrays, max_index + 1, bo, &nr_bo); + + if (r300->fallback) + return GL_FALSE; + + r300SetupVAP(ctx, r300->selected_vp->key.InputsRead, r300->selected_vp->key.OutputsWritten); + + r300UpdateShaderStates(r300); + + r300EmitCacheFlush(r300); + radeonEmitState(&r300->radeon); + + for (i = 0; i < nr_prims; ++i) { + r300RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, prim[i].mode); + } + + r300EmitCacheFlush(r300); + + radeonReleaseArrays(ctx, ~0); + + r300FreeData(ctx, bo, nr_bo); + + return GL_TRUE; +} + +/* TODO: rebase if number of indices in any of primitives is > 8192 for 32bit indices or 16384 for 16bit indices */ + +static void r300DrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index) +{ + struct split_limits limits; + GLboolean retval; + + limits.max_verts = 65535; + limits.max_indices = 65535; + limits.max_vb_size = 1024*1024; + + if (min_index) { + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims ); + return; + } + if ((ib && ib->count > 65536)) { + vbo_split_prims (ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims, &limits); + return; + } + + /* Make an attempt at drawing */ + retval = r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + + /* If failed run tnl pipeline - it should take care of fallbacks */ + if (!retval) + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); +} + +void r300InitDraw(GLcontext *ctx) +{ + struct vbo_context *vbo = vbo_context(ctx); + + vbo->draw_prims = r300DrawPrims; +} diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c index 28c3157427..c3817721dc 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.c +++ b/src/mesa/drivers/dri/r300/r300_emit.c @@ -31,6 +31,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * \file * * \author Keith Whitwell <keith@tungstengraphics.com> + * \author Maciej Cencora <m.cencora@gmail.com> */ #include "main/glheader.h" @@ -46,222 +47,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/t_context.h" #include "r300_context.h" -#include "radeon_ioctl.h" #include "r300_state.h" #include "r300_emit.h" #include "r300_ioctl.h" - -#ifdef USER_BUFFERS -#include "r300_mem.h" -#endif - -#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ - SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ - SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \ - SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \ - SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \ - SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE -#error Cannot change these! -#endif - -#define DEBUG_ALL DEBUG_VERTS - -#if defined(USE_X86_ASM) -#define COPY_DWORDS( dst, src, nr ) \ -do { \ - int __tmp; \ - __asm__ __volatile__( "rep ; movsl" \ - : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ - : "0" (nr), \ - "D" ((long)dst), \ - "S" ((long)src) ); \ -} while (0) -#else -#define COPY_DWORDS( dst, src, nr ) \ -do { \ - int j; \ - for ( j = 0 ; j < nr ; j++ ) \ - dst[j] = ((int *)src)[j]; \ - dst += nr; \ -} while (0) -#endif - -static void r300EmitVec4(GLcontext * ctx, struct r300_dma_region *rvb, - GLvoid * data, int stride, int count) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d out %p data %p\n", - __FUNCTION__, count, stride, (void *)out, (void *)data); - - if (stride == 4) - COPY_DWORDS(out, data, count); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out++; - data += stride; - } -} - -static void r300EmitVec8(GLcontext * ctx, struct r300_dma_region *rvb, - GLvoid * data, int stride, int count) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d out %p data %p\n", - __FUNCTION__, count, stride, (void *)out, (void *)data); - - if (stride == 8) - COPY_DWORDS(out, data, count * 2); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out[1] = *(int *)(data + 4); - out += 2; - data += stride; - } -} - -static void r300EmitVec12(GLcontext * ctx, struct r300_dma_region *rvb, - GLvoid * data, int stride, int count) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d out %p data %p\n", - __FUNCTION__, count, stride, (void *)out, (void *)data); - - if (stride == 12) - COPY_DWORDS(out, data, count * 3); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out[1] = *(int *)(data + 4); - out[2] = *(int *)(data + 8); - out += 3; - data += stride; - } -} - -static void r300EmitVec16(GLcontext * ctx, struct r300_dma_region *rvb, - GLvoid * data, int stride, int count) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d out %p data %p\n", - __FUNCTION__, count, stride, (void *)out, (void *)data); - - if (stride == 16) - COPY_DWORDS(out, data, count * 4); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out[1] = *(int *)(data + 4); - out[2] = *(int *)(data + 8); - out[3] = *(int *)(data + 12); - out += 4; - data += stride; - } -} - -static void r300EmitVec(GLcontext * ctx, struct r300_dma_region *rvb, - GLvoid * data, int size, int stride, int count) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - if (stride == 0) { - r300AllocDmaRegion(rmesa, rvb, size * 4, 4); - count = 1; - rvb->aos_offset = GET_START(rvb); - rvb->aos_stride = 0; - } else { - r300AllocDmaRegion(rmesa, rvb, size * count * 4, 4); - rvb->aos_offset = GET_START(rvb); - rvb->aos_stride = size; - } - - switch (size) { - case 1: - r300EmitVec4(ctx, rvb, data, stride, count); - break; - case 2: - r300EmitVec8(ctx, rvb, data, stride, count); - break; - case 3: - r300EmitVec12(ctx, rvb, data, stride, count); - break; - case 4: - r300EmitVec16(ctx, rvb, data, stride, count); - break; - default: - assert(0); - break; - } -} - -#define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \ - (attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT) - -GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, - int *inputs, GLint * tab, GLuint nr) -{ - GLuint i, dw; - - /* type, inputs, stop bit, size */ - for (i = 0; i < nr; i += 2) { - /* make sure input is valid, would lockup the gpu */ - assert(inputs[tab[i]] != -1); - dw = (R300_SIGNED | DW_SIZE(i)); - if (i + 1 == nr) { - dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; - } else { - assert(inputs[tab[i + 1]] != -1); - dw |= (R300_SIGNED | - DW_SIZE(i + 1)) << R300_DATA_TYPE_1_SHIFT; - if (i + 2 == nr) { - dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; - } - } - dst[i >> 1] = dw; - } - - return (nr + 1) >> 1; -} - -static GLuint r300VAPInputRoute1Swizzle(int swizzle[4]) -{ - return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) | - (swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) | - (swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) | - (swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT); -} - -GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr) -{ - GLuint i, dw; - - for (i = 0; i < nr; i += 2) { - dw = (r300VAPInputRoute1Swizzle(swizzle[i]) | - ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | - R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT; - if (i + 1 < nr) { - dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) | - ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | - R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; - } - dst[i >> 1] = dw; - } - - return (nr + 1) >> 1; -} +#include "r300_render.h" +#include "r300_swtcl.h" GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) { @@ -272,7 +62,6 @@ GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead) GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); GLuint i, vic_1 = 0; if (InputsRead & (1 << VERT_ATTRIB_POS)) @@ -284,281 +73,112 @@ GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead) if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) vic_1 |= R300_INPUT_CNTL_COLOR; - rmesa->state.texture.tc_count = 0; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) { - rmesa->state.texture.tc_count++; vic_1 |= R300_INPUT_CNTL_TC0 << i; } return vic_1; } -GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten) +GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads) { GLuint ret = 0; - if (OutputsWritten & (1 << VERT_RESULT_HPOS)) + if (vp_writes & (1 << VERT_RESULT_HPOS)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; - if (OutputsWritten & (1 << VERT_RESULT_COL0)) + if (vp_writes & (1 << VERT_RESULT_COL0) && fp_reads & FRAG_BIT_COL0) ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT; - if (OutputsWritten & (1 << VERT_RESULT_COL1)) + if (vp_writes & (1 << VERT_RESULT_COL1) && fp_reads & FRAG_BIT_COL1) ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT; - if (OutputsWritten & (1 << VERT_RESULT_BFC0) - || OutputsWritten & (1 << VERT_RESULT_BFC1)) - ret |= - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT | - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; + /* Two sided lighting works only if all 4 colors are written */ + if (vp_writes & (1 << VERT_RESULT_BFC0) || vp_writes & (1 << VERT_RESULT_BFC1)) + ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT; - if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) + if (vp_writes & (1 << VERT_RESULT_PSIZ)) ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; return ret; } -GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten) +GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads) { GLuint i, ret = 0, first_free_texcoord = 0; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) { - ret |= (4 << (3 * i)); + if (vp_writes & (1 << (VERT_RESULT_TEX0 + i)) && fp_reads & FRAG_BIT_TEX(i)) { + ret |= (4 << (3 * first_free_texcoord)); ++first_free_texcoord; } } - if (OutputsWritten & (1 << VERT_RESULT_FOGC)) { - if (first_free_texcoord > 8) { - fprintf(stderr, "\tout of free texcoords to write fog coord\n"); - _mesa_exit(-1); - } - ret |= 4 << (3 * first_free_texcoord); - } - - return ret; -} - -/* Emit vertex data to GART memory - * Route inputs to the vertex processor - * This function should never return R300_FALLBACK_TCL when using software tcl. - */ -int r300EmitArrays(GLcontext * ctx) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - GLuint nr; - GLuint count = vb->Count; - GLuint i; - GLuint InputsRead = 0, OutputsWritten = 0; - int *inputs = NULL; - int vir_inputs[VERT_ATTRIB_MAX]; - GLint tab[VERT_ATTRIB_MAX]; - int swizzle[VERT_ATTRIB_MAX][4]; - struct r300_vertex_program *prog = - (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); - - if (hw_tcl_on) { - inputs = prog->inputs; - InputsRead = prog->key.InputsRead; - OutputsWritten = prog->key.OutputsWritten; - } else { - inputs = rmesa->state.sw_tcl_inputs; - - DECLARE_RENDERINPUTS(render_inputs_bitset); - RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); - - vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr; - - assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)); - assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0); - //assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)); - - if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) { - InputsRead |= 1 << VERT_ATTRIB_POS; - OutputsWritten |= 1 << VERT_RESULT_HPOS; - } - - if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) { - InputsRead |= 1 << VERT_ATTRIB_COLOR0; - OutputsWritten |= 1 << VERT_RESULT_COL0; - } - - if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) { - InputsRead |= 1 << VERT_ATTRIB_COLOR1; - OutputsWritten |= 1 << VERT_RESULT_COL1; - } - - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) { - InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); - OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); - } - } - - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) { - inputs[i] = nr++; - } else { - inputs[i] = -1; - } - } - - /* Fixed, apply to vir0 only */ - memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int)); - inputs = vir_inputs; - if (InputsRead & VERT_ATTRIB_POS) - inputs[VERT_ATTRIB_POS] = 0; - if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) - inputs[VERT_ATTRIB_COLOR0] = 2; - if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) - inputs[VERT_ATTRIB_COLOR1] = 3; - for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) - if (InputsRead & (1 << i)) - inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); - - RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); + if (fp_reads & FRAG_BIT_WPOS) { + ret |= (4 << (3 * first_free_texcoord)); + ++first_free_texcoord; } - assert(InputsRead); - assert(OutputsWritten); - - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) { - tab[nr++] = i; - } + if (vp_writes & (1 << VERT_RESULT_FOGC) && fp_reads & FRAG_BIT_FOGC) { + ret |= 4 << (3 * first_free_texcoord); } - if (nr > R300_MAX_AOS_ARRAYS) { - return R300_FALLBACK_TCL; + if (first_free_texcoord > 8) { + fprintf(stderr, "\tout of free texcoords\n"); + _mesa_exit(-1); } - for (i = 0; i < nr; i++) { - int ci, fix, found = 0; - - swizzle[i][0] = SWIZZLE_ZERO; - swizzle[i][1] = SWIZZLE_ZERO; - swizzle[i][2] = SWIZZLE_ZERO; - swizzle[i][3] = SWIZZLE_ONE; + return ret; +} - for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { - swizzle[i][ci] = ci; - } +GLboolean r300EmitArrays(GLcontext * ctx) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_vertex_buffer *vbuf = &r300->vbuf; + GLuint InputsRead, OutputsWritten; - if (r300IsGartMemory(rmesa, vb->AttribPtr[tab[i]]->data, 4)) { - if (vb->AttribPtr[tab[i]]->stride % 4) { - return R300_FALLBACK_TCL; - } - rmesa->state.aos[i].address = (void *)(vb->AttribPtr[tab[i]]->data); - rmesa->state.aos[i].start = 0; - rmesa->state.aos[i].aos_offset = r300GartOffsetFromVirtual(rmesa, vb->AttribPtr[tab[i]]->data); - rmesa->state.aos[i].aos_stride = vb->AttribPtr[tab[i]]->stride / 4; - rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; - } else { - r300EmitVec(ctx, &rmesa->state.aos[i], - vb->AttribPtr[tab[i]]->data, - vb->AttribPtr[tab[i]]->size, - vb->AttribPtr[tab[i]]->stride, count); - } + r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten); - rmesa->state.aos[i].aos_size = vb->AttribPtr[tab[i]]->size; + r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS); + if (r300->fallback & R300_RASTER_FALLBACK_MASK) + return GL_FALSE; - for (fix = 0; fix <= 4 - vb->AttribPtr[tab[i]]->size; fix++) { - if ((rmesa->state.aos[i].aos_offset - _mesa_sizeof_type(GL_FLOAT) * fix) % 4) { - continue; - } - found = 1; - break; - } + { + struct vertex_buffer *mesa_vb = &TNL_CONTEXT(ctx)->vb; + GLuint attr, i; - if (found) { - if (fix > 0) { - WARN_ONCE("Feeling lucky?\n"); - } - rmesa->state.aos[i].aos_offset -= _mesa_sizeof_type(GL_FLOAT) * fix; - for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) { - swizzle[i][ci] += fix; - } - } else { - WARN_ONCE - ("Cannot handle offset %x with stride %d, comp %d\n", - rmesa->state.aos[i].aos_offset, - rmesa->state.aos[i].aos_stride, - vb->AttribPtr[tab[i]]->size); - return R300_FALLBACK_TCL; + for (i = 0; i < vbuf->num_attribs; i++) { + attr = vbuf->attribs[i].element; + rcommon_emit_vector(ctx, &r300->radeon.tcl.aos[i], mesa_vb->AttribPtr[attr]->data, + mesa_vb->AttribPtr[attr]->size, mesa_vb->AttribPtr[attr]->stride, mesa_vb->Count); } - } - /* Setup INPUT_ROUTE. */ - R300_STATECHANGE(rmesa, vir[0]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = - r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], - vb->AttribPtr, inputs, tab, nr); - R300_STATECHANGE(rmesa, vir[1]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = - r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, - nr); - - /* Setup INPUT_CNTL. */ - R300_STATECHANGE(rmesa, vic); - rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); - rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); - - /* Setup OUTPUT_VTX_FMT. */ - R300_STATECHANGE(rmesa, vof); - rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = - r300VAPOutputCntl0(ctx, OutputsWritten); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = - r300VAPOutputCntl1(ctx, OutputsWritten); - - rmesa->state.aos_count = nr; - - return R300_FALLBACK_NONE; -} + r300->radeon.tcl.aos_count = vbuf->num_attribs; -#ifdef USER_BUFFERS -void r300UseArrays(GLcontext * ctx) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - int i; - - if (rmesa->state.elt_dma.buf) - r300_mem_use(rmesa, rmesa->state.elt_dma.buf->id); - - for (i = 0; i < rmesa->state.aos_count; i++) { - if (rmesa->state.aos[i].buf) - r300_mem_use(rmesa, rmesa->state.aos[i].buf->id); + /* Fill index buffer info */ + r300->ind_buf.ptr = mesa_vb->Elts; + r300->ind_buf.is_32bit = GL_TRUE; + r300->ind_buf.free_needed = GL_FALSE; } -} -#endif -void r300ReleaseArrays(GLcontext * ctx) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - int i; + r300SetupVAP(ctx, InputsRead, OutputsWritten); - r300ReleaseDmaRegion(rmesa, &rmesa->state.elt_dma, __FUNCTION__); - for (i = 0; i < rmesa->state.aos_count; i++) { - r300ReleaseDmaRegion(rmesa, &rmesa->state.aos[i], __FUNCTION__); - } + return GL_TRUE; } void r300EmitCacheFlush(r300ContextPtr rmesa) { - int cmd_reserved = 0; - int cmd_written = 0; - - drm_radeon_cmd_header_t *cmd = NULL; - - reg_start(R300_RB3D_DSTCACHE_CTLSTAT, 0); - e32(R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); - - reg_start(R300_ZB_ZCACHE_CTLSTAT, 0); - e32(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + BATCH_LOCALS(&rmesa->radeon); + + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D); + OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + END_BATCH(); + COMMIT_BATCH(); } diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h index 89d738339f..2fb8b82d3a 100644 --- a/src/mesa/drivers/dri/r300/r300_emit.h +++ b/src/mesa/drivers/dri/r300/r300_emit.h @@ -44,28 +44,31 @@ #include "r300_cmdbuf.h" #include "radeon_reg.h" -/* TODO: move these defines (and the ones from DRM) into r300_reg.h and sync up - * with DRM */ -#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2)) -#define CP_PACKET3( pkt, n ) \ - (RADEON_CP_PACKET3 | (pkt) | ((n) << 16)) - -static INLINE uint32_t cmdpacket0(int reg, int count) +static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn, + int reg, int count) { - drm_r300_cmd_header_t cmd; - - cmd.packet0.cmd_type = R300_CMD_PACKET0; - cmd.packet0.count = count; - cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8; - cmd.packet0.reglo = ((unsigned int)reg & 0x00FF); - - return cmd.u; + if (!rscrn->kernel_mm) { + drm_r300_cmd_header_t cmd; + + cmd.u = 0; + cmd.packet0.cmd_type = R300_CMD_PACKET0; + cmd.packet0.count = count; + cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8; + cmd.packet0.reglo = ((unsigned int)reg & 0x00FF); + + return cmd.u; + } + if (count) { + return CP_PACKET0(reg, count - 1); + } + return CP_PACKET2; } -static INLINE uint32_t cmdvpu(int addr, int count) +static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count) { drm_r300_cmd_header_t cmd; + cmd.u = 0; cmd.vpu.cmd_type = R300_CMD_VPU; cmd.vpu.count = count; cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8; @@ -74,10 +77,12 @@ static INLINE uint32_t cmdvpu(int addr, int count) return cmd.u; } -static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp) +static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn, + int addr, int count, int type, int clamp) { drm_r300_cmd_header_t cmd; + cmd.u = 0; cmd.r500fp.cmd_type = R300_CMD_R500FP; cmd.r500fp.count = count; cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8; @@ -88,181 +93,139 @@ static INLINE uint32_t cmdr500fp(int addr, int count, int type, int clamp) return cmd.u; } -static INLINE uint32_t cmdpacket3(int packet) +static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet) { drm_r300_cmd_header_t cmd; + cmd.u = 0; cmd.packet3.cmd_type = R300_CMD_PACKET3; cmd.packet3.packet = packet; return cmd.u; } -static INLINE uint32_t cmdcpdelay(unsigned short count) +static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn, + unsigned short count) { drm_r300_cmd_header_t cmd; + cmd.u = 0; + cmd.delay.cmd_type = R300_CMD_CP_DELAY; cmd.delay.count = count; return cmd.u; } -static INLINE uint32_t cmdwait(unsigned char flags) +static INLINE uint32_t cmdwait(struct radeon_screen *rscrn, + unsigned char flags) { drm_r300_cmd_header_t cmd; + cmd.u = 0; cmd.wait.cmd_type = R300_CMD_WAIT; cmd.wait.flags = flags; return cmd.u; } -static INLINE uint32_t cmdpacify(void) +static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn) { drm_r300_cmd_header_t cmd; + cmd.u = 0; cmd.header.cmd_type = R300_CMD_END3D; return cmd.u; } /** - * Prepare to write a register value to register at address reg. - * If num_extra > 0 then the following extra values are written - * to registers with address +4, +8 and so on.. - */ -#define reg_start(reg, num_extra) \ - do { \ - int _n; \ - _n=(num_extra); \ - cmd = (drm_radeon_cmd_header_t*) \ - r300AllocCmdBuf(rmesa, \ - (_n+2), \ - __FUNCTION__); \ - cmd_reserved=_n+2; \ - cmd_written=1; \ - cmd[0].i=cmdpacket0((reg), _n+1); \ - } while (0); - -/** - * Emit GLuint freestyle + * Write the header of a packet3 to the command buffer. + * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards. */ -#define e32(dword) \ - do { \ - if(cmd_written<cmd_reserved) { \ - cmd[cmd_written].i=(dword); \ - cmd_written++; \ - } else { \ - fprintf(stderr, \ - "e32 but no previous packet " \ - "declaration.\n" \ - "Aborting! in %s::%s at line %d, " \ - "cmd_written=%d cmd_reserved=%d\n", \ - __FILE__, __FUNCTION__, __LINE__, \ - cmd_written, cmd_reserved); \ - _mesa_exit(-1); \ - } \ +#define OUT_BATCH_PACKET3(packet, num_extra) do {\ + if (!b_l_rmesa->radeonScreen->kernel_mm) { \ + OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\ + R300_CMD_PACKET3_RAW)); \ + } else b_l_rmesa->cmdbuf.cs->section_cdw++;\ + OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ } while(0) -#define efloat(f) e32(r300PackFloat32(f)) - -#define vsf_start_fragment(dest, length) \ - do { \ - int _n; \ - _n = (length); \ - cmd = (drm_radeon_cmd_header_t*) \ - r300AllocCmdBuf(rmesa, \ - (_n+1), \ - __FUNCTION__); \ - cmd_reserved = _n+2; \ - cmd_written =1; \ - cmd[0].i = cmdvpu((dest), _n/4); \ - } while (0); - -#define r500fp_start_fragment(dest, length) \ - do { \ - int _n; \ - _n = (length); \ - cmd = (drm_radeon_cmd_header_t*) \ - r300AllocCmdBuf(rmesa, \ - (_n+1), \ - __FUNCTION__); \ - cmd_reserved = _n+1; \ - cmd_written =1; \ - cmd[0].i = cmdr500fp((dest), _n/6, 0, 0); \ - } while (0); - -#define start_packet3(packet, count) \ - { \ - int _n; \ - GLuint _p; \ - _n = (count); \ - _p = (packet); \ - cmd = (drm_radeon_cmd_header_t*) \ - r300AllocCmdBuf(rmesa, \ - (_n+3), \ - __FUNCTION__); \ - cmd_reserved = _n+3; \ - cmd_written = 2; \ - if(_n > 0x3fff) { \ - fprintf(stderr,"Too big packet3 %08x: cannot " \ - "store %d dwords\n", \ - _p, _n); \ - _mesa_exit(-1); \ - } \ - cmd[0].i = cmdpacket3(R300_CMD_PACKET3_RAW); \ - cmd[1].i = _p | ((_n & 0x3fff)<<16); \ - } - /** * Must be sent to switch to 2d commands */ -void static INLINE end_3d(r300ContextPtr rmesa) +void static INLINE end_3d(radeonContextPtr radeon) { - drm_radeon_cmd_header_t *cmd = NULL; + BATCH_LOCALS(radeon); - cmd = - (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); - cmd[0].header.cmd_type = R300_CMD_END3D; + if (!radeon->radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(1); + OUT_BATCH(cmdpacify(radeon->radeonScreen)); + END_BATCH(); + } } void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count) { - drm_radeon_cmd_header_t *cmd = NULL; + BATCH_LOCALS(&rmesa->radeon); - cmd = - (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); - cmd[0].i = cmdcpdelay(count); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(1); + OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count)); + END_BATCH(); + } } -void static INLINE cp_wait(r300ContextPtr rmesa, unsigned char flags) +void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags) { - drm_radeon_cmd_header_t *cmd = NULL; - - cmd = - (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, 1, __FUNCTION__); - cmd[0].i = cmdwait(flags); + BATCH_LOCALS(radeon); + uint32_t wait_until; + + if (!radeon->radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(1); + OUT_BATCH(cmdwait(radeon->radeonScreen, flags)); + END_BATCH(); + } else { + switch(flags) { + case R300_WAIT_2D: + wait_until = (1 << 14); + break; + case R300_WAIT_3D: + wait_until = (1 << 15); + break; + case R300_NEW_WAIT_2D_3D: + wait_until = (1 << 14) | (1 << 15); + break; + case R300_NEW_WAIT_2D_2D_CLEAN: + wait_until = (1 << 14) | (1 << 16) | (1 << 18); + break; + case R300_NEW_WAIT_3D_3D_CLEAN: + wait_until = (1 << 15) | (1 << 17) | (1 << 18); + break; + case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN: + wait_until = (1 << 14) | (1 << 16) | (1 << 18); + wait_until |= (1 << 15) | (1 << 17) | (1 << 18); + break; + default: + return; + } + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0)); + OUT_BATCH(wait_until); + END_BATCH(); + } } -extern int r300EmitArrays(GLcontext * ctx); - -#ifdef USER_BUFFERS -void r300UseArrays(GLcontext * ctx); -#endif +extern GLboolean r300EmitArrays(GLcontext * ctx); -extern void r300ReleaseArrays(GLcontext * ctx); extern int r300PrimitiveType(r300ContextPtr rmesa, int prim); extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim); extern void r300EmitCacheFlush(r300ContextPtr rmesa); -extern GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr, - int *inputs, GLint * tab, GLuint nr); -extern GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr); extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead); extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead); -extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten); -extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten); +extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads); +extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes, GLuint fp_reads); #endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 873cde4414..55c1cfe631 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -25,32 +25,12 @@ * */ -/** - * \file - * - * Fragment program compiler. Perform transformations on the intermediate - * representation until the program is in a form where we can translate - * it more or less directly into machine-readable form. - * - * \author Ben Skeggs <darktama@iinet.net.au> - * \author Jerome Glisse <j.glisse@gmail.com> - */ +#include "r300_fragprog.h" -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/prog_instruction.h" #include "shader/prog_parameter.h" -#include "shader/prog_print.h" #include "r300_context.h" -#include "r300_fragprog.h" #include "r300_fragprog_swizzle.h" -#include "r300_state.h" - -#include "radeon_nqssadce.h" -#include "radeon_program_alu.h" - static void reset_srcreg(struct prog_src_register* reg) { @@ -81,7 +61,7 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t * \todo If/when r5xx uses the radeon_program architecture, this can probably * be reused. */ -static GLboolean transform_TEX( +GLboolean r300_transform_TEX( struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data) { @@ -160,6 +140,8 @@ static GLboolean transform_TEX( inst.DstReg.Index = tempreg; inst.DstReg.WriteMask = WRITEMASK_XYZW; destredirect = GL_TRUE; + } else if (inst.SaturateMode) { + destredirect = GL_TRUE; } } @@ -175,7 +157,7 @@ static GLboolean transform_TEX( inst.SrcReg[0].File = PROGRAM_TEMPORARY; inst.SrcReg[0].Index = tmpreg; } - + tgt = radeonAppendInstructions(t->Program, 1); _mesa_copy_instructions(tgt, &inst, 1); @@ -239,6 +221,7 @@ static GLboolean transform_TEX( tgt->Opcode = OPCODE_MOV; tgt->DstReg = orig_inst->DstReg; + tgt->SaturateMode = inst.SaturateMode; tgt->SrcReg[0].File = PROGRAM_TEMPORARY; tgt->SrcReg[0].Index = inst.DstReg.Index; } @@ -246,241 +229,10 @@ static GLboolean transform_TEX( return GL_TRUE; } - -static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) -{ - struct gl_fragment_program *mp = &fp->mesa_program; - - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (mp->Base.Parameters) - _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); -} - - -/** - * Transform the program to support fragment.position. - * - * Introduce a small fragment at the start of the program that will be - * the only code that directly reads the FRAG_ATTRIB_WPOS input. - * All other code pieces that reference that input will be rewritten - * to read from a newly allocated temporary. - * - * \todo if/when r5xx supports the radeon_program architecture, this is a - * likely candidate for code sharing. - */ -static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) -{ - GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; - - if (!(InputsRead & FRAG_BIT_WPOS)) - return; - - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - - _mesa_insert_instructions(compiler->program, 0, 3); - fpi = compiler->program->Instructions; - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - for (; i < compiler->program->NumInstructions; ++i) { - int reg; - for (reg = 0; reg < 3; reg++) { - if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && - fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { - fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[reg].Index = tempregi; - } - } - } -} - - -static void nqssadce_init(struct nqssadce_state* s) -{ - s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; -} - - -static GLuint build_dtm(GLuint depthmode) -{ - switch(depthmode) { - default: - case GL_LUMINANCE: return 0; - case GL_INTENSITY: return 1; - case GL_ALPHA: return 2; - } -} - -static GLuint build_func(GLuint comparefunc) -{ - return comparefunc - GL_NEVER; -} - - -/** - * Collect all external state that is relevant for compiling the given - * fragment program. - */ -static void build_state( - r300ContextPtr r300, - struct r300_fragment_program *fp, - struct r300_fragment_program_external_state *state) -{ - int unit; - - _mesa_bzero(state, sizeof(*state)); - - for(unit = 0; unit < 16; ++unit) { - if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { - struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - - state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); - state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); - } - } -} - - -void r300TranslateFragmentShader(r300ContextPtr r300, - struct r300_fragment_program *fp) -{ - struct r300_fragment_program_external_state state; - - build_state(r300, fp, &state); - if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { - /* TODO: cache compiled programs */ - fp->translated = GL_FALSE; - _mesa_memcpy(&fp->state, &state, sizeof(state)); - } - - if (!fp->translated) { - struct r300_fragment_program_compiler compiler; - - compiler.r300 = r300; - compiler.fp = fp; - compiler.code = &fp->code; - compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: Initial program:\n"); - _mesa_print_program(compiler.program); - } - - insert_WPOS_trailer(&compiler); - - struct radeon_program_transformation transformations[] = { - { &transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformTrigSimple, 0 } - }; - radeonLocalTransform( - r300->radeon.glCtx, - compiler.program, - 3, transformations); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Fragment Program: After native rewrite:\n"); - _mesa_print_program(compiler.program); - } - - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &r300FPIsNativeSwizzle, - .BuildSwizzle = &r300FPBuildSwizzle, - .RewriteDepthOut = GL_TRUE - }; - radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - } - - if (!r300FragmentProgramEmit(&compiler)) - fp->error = GL_TRUE; - - /* Subtle: Rescue any parameters that have been added during transformations */ - _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); - fp->mesa_program.Base.Parameters = compiler.program->Parameters; - compiler.program->Parameters = 0; - - _mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL); - - if (!fp->error) - fp->translated = GL_TRUE; - if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) - r300FragmentProgramDump(fp, &fp->code); - r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); - } - - update_params(r300, fp); -} - /* just some random things... */ -void r300FragmentProgramDump( - struct r300_fragment_program *fp, - struct r300_fragment_program_code *code) +void r300FragmentProgramDump(union rX00_fragment_program_code *c) { + struct r300_fragment_program_code *code = &c->r300; int n, i, j; static int pc = 0; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 94fb554fb3..5ce6f33cee 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -33,9 +33,6 @@ #ifndef __R300_FRAGPROG_H_ #define __R300_FRAGPROG_H_ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" #include "shader/program.h" #include "shader/prog_instruction.h" @@ -105,28 +102,10 @@ #endif -struct r300_fragment_program; +extern GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); -extern void r300TranslateFragmentShader(r300ContextPtr r300, - struct r300_fragment_program *fp); +extern void r300FragmentProgramDump(union rX00_fragment_program_code *c); - -/** - * Used internally by the r300 fragment program code to store compile-time - * only data. - */ -struct r300_fragment_program_compiler { - r300ContextPtr r300; - struct r300_fragment_program *fp; - struct r300_fragment_program_code *code; - struct gl_program *program; -}; - -extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); - - -extern void r300FragmentProgramDump( - struct r300_fragment_program *fp, - struct r300_fragment_program_code *code); +extern GLboolean r300_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); #endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c new file mode 100644 index 0000000000..abc8757ba1 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -0,0 +1,291 @@ +/* + * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Fragment program compiler. Perform transformations on the intermediate + * representation until the program is in a form where we can translate + * it more or less directly into machine-readable form. + * + * \author Ben Skeggs <darktama@iinet.net.au> + * \author Jerome Glisse <j.glisse@gmail.com> + */ + +#include "r300_fragprog_common.h" + +#include "shader/program.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "r300_state.h" +#include "r300_fragprog.h" +#include "r300_fragprog_swizzle.h" +#include "r500_fragprog.h" + +#include "radeon_program.h" +#include "radeon_program_alu.h" + +static void update_params(GLcontext *ctx, struct gl_fragment_program *fp) +{ + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (fp->Base.Parameters) + _mesa_load_state_parameters(ctx, fp->Base.Parameters); +} + +static void nqssadce_init(struct nqssadce_state* s) +{ + s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; + s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; +} + +/** + * Transform the program to support fragment.position. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. + * + */ +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) +{ + GLuint InputsRead = compiler->fp->Base.Base.InputsRead; + + if (!(InputsRead & FRAG_BIT_WPOS)) + return; + + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); + + _mesa_insert_instructions(compiler->program, 0, 3); + fpi = compiler->program->Instructions; + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + for (; i < compiler->program->NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; + } + } + } +} + +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; + } +} + +static GLuint build_func(GLuint comparefunc) +{ + return comparefunc - GL_NEVER; +} + +/** + * Collect all external state that is relevant for compiling the given + * fragment program. + */ +static void build_state( + r300ContextPtr r300, + struct r300_fragment_program *fp, + struct r300_fragment_program_external_state *state) +{ + int unit; + + _mesa_bzero(state, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->Base.Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + +void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp) +{ + r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)fp; + struct r300_fragment_program_external_state state; + + build_state(r300, r300_fp, &state); + if (_mesa_memcmp(&r300_fp->state, &state, sizeof(state))) { + /* TODO: cache compiled programs */ + r300_fp->translated = GL_FALSE; + _mesa_memcpy(&r300_fp->state, &state, sizeof(state)); + } + + if (!r300_fp->translated) { + struct r300_fragment_program_compiler compiler; + + compiler.r300 = r300; + compiler.fp = r300_fp; + compiler.code = &r300_fp->code; + compiler.program = _mesa_clone_program(ctx, &fp->Base); + + if (RADEON_DEBUG & DEBUG_PIXEL) { + fflush(stdout); + _mesa_printf("Fragment Program: Initial program:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } + + insert_WPOS_trailer(&compiler); + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct radeon_program_transformation transformations[] = { + { &r500_transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformDeriv, 0 }, + { &radeonTransformTrigScale, 0 } + }; + radeonLocalTransform(ctx, compiler.program, 4, transformations); + } else { + struct radeon_program_transformation transformations[] = { + { &r300_transform_TEX, &compiler }, + { &radeonTransformALU, 0 }, + { &radeonTransformTrigSimple, 0 } + }; + radeonLocalTransform(ctx, compiler.program, 3, transformations); + } + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Fragment Program: After native rewrite:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } + + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r500FPIsNativeSwizzle, + .BuildSwizzle = &r500FPBuildSwizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(ctx, compiler.program, &nqssadce); + } else { + struct radeon_nqssadce_descr nqssadce = { + .Init = &nqssadce_init, + .IsNativeSwizzle = &r300FPIsNativeSwizzle, + .BuildSwizzle = &r300FPBuildSwizzle, + .RewriteDepthOut = GL_TRUE + }; + radeonNqssaDce(ctx, compiler.program, &nqssadce); + } + + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler: after NqSSA-DCE:\n"); + _mesa_print_program(compiler.program); + fflush(stdout); + } + + if (!r300->vtbl.BuildFragmentProgramHwCode(&compiler)) + r300_fp->error = GL_TRUE; + + /* Subtle: Rescue any parameters that have been added during transformations */ + _mesa_free_parameter_list(fp->Base.Parameters); + fp->Base.Parameters = compiler.program->Parameters; + compiler.program->Parameters = 0; + + _mesa_reference_program(ctx, &compiler.program, NULL); + + r300_fp->translated = GL_TRUE; + + r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + + if (r300_fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) + r300->vtbl.FragmentProgramDump(&r300_fp->code); + } + + update_params(ctx, fp); +} diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h new file mode 100644 index 0000000000..85ea86fecb --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_FRAGPROG_COMMON_H_ +#define __R300_FRAGPROG_COMMON_H_ + +#include "main/mtypes.h" + +extern void r300TranslateFragmentShader(GLcontext *ctx, struct gl_fragment_program *fp); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index 9f0b7e3534..b75656e7ee 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -47,7 +47,7 @@ #define PROG_CODE \ struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ - struct r300_fragment_program_code *code = c->code + struct r300_fragment_program_code *code = &c->code->r300 #define error(fmt, args...) do { \ fprintf(stderr, "%s::%s(): " fmt "\n", \ @@ -66,7 +66,7 @@ static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwind } if (*hwindex >= code->const_nr) { - if (*hwindex >= PFS_NUM_CONST_REGS) { + if (*hwindex >= R300_PFS_NUM_CONST_REGS) { error("Out of hw constants!\n"); return GL_FALSE; } @@ -138,7 +138,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) { PROG_CODE; - if (code->alu.length >= PFS_MAX_ALU_INST) { + if (code->alu.length >= R300_PFS_MAX_ALU_INST) { error("Too many ALU instructions"); return GL_FALSE; } @@ -201,7 +201,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) if (inst->Alpha.DepthWriteMask) { code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; code->node[code->cur_node].flags |= R300_W_OUT; - c->fp->WritesDepth = GL_TRUE; + c->fp->writes_depth = GL_TRUE; } return GL_TRUE; @@ -213,7 +213,7 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) */ static GLboolean finish_node(struct r300_fragment_program_compiler *c) { - struct r300_fragment_program_code *code = c->code; + struct r300_fragment_program_code *code = &c->code->r300; struct r300_fragment_program_node *node = &code->node[code->cur_node]; if (node->alu_end < 0) { @@ -275,7 +275,7 @@ static GLboolean emit_tex(void* data, struct prog_instruction* inst) { PROG_CODE; - if (code->tex.length >= PFS_MAX_TEX_INST) { + if (code->tex.length >= R300_PFS_MAX_TEX_INST) { error("Too many TEX instructions"); return GL_FALSE; } @@ -318,16 +318,16 @@ static const struct radeon_pair_handler pair_handler = { .EmitPaired = &emit_alu, .EmitTex = &emit_tex, .BeginTexBlock = &begin_tex, - .MaxHwTemps = PFS_NUM_TEMP_REGS + .MaxHwTemps = R300_PFS_NUM_TEMP_REGS }; /** * Final compilation step: Turn the intermediate radeon_program into * machine-readable instructions. */ -GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler) +GLboolean r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { - struct r300_fragment_program_code *code = compiler->code; + struct r300_fragment_program_code *code = &compiler->code->r300; _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); code->node[0].alu_end = -1; diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index ee85e229f0..104079b4db 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -46,8 +46,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "swrast/swrast.h" +#include "radeon_common.h" +#include "radeon_lock.h" #include "r300_context.h" -#include "radeon_ioctl.h" #include "r300_ioctl.h" #include "r300_cmdbuf.h" #include "r300_state.h" @@ -55,71 +56,90 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_reg.h" #include "r300_emit.h" #include "r300_fragprog.h" +#include "r300_context.h" #include "vblank.h" +#define R200_3D_DRAW_IMMD_2 0xC0003500 + #define CLEARBUFFER_COLOR 0x1 #define CLEARBUFFER_DEPTH 0x2 #define CLEARBUFFER_STENCIL 0x4 -static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) +static void r300EmitClearState(GLcontext * ctx); + +static void r300UserClear(GLcontext *ctx, GLuint mask) { + radeon_clear_tris(ctx, mask); +} + +static void r300ClearBuffer(r300ContextPtr r300, int flags, + struct radeon_renderbuffer *rrb, + struct radeon_renderbuffer *rrbd) +{ + BATCH_LOCALS(&r300->radeon); GLcontext *ctx = r300->radeon.glCtx; - __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; - GLuint cboffset, cbpitch; - drm_r300_cmd_header_t *cmd2; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); + GLuint cbpitch = 0; r300ContextPtr rmesa = r300; if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s: %s buffer (%i,%i %ix%i)\n", - __FUNCTION__, buffer ? "back" : "front", - dPriv->x, dPriv->y, dPriv->w, dPriv->h); - - if (buffer) { - cboffset = r300->radeon.radeonScreen->backOffset; - cbpitch = r300->radeon.radeonScreen->backPitch; - } else { - cboffset = r300->radeon.radeonScreen->frontOffset; - cbpitch = r300->radeon.radeonScreen->frontPitch; + fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n", + __FUNCTION__, rrb, dPriv->x, dPriv->y, + dPriv->w, dPriv->h); + + if (rrb) { + cbpitch = (rrb->pitch / rrb->cpp); + if (rrb->cpp == 4) + cbpitch |= R300_COLOR_FORMAT_ARGB8888; + else + cbpitch |= R300_COLOR_FORMAT_RGB565; + + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ + cbpitch |= R300_COLOR_TILE_ENABLE; + } } - cboffset += r300->radeon.radeonScreen->fbLocation; - - cp_wait(r300, R300_WAIT_3D | R300_WAIT_3D_CLEAN); - end_3d(rmesa); - - R300_STATECHANGE(r300, cb); - reg_start(R300_RB3D_COLOROFFSET0, 0); - e32(cboffset); - - if (r300->radeon.radeonScreen->cpp == 4) - cbpitch |= R300_COLOR_FORMAT_ARGB8888; - else - cbpitch |= R300_COLOR_FORMAT_RGB565; - - if (r300->radeon.sarea->tiling_enabled) - cbpitch |= R300_COLOR_TILE_ENABLE; - - reg_start(R300_RB3D_COLORPITCH0, 0); - e32(cbpitch); - - R300_STATECHANGE(r300, cmk); - reg_start(RB3D_COLOR_CHANNEL_MASK, 0); + /* TODO in bufmgr */ + cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + end_3d(&rmesa->radeon); if (flags & CLEARBUFFER_COLOR) { - e32((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | - (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | - (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | - (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); + assert(rrb != 0); + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch); + END_BATCH(); + } +#if 1 + if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) { + assert(rrbd != 0); + cbpitch = (rrbd->pitch / rrbd->cpp); + if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){ + cbpitch |= R300_DEPTHMACROTILE_ENABLE; + } + if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){ + cbpitch |= R300_DEPTHMICROTILE_TILED; + } + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch); + END_BATCH(); + } +#endif + BEGIN_BATCH_NO_AUTOSTATE(6); + OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1); + if (flags & CLEARBUFFER_COLOR) { + OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) | + (ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) | + (ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) | + (ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0)); } else { - e32(0x0); + OUT_BATCH(0); } - R300_STATECHANGE(r300, zs); - reg_start(R300_ZB_CNTL, 2); { uint32_t t1, t2; @@ -146,73 +166,92 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) R300_S_FRONT_ZFAIL_OP_SHIFT); } - e32(t1); - e32(t2); - e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | - (ctx->Stencil.Clear & R300_STENCILREF_MASK)); + OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3); + OUT_BATCH(t1); + OUT_BATCH(t2); + OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << + R300_STENCILWRITEMASK_SHIFT) | + (ctx->Stencil.Clear & R300_STENCILREF_MASK)); + END_BATCH(); } - cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__); - cmd2[0].packet3.cmd_type = R300_CMD_PACKET3; - cmd2[0].packet3.packet = R300_CMD_PACKET3_CLEAR; - cmd2[1].u = r300PackFloat32(dPriv->w / 2.0); - cmd2[2].u = r300PackFloat32(dPriv->h / 2.0); - cmd2[3].u = r300PackFloat32(ctx->Depth.Clear); - cmd2[4].u = r300PackFloat32(1.0); - cmd2[5].u = r300PackFloat32(ctx->Color.ClearColor[0]); - cmd2[6].u = r300PackFloat32(ctx->Color.ClearColor[1]); - cmd2[7].u = r300PackFloat32(ctx->Color.ClearColor[2]); - cmd2[8].u = r300PackFloat32(ctx->Color.ClearColor[3]); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(9); + OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR)); + OUT_BATCH_FLOAT32(dPriv->w / 2.0); + OUT_BATCH_FLOAT32(dPriv->h / 2.0); + OUT_BATCH_FLOAT32(ctx->Depth.Clear); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); + END_BATCH(); + } else { + OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); + OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | + (1 << R300_PRIM_NUM_VERTICES_SHIFT)); + OUT_BATCH_FLOAT32(dPriv->w / 2.0); + OUT_BATCH_FLOAT32(dPriv->h / 2.0); + OUT_BATCH_FLOAT32(ctx->Depth.Clear); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]); + OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]); + } r300EmitCacheFlush(rmesa); - cp_wait(rmesa, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN); + + R300_STATECHANGE(r300, cb); + R300_STATECHANGE(r300, cmk); + R300_STATECHANGE(r300, zs); } static void r300EmitClearState(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - r300ContextPtr rmesa = r300; - __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; + BATCH_LOCALS(&r300->radeon); + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); int i; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; - int has_tcl = 1; + int has_tcl; int is_r500 = 0; GLuint vap_cntl; - if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - has_tcl = 0; - - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - is_r500 = 1; + has_tcl = r300->options.hw_tcl_enabled; + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + is_r500 = 1; - /* FIXME: the values written to R300_VAP_INPUT_ROUTE_0_0 and - * R300_VAP_INPUT_ROUTE_0_1 are in fact known, however, the values are - * quite complex; see the functions in r300_emit.c. + /* State atom dirty tracking is a little subtle here. + * + * On the one hand, we need to make sure base state is emitted + * here if we start with an empty batch buffer, otherwise clear + * works incorrectly with multiple processes. Therefore, the first + * BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE. * - * I believe it would be a good idea to extend the functions in - * r300_emit.c so that they can be used to setup the default values for - * these registers, as well as the actual values used for rendering. + * On the other hand, implicit state emission clears the state atom + * dirty bits, so we have to call R300_STATECHANGE later than the + * first BEGIN_BATCH. + * + * The final trickiness is that, because we change state, we need + * to ensure that any stored swtcl primitives are flushed properly + * before we start changing state. See the R300_NEWPRIM in r300Clear + * for this. */ - R300_STATECHANGE(r300, vir[0]); - reg_start(R300_VAP_PROG_STREAM_CNTL_0, 0); + BEGIN_BATCH(31); + OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1); if (!has_tcl) - e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); else - e32(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | + OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) | ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT))); - /* disable fog */ - R300_STATECHANGE(r300, fogs); - reg_start(R300_FG_FOG_BLEND, 0); - e32(0x0); - - R300_STATECHANGE(r300, vir[1]); - reg_start(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0); - e32(((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | + OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0); + OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0, + ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | @@ -226,619 +265,402 @@ static void r300EmitClearState(GLcontext * ctx) << R300_SWIZZLE1_SHIFT))); /* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */ - R300_STATECHANGE(r300, vic); - reg_start(R300_VAP_VTX_STATE_CNTL, 1); - e32((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); - e32(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); + OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2); + OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT)); + OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0); - R300_STATECHANGE(r300, vte); /* comes from fglrx startup of clear */ - reg_start(R300_SE_VTE_CNTL, 1); - e32(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | - R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | - R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | - R300_VPORT_Z_OFFSET_ENA); - e32(0x8); + OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2); + OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA | + R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | + R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | + R300_VPORT_Z_OFFSET_ENA); + OUT_BATCH(0x8); - reg_start(R300_VAP_PSC_SGN_NORM_CNTL, 0); - e32(0xaaaaaaaa); + OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa); - R300_STATECHANGE(r300, vof); - reg_start(R300_VAP_OUTPUT_VTX_FMT_0, 1); - e32(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | - R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); - e32(0x0); /* no textures */ + OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); + OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT | + R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT); + OUT_BATCH(0); /* no textures */ - R300_STATECHANGE(r300, txe); - reg_start(R300_TX_ENABLE, 0); - e32(0x0); + OUT_BATCH_REGVAL(R300_TX_ENABLE, 0); - R300_STATECHANGE(r300, vpt); - reg_start(R300_SE_VPORT_XSCALE, 5); - efloat(1.0); - efloat(dPriv->x); - efloat(1.0); - efloat(dPriv->y); - efloat(1.0); - efloat(0.0); + OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(dPriv->x); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(dPriv->y); + OUT_BATCH_FLOAT32(1.0); + OUT_BATCH_FLOAT32(0.0); - R300_STATECHANGE(r300, at); - reg_start(R300_FG_ALPHA_FUNC, 0); - e32(0x0); + OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0); + + OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + END_BATCH(); + R300_STATECHANGE(r300, vir[0]); + R300_STATECHANGE(r300, fogs); + R300_STATECHANGE(r300, vir[1]); + R300_STATECHANGE(r300, vic); + R300_STATECHANGE(r300, vte); + R300_STATECHANGE(r300, vof); + R300_STATECHANGE(r300, txe); + R300_STATECHANGE(r300, vpt); + R300_STATECHANGE(r300, at); R300_STATECHANGE(r300, bld); - reg_start(R300_RB3D_CBLEND, 1); - e32(0x0); - e32(0x0); + R300_STATECHANGE(r300, ps); if (has_tcl) { - R300_STATECHANGE(r300, vap_clip_cntl); - reg_start(R300_VAP_CLIP_CNTL, 0); - e32(R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); + R300_STATECHANGE(r300, vap_clip_cntl); + + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE); + END_BATCH(); } - R300_STATECHANGE(r300, ps); - reg_start(R300_GA_POINT_SIZE, 0); - e32(((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | - ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGVAL(R300_GA_POINT_SIZE, + ((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) | + ((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT)); + END_BATCH(); if (!is_r500) { R300_STATECHANGE(r300, ri); - reg_start(R300_RS_IP_0, 7); - for (i = 0; i < 8; ++i) { - e32(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); - } - R300_STATECHANGE(r300, rc); - /* The second constant is needed to get glxgears display anything .. */ - reg_start(R300_RS_COUNT, 1); - e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - e32(0x0); - R300_STATECHANGE(r300, rr); - reg_start(R300_RS_INST_0, 0); - e32(R300_RS_INST_COL_CN_WRITE); + + BEGIN_BATCH(14); + OUT_BATCH_REGSEQ(R300_RS_IP_0, 8); + for (i = 0; i < 8; ++i) + OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3)); + + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + OUT_BATCH(0x0); + + OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE); + END_BATCH(); } else { R300_STATECHANGE(r300, ri); - reg_start(R500_RS_IP_0, 7); + R300_STATECHANGE(r300, rc); + R300_STATECHANGE(r300, rr); + + BEGIN_BATCH(14); + OUT_BATCH_REGSEQ(R500_RS_IP_0, 8); for (i = 0; i < 8; ++i) { - e32((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | - (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | - (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); + OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)); } - R300_STATECHANGE(r300, rc); - /* The second constant is needed to get glxgears display anything .. */ - reg_start(R300_RS_COUNT, 1); - e32((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); - e32(0x0); - - R300_STATECHANGE(r300, rr); - reg_start(R500_RS_INST_0, 0); - e32(R500_RS_INST_COL_CN_WRITE); + OUT_BATCH_REGSEQ(R300_RS_COUNT, 2); + OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN); + OUT_BATCH(0x0); + OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE); + END_BATCH(); } if (!is_r500) { R300_STATECHANGE(r300, fp); - reg_start(R300_US_CONFIG, 2); - e32(0x0); - e32(0x0); - e32(0x0); - reg_start(R300_US_CODE_ADDR_0, 3); - e32(0x0); - e32(0x0); - e32(0x0); - e32(R300_RGBA_OUT); - R300_STATECHANGE(r300, fpi[0]); R300_STATECHANGE(r300, fpi[1]); R300_STATECHANGE(r300, fpi[2]); R300_STATECHANGE(r300, fpi[3]); - reg_start(R300_US_ALU_RGB_INST_0, 0); - e32(FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); - - reg_start(R300_US_ALU_RGB_ADDR_0, 0); - e32(FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); - - reg_start(R300_US_ALU_ALPHA_INST_0, 0); - e32(FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); - - reg_start(R300_US_ALU_ALPHA_ADDR_0, 0); - e32(FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + BEGIN_BATCH(17); + OUT_BATCH_REGSEQ(R300_US_CONFIG, 3); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH(0x0); + OUT_BATCH(R300_RGBA_OUT); + + OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0, + FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO))); + OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0, + FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0)); + OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0, + FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO))); + OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0, + FP_SELA(0, NO, W, FP_TMP(0), 0, 0)); + END_BATCH(); } else { - R300_STATECHANGE(r300, fp); - reg_start(R500_US_CONFIG, 1); - e32(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); - e32(0x0); - reg_start(R500_US_CODE_ADDR, 2); - e32(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); - e32(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); - e32(R500_US_CODE_OFFSET_ADDR(0)); + struct radeon_state_atom r500fp; + uint32_t _cmd[10]; + R300_STATECHANGE(r300, fp); R300_STATECHANGE(r300, r500fp); - r500fp_start_fragment(0, 6); - - e32(R500_INST_TYPE_OUT | - R500_INST_TEX_SEM_WAIT | - R500_INST_LAST | - R500_INST_RGB_OMASK_R | - R500_INST_RGB_OMASK_G | - R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK | - R500_INST_RGB_CLAMP | - R500_INST_ALPHA_CLAMP); - - e32(R500_RGB_ADDR0(0) | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | - R500_RGB_ADDR2_CONST); - - e32(R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | - R500_ALPHA_ADDR2_CONST); - - e32(R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | - R500_ALU_RGB_R_SWIZ_B_R | - R500_ALU_RGB_B_SWIZ_B_G | - R500_ALU_RGB_G_SWIZ_B_B); - - e32(R500_ALPHA_OP_CMP | - R500_ALPHA_SWIZ_A_A | - R500_ALPHA_SWIZ_B_A); - - e32(R500_ALU_RGBA_OP_CMP | - R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | - R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0); + + BEGIN_BATCH(7); + OUT_BATCH_REGSEQ(R500_US_CONFIG, 2); + OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); + OUT_BATCH(0x0); + OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3); + OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1)); + OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1)); + OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0)); + END_BATCH(); + + r500fp.check = check_r500fp; + r500fp.cmd = _cmd; + r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0); + r500fp.cmd[1] = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK | + R500_INST_RGB_CLAMP | + R500_INST_ALPHA_CLAMP; + r500fp.cmd[2] = R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST; + r500fp.cmd[3] = R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST; + r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_R | + R500_ALU_RGB_B_SWIZ_B_G | + R500_ALU_RGB_G_SWIZ_B_B; + r500fp.cmd[5] = R500_ALPHA_OP_CMP | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_A; + r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0; + + r500fp.cmd[7] = 0; + emit_r500fp(ctx, &r500fp); } - reg_start(R300_VAP_PVS_STATE_FLUSH_REG, 0); - e32(0x00000000); + BEGIN_BATCH(2); + OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0); + END_BATCH(); + if (has_tcl) { - vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | (5 << R300_PVS_NUM_CNTLRS_SHIFT) | (12 << R300_VF_MAX_VTX_NUM_SHIFT)); - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - vap_cntl |= R500_TCL_STATE_OPTIMIZATION; - } else - vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) + vap_cntl |= R500_TCL_STATE_OPTIMIZATION; + } else { + vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) | (5 << R300_PVS_NUM_CNTLRS_SHIFT) | (5 << R300_VF_MAX_VTX_NUM_SHIFT)); + } if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515) - vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); + vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT); else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) || (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) || (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570)) - vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); + vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT); else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) || (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420)) - vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); + vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT); else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) || (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580)) - vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); + vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT); else - vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); + vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT); + + R300_STATECHANGE(r300, vap_cntl); - R300_STATECHANGE(rmesa, vap_cntl); - reg_start(R300_VAP_CNTL, 0); - e32(vap_cntl); + BEGIN_BATCH(2); + OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl); + END_BATCH(); if (has_tcl) { + struct radeon_state_atom vpu; + uint32_t _cmd[10]; R300_STATECHANGE(r300, pvs); - reg_start(R300_VAP_PVS_CODE_CNTL_0, 2); - - e32((0 << R300_PVS_FIRST_INST_SHIFT) | - (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | - (1 << R300_PVS_LAST_INST_SHIFT)); - e32((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | - (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); - e32(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); - R300_STATECHANGE(r300, vpi); - vsf_start_fragment(0x0, 8); - e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 0, 0xf, PVS_DST_REG_OUT)); - e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); - e32(PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); - e32(0x0); - - e32(PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, PVS_DST_REG_OUT)); - e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); - e32(PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_SELECT_FORCE_0, PVS_SRC_REG_INPUT, VSF_FLAG_NONE)); - e32(0x0); + BEGIN_BATCH(4); + OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3); + OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) | + (0 << R300_PVS_XYZW_VALID_INST_SHIFT) | + (1 << R300_PVS_LAST_INST_SHIFT)); + OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | + (0 << R300_PVS_MAX_CONST_ADDR_SHIFT)); + OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT); + END_BATCH(); + + vpu.check = check_vpu; + vpu.cmd = _cmd; + vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2); + + vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, + 0, 0xf, PVS_DST_REG_OUT); + vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, + PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, + PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + vpu.cmd[4] = 0x0; + + vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf, + PVS_DST_REG_OUT); + vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X, + PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, + PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, + + VSF_FLAG_NONE); + vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_SELECT_FORCE_0, + PVS_SRC_REG_INPUT, VSF_FLAG_NONE); + vpu.cmd[8] = 0x0; + + r300->vap_flush_needed = GL_TRUE; + emit_vpu(ctx, &vpu); } } -/** - * Buffer clear - */ -static void r300Clear(GLcontext * ctx, GLbitfield mask) +static void r300KernelClear(GLcontext *ctx, GLuint flags) { r300ContextPtr r300 = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable; - int flags = 0; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); + struct radeon_framebuffer *rfb = dPriv->driverPrivate; + struct radeon_renderbuffer *rrb; + struct radeon_renderbuffer *rrbd; int bits = 0; - int swapped; - - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "r300Clear\n"); - - { - LOCK_HARDWARE(&r300->radeon); - UNLOCK_HARDWARE(&r300->radeon); - if (dPriv->numClipRects == 0) - return; - } - if (mask & BUFFER_BIT_FRONT_LEFT) { - flags |= BUFFER_BIT_FRONT_LEFT; - mask &= ~BUFFER_BIT_FRONT_LEFT; - } - - if (mask & BUFFER_BIT_BACK_LEFT) { - flags |= BUFFER_BIT_BACK_LEFT; - mask &= ~BUFFER_BIT_BACK_LEFT; - } - - if (mask & BUFFER_BIT_DEPTH) { + /* Make sure it fits there. */ + rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__); + if (flags || bits) + r300EmitClearState(ctx); + rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH); + if (rrbd && (flags & BUFFER_BIT_DEPTH)) bits |= CLEARBUFFER_DEPTH; - mask &= ~BUFFER_BIT_DEPTH; - } - if ((mask & BUFFER_BIT_STENCIL) && r300->state.stencil.hw_stencil) { + if (rrbd && (flags & BUFFER_BIT_STENCIL)) bits |= CLEARBUFFER_STENCIL; - mask &= ~BUFFER_BIT_STENCIL; - } - if (mask) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s: swrast clear, mask: %x\n", - __FUNCTION__, mask); - _swrast_Clear(ctx, mask); + if (flags & BUFFER_BIT_COLOR0) { + rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0); + r300ClearBuffer(r300, CLEARBUFFER_COLOR, rrb, NULL); + bits = 0; } - swapped = r300->radeon.sarea->pfCurrentPage == 1; - - /* Make sure it fits there. */ - r300EnsureCmdBufSpace(r300, 421 * 3, __FUNCTION__); - if (flags || bits) - r300EmitClearState(ctx); - if (flags & BUFFER_BIT_FRONT_LEFT) { - r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped); + rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT); + r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd); bits = 0; } if (flags & BUFFER_BIT_BACK_LEFT) { - r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, swapped ^ 1); + rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_BACK_LEFT); + r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd); bits = 0; } if (bits) - r300ClearBuffer(r300, bits, 0); + r300ClearBuffer(r300, bits, NULL, rrbd); + COMMIT_BATCH(); } -void r300Flush(GLcontext * ctx) +/** + * Buffer clear + */ +static void r300Clear(GLcontext * ctx, GLbitfield mask) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); + r300ContextPtr r300 = R300_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&r300->radeon); + const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask); + GLbitfield swrast_mask = 0, tri_mask = 0; + int i; + struct gl_framebuffer *fb = ctx->DrawBuffer; if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); - - if (rmesa->cmdbuf.count_used > rmesa->cmdbuf.count_reemit) - r300FlushCmdBuf(rmesa, __FUNCTION__); -} - -#ifdef USER_BUFFERS -#include "r300_mem.h" - -void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size) -{ - struct r300_dma_buffer *dmabuf; - size = MAX2(size, RADEON_BUFFER_SIZE * 16); - - if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (rmesa->dma.flush) { - rmesa->dma.flush(rmesa); - } + fprintf(stderr, "r300Clear\n"); - if (rmesa->dma.current.buf) { -#ifdef USER_BUFFERS - r300_mem_use(rmesa, rmesa->dma.current.buf->id); -#endif - r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); + if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) { + LOCK_HARDWARE(&r300->radeon); + UNLOCK_HARDWARE(&r300->radeon); + if (dPriv->numClipRects == 0) + return; } - if (rmesa->dma.nr_released_bufs > 4) - r300FlushCmdBuf(rmesa, __FUNCTION__); - - dmabuf = CALLOC_STRUCT(r300_dma_buffer); - dmabuf->buf = (void *)1; /* hack */ - dmabuf->refcount = 1; - dmabuf->id = r300_mem_alloc(rmesa, 4, size); - if (dmabuf->id == 0) { - LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ - - r300FlushCmdBufLocked(rmesa, __FUNCTION__); - radeonWaitForIdleLocked(&rmesa->radeon); + /* Flush swtcl vertices if necessary, because we will change hardware + * state during clear. See also the state-related comment in + * r300EmitClearState. + */ + R300_NEWPRIM(r300); - dmabuf->id = r300_mem_alloc(rmesa, 4, size); + if (colorMask == ~0) + tri_mask |= (mask & BUFFER_BITS_COLOR); - UNLOCK_HARDWARE(&rmesa->radeon); - if (dmabuf->id == 0) { - fprintf(stderr, - "Error: Could not get dma buffer... exiting\n"); - _mesa_exit(-1); - } + /* HW stencil */ + if (mask & BUFFER_BIT_STENCIL) { + tri_mask |= BUFFER_BIT_STENCIL; } - rmesa->dma.current.buf = dmabuf; - rmesa->dma.current.address = r300_mem_ptr(rmesa, dmabuf->id); - rmesa->dma.current.end = size; - rmesa->dma.current.start = 0; - rmesa->dma.current.ptr = 0; -} - -void r300ReleaseDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, const char *caller) -{ - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); - - if (!region->buf) - return; - - if (rmesa->dma.flush) - rmesa->dma.flush(rmesa); - - if (--region->buf->refcount == 0) { - r300_mem_free(rmesa, region->buf->id); - FREE(region->buf); - rmesa->dma.nr_released_bufs++; + /* HW depth */ + if (mask & BUFFER_BIT_DEPTH) { + tri_mask |= BUFFER_BIT_DEPTH; } - region->buf = 0; - region->start = 0; -} - -/* Allocates a region from rmesa->dma.current. If there isn't enough - * space in current, grab a new buffer (and discard what was left of current) - */ -void r300AllocDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, - int bytes, int alignment) -{ - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); - - if (rmesa->dma.flush) - rmesa->dma.flush(rmesa); - - if (region->buf) - r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); - - alignment--; - rmesa->dma.current.start = rmesa->dma.current.ptr = - (rmesa->dma.current.ptr + alignment) & ~alignment; - - if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) - r300RefillCurrentDmaRegion(rmesa, (bytes + 0x7) & ~0x7); - - region->start = rmesa->dma.current.start; - region->ptr = rmesa->dma.current.start; - region->end = rmesa->dma.current.start + bytes; - region->address = rmesa->dma.current.address; - region->buf = rmesa->dma.current.buf; - region->buf->refcount++; - - rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ - rmesa->dma.current.start = - rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; - - assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); -} + /* If we're doing a tri pass for depth/stencil, include a likely color + * buffer with it. + */ -#else -static void r300RefillCurrentDmaRegion(r300ContextPtr rmesa) -{ - struct r300_dma_buffer *dmabuf; - int fd = rmesa->radeon.dri.fd; - int index = 0; - int size = 0; - drmDMAReq dma; - int ret; - - if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (rmesa->dma.flush) { - rmesa->dma.flush(rmesa); + for (i = 0; i < BUFFER_COUNT; i++) { + GLuint bufBit = 1 << i; + if ((tri_mask) & bufBit) { + if (!fb->Attachment[i].Renderbuffer->ClassID) { + tri_mask &= ~bufBit; + swrast_mask |= bufBit; + } + } } - if (rmesa->dma.current.buf) - r300ReleaseDmaRegion(rmesa, &rmesa->dma.current, __FUNCTION__); - - if (rmesa->dma.nr_released_bufs > 4) - r300FlushCmdBuf(rmesa, __FUNCTION__); - - dma.context = rmesa->radeon.dri.hwContext; - dma.send_count = 0; - dma.send_list = NULL; - dma.send_sizes = NULL; - dma.flags = 0; - dma.request_count = 1; - dma.request_size = RADEON_BUFFER_SIZE; - dma.request_list = &index; - dma.request_sizes = &size; - dma.granted_count = 0; - - LOCK_HARDWARE(&rmesa->radeon); /* no need to validate */ - - ret = drmDMA(fd, &dma); - - if (ret != 0) { - /* Try to release some buffers and wait until we can't get any more */ - if (rmesa->dma.nr_released_bufs) { - r300FlushCmdBufLocked(rmesa, __FUNCTION__); - } - - if (RADEON_DEBUG & DEBUG_DMA) - fprintf(stderr, "Waiting for buffers\n"); - - radeonWaitForIdleLocked(&rmesa->radeon); - ret = drmDMA(fd, &dma); + /* SW fallback clearing */ + swrast_mask = mask & ~tri_mask; - if (ret != 0) { - UNLOCK_HARDWARE(&rmesa->radeon); - fprintf(stderr, - "Error: Could not get dma buffer... exiting\n"); - _mesa_exit(-1); - } + if (tri_mask) { + if (r300->radeon.radeonScreen->kernel_mm) + r300UserClear(ctx, tri_mask); + else + r300KernelClear(ctx, tri_mask); } - - UNLOCK_HARDWARE(&rmesa->radeon); - - if (RADEON_DEBUG & DEBUG_DMA) - fprintf(stderr, "Allocated buffer %d\n", index); - - dmabuf = CALLOC_STRUCT(r300_dma_buffer); - dmabuf->buf = &rmesa->radeon.radeonScreen->buffers->list[index]; - dmabuf->refcount = 1; - - rmesa->dma.current.buf = dmabuf; - rmesa->dma.current.address = dmabuf->buf->address; - rmesa->dma.current.end = dmabuf->buf->total; - rmesa->dma.current.start = 0; - rmesa->dma.current.ptr = 0; -} - -void r300ReleaseDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, const char *caller) -{ - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); - - if (!region->buf) - return; - - if (rmesa->dma.flush) - rmesa->dma.flush(rmesa); - - if (--region->buf->refcount == 0) { - drm_radeon_cmd_header_t *cmd; - - if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) - fprintf(stderr, "%s -- DISCARD BUF %d\n", - __FUNCTION__, region->buf->buf->idx); - cmd = - (drm_radeon_cmd_header_t *) r300AllocCmdBuf(rmesa, - sizeof - (*cmd) / 4, - __FUNCTION__); - cmd->dma.cmd_type = R300_CMD_DMA_DISCARD; - cmd->dma.buf_idx = region->buf->buf->idx; - - FREE(region->buf); - rmesa->dma.nr_released_bufs++; + if (swrast_mask) { + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s: swrast clear, mask: %x\n", + __FUNCTION__, swrast_mask); + _swrast_Clear(ctx, swrast_mask); } - - region->buf = 0; - region->start = 0; -} - -/* Allocates a region from rmesa->dma.current. If there isn't enough - * space in current, grab a new buffer (and discard what was left of current) - */ -void r300AllocDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, - int bytes, int alignment) -{ - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); - - if (rmesa->dma.flush) - rmesa->dma.flush(rmesa); - - if (region->buf) - r300ReleaseDmaRegion(rmesa, region, __FUNCTION__); - - alignment--; - rmesa->dma.current.start = rmesa->dma.current.ptr = - (rmesa->dma.current.ptr + alignment) & ~alignment; - - if (rmesa->dma.current.ptr + bytes > rmesa->dma.current.end) - r300RefillCurrentDmaRegion(rmesa); - - region->start = rmesa->dma.current.start; - region->ptr = rmesa->dma.current.start; - region->end = rmesa->dma.current.start + bytes; - region->address = rmesa->dma.current.address; - region->buf = rmesa->dma.current.buf; - region->buf->refcount++; - - rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ - rmesa->dma.current.start = - rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; - - assert(rmesa->dma.current.ptr <= rmesa->dma.current.end); } -#endif - -GLboolean r300IsGartMemory(r300ContextPtr rmesa, const GLvoid * pointer, - GLint size) -{ - int offset = - (char *)pointer - - (char *)rmesa->radeon.radeonScreen->gartTextures.map; - int valid = (size >= 0 && offset >= 0 - && offset + size < - rmesa->radeon.radeonScreen->gartTextures.size); - - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "r300IsGartMemory( %p ) : %d\n", pointer, - valid); - - return valid; -} - -GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, const GLvoid * pointer) -{ - int offset = - (char *)pointer - - (char *)rmesa->radeon.radeonScreen->gartTextures.map; - - //fprintf(stderr, "offset=%08x\n", offset); - - if (offset < 0 - || offset > rmesa->radeon.radeonScreen->gartTextures.size) - return ~0; - else - return rmesa->radeon.radeonScreen->gart_texture_offset + offset; -} void r300InitIoctlFuncs(struct dd_function_table *functions) { functions->Clear = r300Clear; functions->Finish = radeonFinish; - functions->Flush = r300Flush; + functions->Flush = radeonFlush; } diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.h b/src/mesa/drivers/dri/r300/r300_ioctl.h index e1143fb6c3..3abfa71a6e 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.h +++ b/src/mesa/drivers/dri/r300/r300_ioctl.h @@ -39,22 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" #include "radeon_drm.h" -extern GLboolean r300IsGartMemory(r300ContextPtr rmesa, - const GLvoid * pointer, GLint size); - -extern GLuint r300GartOffsetFromVirtual(r300ContextPtr rmesa, - const GLvoid * pointer); - -extern void r300Flush(GLcontext * ctx); - -extern void r300ReleaseDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, - const char *caller); -extern void r300AllocDmaRegion(r300ContextPtr rmesa, - struct r300_dma_region *region, int bytes, - int alignment); - extern void r300InitIoctlFuncs(struct dd_function_table *functions); -extern void r300RefillCurrentDmaRegion(r300ContextPtr rmesa, int size); #endif /* __R300_IOCTL_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_mem.c b/src/mesa/drivers/dri/r300/r300_mem.c deleted file mode 100644 index f8f9d4fcdf..0000000000 --- a/src/mesa/drivers/dri/r300/r300_mem.c +++ /dev/null @@ -1,385 +0,0 @@ -/* - * Copyright (C) 2005 Aapo Tahkola. - * - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial - * portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - -/** - * \file - * - * \author Aapo Tahkola <aet@rasterburn.org> - */ - -#include <unistd.h> - -#include "r300_context.h" -#include "r300_cmdbuf.h" -#include "r300_ioctl.h" -#include "r300_mem.h" -#include "radeon_ioctl.h" - -#ifdef USER_BUFFERS - -static void resize_u_list(r300ContextPtr rmesa) -{ - void *temp; - int nsize; - - temp = rmesa->rmm->u_list; - nsize = rmesa->rmm->u_size * 2; - - rmesa->rmm->u_list = _mesa_malloc(nsize * sizeof(*rmesa->rmm->u_list)); - _mesa_memset(rmesa->rmm->u_list, 0, - nsize * sizeof(*rmesa->rmm->u_list)); - - if (temp) { - r300FlushCmdBuf(rmesa, __FUNCTION__); - - _mesa_memcpy(rmesa->rmm->u_list, temp, - rmesa->rmm->u_size * sizeof(*rmesa->rmm->u_list)); - _mesa_free(temp); - } - - rmesa->rmm->u_size = nsize; -} - -void r300_mem_init(r300ContextPtr rmesa) -{ - rmesa->rmm = malloc(sizeof(struct r300_memory_manager)); - memset(rmesa->rmm, 0, sizeof(struct r300_memory_manager)); - - rmesa->rmm->u_size = 128; - resize_u_list(rmesa); -} - -void r300_mem_destroy(r300ContextPtr rmesa) -{ - _mesa_free(rmesa->rmm->u_list); - rmesa->rmm->u_list = NULL; - - _mesa_free(rmesa->rmm); - rmesa->rmm = NULL; -} - -void *r300_mem_ptr(r300ContextPtr rmesa, int id) -{ - assert(id <= rmesa->rmm->u_last); - return rmesa->rmm->u_list[id].ptr; -} - -int r300_mem_find(r300ContextPtr rmesa, void *ptr) -{ - int i; - - for (i = 1; i < rmesa->rmm->u_size + 1; i++) - if (rmesa->rmm->u_list[i].ptr && - ptr >= rmesa->rmm->u_list[i].ptr && - ptr < - rmesa->rmm->u_list[i].ptr + rmesa->rmm->u_list[i].size) - break; - - if (i < rmesa->rmm->u_size + 1) - return i; - - fprintf(stderr, "%p failed\n", ptr); - return 0; -} - -//#define MM_DEBUG -int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size) -{ - drm_radeon_mem_alloc_t alloc; - int offset = 0, ret; - int i, free = -1; - int done_age; - drm_radeon_mem_free_t memfree; - int tries = 0; - static int bytes_wasted = 0, allocated = 0; - - if (size < 4096) - bytes_wasted += 4096 - size; - - allocated += size; - -#if 0 - static int t = 0; - if (t != time(NULL)) { - t = time(NULL); - fprintf(stderr, "slots used %d, wasted %d kb, allocated %d\n", - rmesa->rmm->u_last, bytes_wasted / 1024, - allocated / 1024); - } -#endif - - memfree.region = RADEON_MEM_REGION_GART; - - again: - - done_age = radeonGetAge((radeonContextPtr) rmesa); - - if (rmesa->rmm->u_last + 1 >= rmesa->rmm->u_size) - resize_u_list(rmesa); - - for (i = rmesa->rmm->u_last + 1; i > 0; i--) { - if (rmesa->rmm->u_list[i].ptr == NULL) { - free = i; - continue; - } - - if (rmesa->rmm->u_list[i].h_pending == 0 && - rmesa->rmm->u_list[i].pending - && rmesa->rmm->u_list[i].age <= done_age) { - memfree.region_offset = - (char *)rmesa->rmm->u_list[i].ptr - - (char *)rmesa->radeon.radeonScreen->gartTextures. - map; - - ret = - drmCommandWrite(rmesa->radeon.radeonScreen-> - driScreen->fd, DRM_RADEON_FREE, - &memfree, sizeof(memfree)); - - if (ret) { - fprintf(stderr, "Failed to free at %p\n", - rmesa->rmm->u_list[i].ptr); - fprintf(stderr, "ret = %s\n", strerror(-ret)); - exit(1); - } else { -#ifdef MM_DEBUG - fprintf(stderr, "really freed %d at age %x\n", - i, - radeonGetAge((radeonContextPtr) rmesa)); -#endif - if (i == rmesa->rmm->u_last) - rmesa->rmm->u_last--; - - if (rmesa->rmm->u_list[i].size < 4096) - bytes_wasted -= - 4096 - rmesa->rmm->u_list[i].size; - - allocated -= rmesa->rmm->u_list[i].size; - rmesa->rmm->u_list[i].pending = 0; - rmesa->rmm->u_list[i].ptr = NULL; - free = i; - } - } - } - rmesa->rmm->u_head = i; - - if (free == -1) { - WARN_ONCE("Ran out of slots!\n"); - //usleep(100); - r300FlushCmdBuf(rmesa, __FUNCTION__); - tries++; - if (tries > 100) { - WARN_ONCE("Ran out of slots!\n"); - exit(1); - } - goto again; - } - - alloc.region = RADEON_MEM_REGION_GART; - alloc.alignment = alignment; - alloc.size = size; - alloc.region_offset = &offset; - - ret = - drmCommandWriteRead(rmesa->radeon.dri.fd, DRM_RADEON_ALLOC, &alloc, - sizeof(alloc)); - if (ret) { -#if 0 - WARN_ONCE("Ran out of mem!\n"); - r300FlushCmdBuf(rmesa, __FUNCTION__); - //usleep(100); - tries2++; - tries = 0; - if (tries2 > 100) { - WARN_ONCE("Ran out of GART memory!\n"); - exit(1); - } - goto again; -#else - WARN_ONCE - ("Ran out of GART memory (for %d)!\nPlease consider adjusting GARTSize option.\n", - size); - return 0; -#endif - } - - i = free; - - if (i > rmesa->rmm->u_last) - rmesa->rmm->u_last = i; - - rmesa->rmm->u_list[i].ptr = - ((GLubyte *) rmesa->radeon.radeonScreen->gartTextures.map) + offset; - rmesa->rmm->u_list[i].size = size; - rmesa->rmm->u_list[i].age = 0; - //fprintf(stderr, "alloc %p at id %d\n", rmesa->rmm->u_list[i].ptr, i); - -#ifdef MM_DEBUG - fprintf(stderr, "allocated %d at age %x\n", i, - radeonGetAge((radeonContextPtr) rmesa)); -#endif - - return i; -} - -void r300_mem_use(r300ContextPtr rmesa, int id) -{ - uint64_t ull; -#ifdef MM_DEBUG - fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, - radeonGetAge((radeonContextPtr) rmesa)); -#endif - drm_r300_cmd_header_t *cmd; - - assert(id <= rmesa->rmm->u_last); - - if (id == 0) - return; - - cmd = - (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, - 2 + sizeof(ull) / 4, - __FUNCTION__); - cmd[0].scratch.cmd_type = R300_CMD_SCRATCH; - cmd[0].scratch.reg = R300_MEM_SCRATCH; - cmd[0].scratch.n_bufs = 1; - cmd[0].scratch.flags = 0; - cmd++; - - ull = (uint64_t) (intptr_t) & rmesa->rmm->u_list[id].age; - _mesa_memcpy(cmd, &ull, sizeof(ull)); - cmd += sizeof(ull) / 4; - - cmd[0].u = /*id */ 0; - - LOCK_HARDWARE(&rmesa->radeon); /* Protect from DRM. */ - rmesa->rmm->u_list[id].h_pending++; - UNLOCK_HARDWARE(&rmesa->radeon); -} - -unsigned long r300_mem_offset(r300ContextPtr rmesa, int id) -{ - unsigned long offset; - - assert(id <= rmesa->rmm->u_last); - - offset = (char *)rmesa->rmm->u_list[id].ptr - - (char *)rmesa->radeon.radeonScreen->gartTextures.map; - offset += rmesa->radeon.radeonScreen->gart_texture_offset; - - return offset; -} - -void *r300_mem_map(r300ContextPtr rmesa, int id, int access) -{ -#ifdef MM_DEBUG - fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, - radeonGetAge((radeonContextPtr) rmesa)); -#endif - void *ptr; - int tries = 0; - - assert(id <= rmesa->rmm->u_last); - - if (access == R300_MEM_R) { - - if (rmesa->rmm->u_list[id].mapped == 1) - WARN_ONCE("buffer %d already mapped\n", id); - - rmesa->rmm->u_list[id].mapped = 1; - ptr = r300_mem_ptr(rmesa, id); - - return ptr; - } - - if (rmesa->rmm->u_list[id].h_pending) - r300FlushCmdBuf(rmesa, __FUNCTION__); - - if (rmesa->rmm->u_list[id].h_pending) { - return NULL; - } - - while (rmesa->rmm->u_list[id].age > - radeonGetAge((radeonContextPtr) rmesa) && tries++ < 1000) - usleep(10); - - if (tries >= 1000) { - fprintf(stderr, "Idling failed (%x vs %x)\n", - rmesa->rmm->u_list[id].age, - radeonGetAge((radeonContextPtr) rmesa)); - return NULL; - } - - if (rmesa->rmm->u_list[id].mapped == 1) - WARN_ONCE("buffer %d already mapped\n", id); - - rmesa->rmm->u_list[id].mapped = 1; - ptr = r300_mem_ptr(rmesa, id); - - return ptr; -} - -void r300_mem_unmap(r300ContextPtr rmesa, int id) -{ -#ifdef MM_DEBUG - fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, - radeonGetAge((radeonContextPtr) rmesa)); -#endif - - assert(id <= rmesa->rmm->u_last); - - if (rmesa->rmm->u_list[id].mapped == 0) - WARN_ONCE("buffer %d not mapped\n", id); - - rmesa->rmm->u_list[id].mapped = 0; -} - -void r300_mem_free(r300ContextPtr rmesa, int id) -{ -#ifdef MM_DEBUG - fprintf(stderr, "%s: %d at age %x\n", __FUNCTION__, id, - radeonGetAge((radeonContextPtr) rmesa)); -#endif - - assert(id <= rmesa->rmm->u_last); - - if (id == 0) - return; - - if (rmesa->rmm->u_list[id].ptr == NULL) { - WARN_ONCE("Not allocated!\n"); - return; - } - - if (rmesa->rmm->u_list[id].pending) { - WARN_ONCE("%p already pended!\n", rmesa->rmm->u_list[id].ptr); - return; - } - - rmesa->rmm->u_list[id].pending = 1; -} -#endif diff --git a/src/mesa/drivers/dri/r300/r300_mem.h b/src/mesa/drivers/dri/r300/r300_mem.h deleted file mode 100644 index 625a7f6d8d..0000000000 --- a/src/mesa/drivers/dri/r300/r300_mem.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef __R300_MEM_H__ -#define __R300_MEM_H__ - -//#define R300_MEM_PDL 0 -#define R300_MEM_UL 1 - -#define R300_MEM_R 1 -#define R300_MEM_W 2 -#define R300_MEM_RW (R300_MEM_R | R300_MEM_W) - -#define R300_MEM_SCRATCH 2 - -struct r300_memory_manager { - struct { - void *ptr; - uint32_t size; - uint32_t age; - uint32_t h_pending; - int pending; - int mapped; - } *u_list; - int u_head, u_size, u_last; - -}; - -extern void r300_mem_init(r300ContextPtr rmesa); -extern void r300_mem_destroy(r300ContextPtr rmesa); -extern void *r300_mem_ptr(r300ContextPtr rmesa, int id); -extern int r300_mem_find(r300ContextPtr rmesa, void *ptr); -extern int r300_mem_alloc(r300ContextPtr rmesa, int alignment, int size); -extern void r300_mem_use(r300ContextPtr rmesa, int id); -extern unsigned long r300_mem_offset(r300ContextPtr rmesa, int id); -extern void *r300_mem_map(r300ContextPtr rmesa, int id, int access); -extern void r300_mem_unmap(r300ContextPtr rmesa, int id); -extern void r300_mem_free(r300ContextPtr rmesa, int id); - -#endif diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 8f1a6630d5..357c600af9 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1467,6 +1467,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_FORMAT_3D (1 << 25) # define R300_TX_FORMAT_CUBIC_MAP (2 << 25) +# define R300_TX_FORMAT_GAMMA (1 << 21) + /* gap */ /* Floating point formats */ /* Note - hardware supports both 16 and 32 bit floating point */ @@ -1531,6 +1533,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R500_SEL_FILTER4_TC3 (3 << 18) #define R300_TX_OFFSET_0 0x4540 +#define R300_TX_OFFSET_1 0x4544 +#define R300_TX_OFFSET_2 0x4548 +#define R300_TX_OFFSET_3 0x454C +#define R300_TX_OFFSET_4 0x4550 +#define R300_TX_OFFSET_5 0x4554 +#define R300_TX_OFFSET_6 0x4558 +#define R300_TX_OFFSET_7 0x455C /* BEGIN: Guess from R200 */ # define R300_TXO_ENDIAN_NO_SWAP (0 << 0) # define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) @@ -2425,6 +2434,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* Z Buffer Clear Value */ #define R300_ZB_DEPTHCLEARVALUE 0x4f28 +#define R300_ZB_ZMASK_OFFSET 0x4f30 +#define R300_ZB_ZMASK_PITCH 0x4f34 +#define R300_ZB_ZMASK_WRINDEX 0x4f38 +#define R300_ZB_ZMASK_DWORD 0x4f3c +#define R300_ZB_ZMASK_RDINDEX 0x4f40 + /* Hierarchical Z Memory Offset */ #define R300_ZB_HIZ_OFFSET 0x4f44 @@ -3165,6 +3180,9 @@ enum { # define R300_W_SRC_RAS (1 << 2) +/* Packet0 field ordering to write all values to the same reg */ +#define RADEON_ONE_REG_WR (1 << 15) + /* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. * Two parameter dwords: * 0. VAP_VTX_FMT: The first parameter is not written to hardware diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index 16ce4a1199..bf50b062f6 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -50,6 +50,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. * no bugs... */ +#include "r300_render.h" + #include "main/glheader.h" #include "main/state.h" #include "main/imports.h" @@ -66,16 +68,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/t_vp_build.h" #include "radeon_reg.h" #include "radeon_macros.h" -#include "radeon_ioctl.h" -#include "radeon_state.h" #include "r300_context.h" #include "r300_ioctl.h" #include "r300_state.h" #include "r300_reg.h" #include "r300_tex.h" #include "r300_emit.h" -#include "r300_fragprog.h" -extern int future_hw_tcl_on; +#include "r300_fragprog_common.h" +#include "r300_swtcl.h" /** * \brief Convert a OpenGL primitive type into a R300 primitive type. @@ -172,96 +172,186 @@ int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim) return num_verts - verts_off; } -static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts) +static void r300EmitElts(GLcontext * ctx, unsigned long n_elts) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r300_dma_region *rvb = &rmesa->state.elt_dma; void *out; + GLuint size; - if (r300IsGartMemory(rmesa, elts, n_elts * 4)) { - rvb->address = rmesa->radeon.radeonScreen->gartTextures.map; - rvb->start = ((char *)elts) - rvb->address; - rvb->aos_offset = - rmesa->radeon.radeonScreen->gart_texture_offset + - rvb->start; - return; - } else if (r300IsGartMemory(rmesa, elts, 1)) { - WARN_ONCE("Pointer not within GART memory!\n"); - _mesa_exit(-1); - } - - r300AllocDmaRegion(rmesa, rvb, n_elts * 4, 4); - rvb->aos_offset = GET_START(rvb); + size = ((rmesa->ind_buf.is_32bit ? 4 : 2) * n_elts + 3) & ~3; - out = rvb->address + rvb->start; - memcpy(out, elts, n_elts * 4); + radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo, + &rmesa->radeon.tcl.elt_dma_offset, size, 4); + radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1); + out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset; + memcpy(out, rmesa->ind_buf.ptr, size); + radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo); } -static void r300FireEB(r300ContextPtr rmesa, unsigned long addr, - int vertex_count, int type) +static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type) { - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; - - start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0), 0); - e32(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (vertex_count << 16) | type | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - - start_packet3(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2), 2); - e32(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | - (R300_VAP_PORT_IDX0 >> 2)); - e32(addr); - e32(vertex_count); + BATCH_LOCALS(&rmesa->radeon); + + r300_emit_scissor(rmesa->radeon.glCtx); + if (vertex_count > 0) { + int size; + + BEGIN_BATCH(10); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0); + if (rmesa->ind_buf.is_32bit) { + size = vertex_count; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + ((vertex_count + 0) << 16) | type | + R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + } else { + size = (vertex_count + 1) >> 1; + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | + ((vertex_count + 0) << 16) | type); + } + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset, + rmesa->radeon.tcl.elt_dma_bo, + rmesa->radeon.tcl.elt_dma_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + OUT_BATCH(size); + } else { + OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2); + OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) | + (R300_VAP_PORT_IDX0 >> 2)); + OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset); + OUT_BATCH(size); + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.elt_dma_bo, + RADEON_GEM_DOMAIN_GTT, 0, 0); + } + END_BATCH(); + } } static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset) { + BATCH_LOCALS(&rmesa->radeon); + uint32_t voffset; int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; int i; - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr, offset); - start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1), sz - 1); - e32(nr); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH(sz+2+(nr * 2)); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); + + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) | + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[i].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[i+1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } - for (i = 0; i + 1 < nr; i += 2) { - e32((rmesa->state.aos[i].aos_size << 0) | - (rmesa->state.aos[i].aos_stride << 8) | - (rmesa->state.aos[i + 1].aos_size << 16) | - (rmesa->state.aos[i + 1].aos_stride << 24)); + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[nr - 1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + END_BATCH(); + } else { - e32(rmesa->state.aos[i].aos_offset + offset * 4 * rmesa->state.aos[i].aos_stride); - e32(rmesa->state.aos[i + 1].aos_offset + offset * 4 * rmesa->state.aos[i + 1].aos_stride); - } + BEGIN_BATCH(sz+2+(nr * 2)); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); + + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) | + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH(voffset); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + OUT_BATCH(voffset); + } - if (nr & 1) { - e32((rmesa->state.aos[nr - 1].aos_size << 0) | - (rmesa->state.aos[nr - 1].aos_stride << 8)); - e32(rmesa->state.aos[nr - 1].aos_offset + offset * 4 * rmesa->state.aos[nr - 1].aos_stride); + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + OUT_BATCH(voffset); + } + for (i = 0; i + 1 < nr; i += 2) { + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[i+0].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[i+1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + if (nr & 1) { + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[nr-1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + END_BATCH(); } + } static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type) { - int cmd_reserved = 0; - int cmd_written = 0; - drm_radeon_cmd_header_t *cmd = NULL; + BATCH_LOCALS(&rmesa->radeon); - start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); - e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); + r300_emit_scissor(rmesa->radeon.glCtx); + BEGIN_BATCH(3); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type); + END_BATCH(); } -static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, - int start, int end, int prim) +void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); + BATCH_LOCALS(&rmesa->radeon); int type, num_verts; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); @@ -269,7 +359,13 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, if (type < 0 || num_verts <= 0) return; - if (vb->Elts) { + /* Make space for at least 64 dwords. + * This is supposed to ensure that we can get all rendering + * commands into a single command buffer. + */ + rcommonEnsureCmdBufSpace(&rmesa->radeon, 128, __FUNCTION__); + + if (rmesa->ind_buf.ptr) { if (num_verts > 65535) { /* not implemented yet */ WARN_ONCE("Too many elts\n"); @@ -286,114 +382,143 @@ static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx, * allocating the index array might actually evict the vertex * arrays. *sigh* */ - r300EmitElts(ctx, vb->Elts, num_verts); - r300EmitAOS(rmesa, rmesa->state.aos_count, start); - r300FireEB(rmesa, rmesa->state.elt_dma.aos_offset, num_verts, type); + r300EmitElts(ctx, num_verts); + /* don't pass start if we are split up */ + r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0); + if (rmesa->radeon.radeonScreen->kernel_mm) { + BEGIN_BATCH_NO_AUTOSTATE(2); + OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1); + OUT_BATCH(rmesa->radeon.tcl.aos[0].count); + END_BATCH(); + } + r300FireEB(rmesa, num_verts, type); } else { - r300EmitAOS(rmesa, rmesa->state.aos_count, start); + r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start); r300FireAOS(rmesa, num_verts, type); } + COMMIT_BATCH(); } -static GLboolean r300RunRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) +static void r300RunRender(GLcontext * ctx, struct tnl_pipeline_stage *stage) { r300ContextPtr rmesa = R300_CONTEXT(ctx); int i; TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; - if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); r300UpdateShaders(rmesa); - if (r300EmitArrays(ctx)) - return GL_TRUE; + r300EmitArrays(ctx); r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); - r300EmitState(rmesa); + radeonEmitState(&rmesa->radeon); for (i = 0; i < vb->PrimitiveCount; i++) { GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); GLuint start = vb->Primitive[i].start; GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - r300RunRenderPrimitive(rmesa, ctx, start, end, prim); + r300RunRenderPrimitive(ctx, start, end, prim); } r300EmitCacheFlush(rmesa); -#ifdef USER_BUFFERS - r300UseArrays(ctx); -#endif + radeonReleaseArrays(ctx, ~0); +} - r300ReleaseArrays(ctx); - return GL_FALSE; +static const char *getFallbackString(uint32_t bit) +{ + switch (bit) { + case R300_FALLBACK_VERTEX_PROGRAM : + return "vertex program"; + case R300_FALLBACK_LINE_SMOOTH: + return "smooth lines"; + case R300_FALLBACK_POINT_SMOOTH: + return "smooth points"; + case R300_FALLBACK_POLYGON_SMOOTH: + return "smooth polygons"; + case R300_FALLBACK_LINE_STIPPLE: + return "line stipple"; + case R300_FALLBACK_POLYGON_STIPPLE: + return "polygon stipple"; + case R300_FALLBACK_STENCIL_TWOSIDE: + return "two-sided stencil"; + case R300_FALLBACK_RENDER_MODE: + return "render mode != GL_RENDER"; + case R300_FALLBACK_FRAGMENT_PROGRAM: + return "fragment program"; + case R300_FALLBACK_AOS_LIMIT: + return "aos limit"; + case R300_FALLBACK_INVALID_BUFFERS: + return "invalid buffers"; + default: + return "unknown"; + } } -#define FALLBACK_IF(expr) \ - do { \ - if (expr) { \ - if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \ - WARN_ONCE("Software fallback:%s\n", \ - #expr); \ - return R300_FALLBACK_RAST; \ - } \ - } while(0) - -static int r300Fallback(GLcontext * ctx) +void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode) { - r300ContextPtr r300 = R300_CONTEXT(ctx); - const unsigned back = ctx->Stencil._BackFace; - - /* Do we need to use new-style shaders? - * Also is there a better way to do this? */ - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - struct r500_fragment_program *fp = (struct r500_fragment_program *) - (char *)ctx->FragmentProgram._Current; - if (fp) { - if (!fp->translated) { - r500TranslateFragmentShader(r300, fp); - FALLBACK_IF(!fp->translated); + TNLcontext *tnl = TNL_CONTEXT(ctx); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + uint32_t old_fallback = rmesa->fallback; + static uint32_t fallback_warn = 0; + + if (mode) { + if ((fallback_warn & bit) == 0) { + _mesa_fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(bit)); + fallback_warn |= bit; + } + rmesa->fallback |= bit; + + /* update only if we change from no tcl fallbacks to some tcl fallbacks */ + if (rmesa->options.hw_tcl_enabled) { + if (((old_fallback & R300_TCL_FALLBACK_MASK) == 0) && + ((bit & R300_TCL_FALLBACK_MASK) > 0)) { + R300_STATECHANGE(rmesa, vap_cntl_status); + rmesa->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS; } } + + /* update only if we change from no raster fallbacks to some raster fallbacks */ + if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) && + ((bit & R300_RASTER_FALLBACK_MASK) > 0)) { + + radeon_firevertices(&rmesa->radeon); + rmesa->radeon.swtcl.RenderIndex = ~0; + _swsetup_Wakeup( ctx ); + } } else { - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; - if (fp) { - if (!fp->translated) { - r300TranslateFragmentShader(r300, fp); - FALLBACK_IF(!fp->translated); + rmesa->fallback &= ~bit; + + /* update only if we have disabled all tcl fallbacks */ + if (rmesa->options.hw_tcl_enabled) { + if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { + R300_STATECHANGE(rmesa, vap_cntl_status); + rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS; } } - } - FALLBACK_IF(ctx->RenderMode != GL_RENDER); - - /* If GL_EXT_stencil_two_side is disabled, this fallback check can - * be removed. - */ - FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] - || ctx->Stencil.ValueMask[0] != - ctx->Stencil.ValueMask[back] - || ctx->Stencil.WriteMask[0] != - ctx->Stencil.WriteMask[back]); - - if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite) - FALLBACK_IF(ctx->Point.PointSprite); - - if (!r300->disable_lowimpact_fallback) { - FALLBACK_IF(ctx->Polygon.StippleFlag); - FALLBACK_IF(ctx->Multisample._Enabled); - FALLBACK_IF(ctx->Line.StippleFlag); - FALLBACK_IF(ctx->Line.SmoothFlag); - FALLBACK_IF(ctx->Point.SmoothFlag); + /* update only if we have disabled all raster fallbacks */ + if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) { + _swrast_flush( ctx ); + + tnl->Driver.Render.Start = r300RenderStart; + tnl->Driver.Render.Finish = r300RenderFinish; + tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive; + tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple; + tnl->Driver.Render.BuildVertices = _tnl_build_vertices; + tnl->Driver.Render.CopyPV = _tnl_copy_pv; + tnl->Driver.Render.Interp = _tnl_interp; + + _tnl_invalidate_vertex_state( ctx, ~0 ); + _tnl_invalidate_vertices( ctx, ~0 ); + } } - - return R300_FALLBACK_NONE; + } static GLboolean r300RunNonTCLRender(GLcontext * ctx, @@ -404,43 +529,15 @@ static GLboolean r300RunNonTCLRender(GLcontext * ctx, if (RADEON_DEBUG & DEBUG_PRIMS) fprintf(stderr, "%s\n", __FUNCTION__); - if (r300Fallback(ctx) >= R300_FALLBACK_RAST) - return GL_TRUE; - - if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - return GL_TRUE; - - return r300RunRender(ctx, stage); -} - -static GLboolean r300RunTCLRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct r300_vertex_program *vp; - - hw_tcl_on = future_hw_tcl_on; - - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (hw_tcl_on == GL_FALSE) + if (rmesa->fallback & R300_RASTER_FALLBACK_MASK) return GL_TRUE; - if (r300Fallback(ctx) >= R300_FALLBACK_TCL) { - hw_tcl_on = GL_FALSE; + if (rmesa->options.hw_tcl_enabled == GL_FALSE) return GL_TRUE; - } - - r300UpdateShaders(rmesa); - vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); - if (vp->native == GL_FALSE) { - hw_tcl_on = GL_FALSE; - return GL_TRUE; - } + r300RunRender(ctx, stage); - return r300RunRender(ctx, stage); + return GL_FALSE; } const struct tnl_pipeline_stage _r300_render_stage = { @@ -451,12 +548,3 @@ const struct tnl_pipeline_stage _r300_render_stage = { NULL, r300RunNonTCLRender }; - -const struct tnl_pipeline_stage _r300_tcl_stage = { - "r300 Hardware Transform, Clipping and Lighting", - NULL, - NULL, - NULL, - NULL, - r300RunTCLRender -}; diff --git a/src/mesa/drivers/dri/r300/r300_render.h b/src/mesa/drivers/dri/r300/r300_render.h new file mode 100644 index 0000000000..ec785474a6 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_render.h @@ -0,0 +1,69 @@ +/* + * Copyright 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __R300_RENDER_H__ +#define __R300_RENDER_H__ + +#include "main/mtypes.h" + +#define R300_FALLBACK_VERTEX_PROGRAM (1 << 0) +#define R300_TCL_FALLBACK_MASK 0x0000ffff + +#define R300_FALLBACK_LINE_SMOOTH (1 << 16) +#define R300_FALLBACK_POINT_SMOOTH (1 << 17) +#define R300_FALLBACK_POLYGON_SMOOTH (1 << 18) +#define R300_FALLBACK_LINE_STIPPLE (1 << 19) +#define R300_FALLBACK_POLYGON_STIPPLE (1 << 20) +#define R300_FALLBACK_STENCIL_TWOSIDE (1 << 21) +#define R300_FALLBACK_RENDER_MODE (1 << 22) +#define R300_FALLBACK_FRAGMENT_PROGRAM (1 << 23) +#define R300_FALLBACK_AOS_LIMIT (1 << 30) +#define R300_FALLBACK_INVALID_BUFFERS (1 << 31) +#define R300_RASTER_FALLBACK_MASK 0xffff0000 + +#define MASK_XYZW (R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) +#define MASK_X R300_WRITE_ENA_X +#define MASK_Y R300_WRITE_ENA_Y +#define MASK_Z R300_WRITE_ENA_Z +#define MASK_W R300_WRITE_ENA_W + +#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \ + SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \ + SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \ + SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \ + SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \ + SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE +#error Cannot change these! +#endif + +extern const struct tnl_pipeline_stage _r300_render_stage; + +extern void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode); + +extern void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim); + +#endif diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index f30fd986e0..0133b83796 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -1,18 +1,42 @@ +/* + * Copyright 2009 Maciej Cencora <m.cencora@gmail.com> + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ #include "main/glheader.h" #include "shader/program.h" #include "tnl/tnl.h" #include "r300_context.h" -#include "r300_fragprog.h" +#include "r300_fragprog_common.h" static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, GLuint id) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp; - struct r300_fragment_program *r300_fp; - struct r500_fragment_program *r500_fp; + struct r300_fragment_program *fp; switch (target) { case GL_VERTEX_STATE_PROGRAM_NV: @@ -20,28 +44,12 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, vp = CALLOC_STRUCT(r300_vertex_program_cont); return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id); - case GL_FRAGMENT_PROGRAM_ARB: - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - r500_fp = CALLOC_STRUCT(r500_fragment_program); - r500_fp->ctx = ctx; - return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, - target, id); - } else { - r300_fp = CALLOC_STRUCT(r300_fragment_program); - return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, - target, id); - } case GL_FRAGMENT_PROGRAM_NV: - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - r500_fp = CALLOC_STRUCT(r500_fragment_program); - return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program, - target, id); - } else { - r300_fp = CALLOC_STRUCT(r300_fragment_program); - return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, - target, id); - } + case GL_FRAGMENT_PROGRAM_ARB: + fp = CALLOC_STRUCT(r300_fragment_program); + return _mesa_init_fragment_program(ctx, &fp->Base, target, id); + default: _mesa_problem(ctx, "Bad target in r300NewProgram"); } @@ -57,20 +65,15 @@ static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog) static void r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_vertex_program_cont *vp = (void *)prog; struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog; - struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog; switch (target) { case GL_VERTEX_PROGRAM_ARB: vp->progs = NULL; break; case GL_FRAGMENT_PROGRAM_ARB: - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - r500_fp->translated = GL_FALSE; - else - r300_fp->translated = GL_FALSE; + r300_fp->translated = GL_FALSE; break; } @@ -81,7 +84,14 @@ r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) static GLboolean r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) { - return GL_TRUE; + if (target == GL_FRAGMENT_PROGRAM_ARB) { + struct r300_fragment_program *fp = (struct r300_fragment_program *)prog; + if (!fp->translated) + r300TranslateFragmentShader(ctx, &fp->Base); + + return !fp->error; + } else + return GL_TRUE; } void r300InitShaderFuncs(struct dd_function_table *functions) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 79f0b3625c..c0eda977db 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/macros.h" #include "main/context.h" #include "main/dd.h" +#include "main/framebuffer.h" #include "main/simple_list.h" #include "main/api_arrayelt.h" #include "main/texformat.h" @@ -52,22 +53,22 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/prog_statevars.h" #include "vbo/vbo.h" #include "tnl/tnl.h" +#include "tnl/t_vp_build.h" -#include "radeon_ioctl.h" -#include "radeon_state.h" #include "r300_context.h" #include "r300_ioctl.h" #include "r300_state.h" #include "r300_reg.h" #include "r300_emit.h" -#include "r300_fragprog.h" #include "r300_tex.h" +#include "r300_fragprog_common.h" +#include "r300_fragprog.h" +#include "r500_fragprog.h" +#include "r300_render.h" +#include "r300_vertprog.h" #include "drirenderbuffer.h" -extern int future_hw_tcl_on; -extern void _tnl_UpdateFixedFunctionProgram(GLcontext * ctx); - static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4]) { r300ContextPtr rmesa = R300_CONTEXT(ctx); @@ -366,7 +367,7 @@ static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) GLint *ip; /* no VAP UCP on non-TCL chipsets */ - if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + if (!rmesa->options.hw_tcl_enabled) return; p = (GLint) plane - (GLint) GL_CLIP_PLANE0; @@ -385,7 +386,7 @@ static void r300SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state) GLuint p; /* no VAP UCP on non-TCL chipsets */ - if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + if (!r300->options.hw_tcl_enabled) return; p = cap - GL_CLIP_PLANE0; @@ -451,24 +452,16 @@ static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state) static GLboolean current_fragment_program_writes_depth(GLcontext* ctx) { - r300ContextPtr r300 = R300_CONTEXT(ctx); + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; - return (fp && fp->WritesDepth); - } else { - struct r500_fragment_program* fp = - (struct r500_fragment_program*)(char*) - ctx->FragmentProgram._Current; - return (fp && fp->writes_depth); - } + return (fp && fp->writes_depth); } static void r300SetEarlyZState(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); GLuint topZ = R300_ZTOP_ENABLE; + GLuint w_fmt, fgdepthsrc; if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS) topZ = R300_ZTOP_DISABLE; @@ -485,6 +478,26 @@ static void r300SetEarlyZState(GLcontext * ctx) R300_STATECHANGE(r300, zstencil_format); r300->hw.zstencil_format.cmd[2] = topZ; } + + /* w_fmt value is set to get best performance + * see p.130 R5xx 3D acceleration guide v1.3 */ + if (current_fragment_program_writes_depth(ctx)) { + fgdepthsrc = R300_FG_DEPTH_SRC_SHADER; + w_fmt = R300_W_FMT_W24 | R300_W_SRC_US; + } else { + fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; + w_fmt = R300_W_FMT_W0 | R300_W_SRC_US; + } + + if (w_fmt != r300->hw.us_out_fmt.cmd[5]) { + R300_STATECHANGE(r300, us_out_fmt); + r300->hw.us_out_fmt.cmd[5] = w_fmt; + } + + if (fgdepthsrc != r300->hw.fg_depth_src.cmd[1]) { + R300_STATECHANGE(r300, fg_depth_src); + r300->hw.fg_depth_src.cmd[1] = fgdepthsrc; + } } static void r300SetAlphaState(GLcontext * ctx) @@ -535,8 +548,6 @@ static void r300SetAlphaState(GLcontext * ctx) R300_STATECHANGE(r300, at); r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc; r300->hw.at.cmd[R300_AT_UNKNOWN] = 0; - - r300SetEarlyZState(ctx); } static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) @@ -584,15 +595,35 @@ static void r300SetDepthState(GLcontext * ctx) r300->hw.zs.cmd[R300_ZS_CNTL_1] |= translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT; } +} - r300SetEarlyZState(ctx); +static void r300CatchStencilFallback(GLcontext *ctx) +{ + const unsigned back = ctx->Stencil._BackFace; + + if (ctx->Stencil._Enabled && (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back] + || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back] + || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) { + r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_TRUE); + } else { + r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE); + } } static void r300SetStencilState(GLcontext * ctx, GLboolean state) { r300ContextPtr r300 = R300_CONTEXT(ctx); + GLboolean hw_stencil = GL_FALSE; - if (r300->state.stencil.hw_stencil) { + r300CatchStencilFallback(ctx); + + if (ctx->DrawBuffer) { + struct radeon_renderbuffer *rrbStencil + = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); + hw_stencil = (rrbStencil && rrbStencil->bo); + } + + if (hw_stencil) { R300_STATECHANGE(r300, zs); if (state) { r300->hw.zs.cmd[R300_ZS_CNTL_0] |= @@ -601,10 +632,6 @@ static void r300SetStencilState(GLcontext * ctx, GLboolean state) r300->hw.zs.cmd[R300_ZS_CNTL_0] &= ~R300_STENCIL_ENABLE; } - } else { -#if R200_MERGED - FALLBACK(&r300->radeon, RADEON_FALLBACK_STENCIL, state); -#endif } } @@ -737,7 +764,12 @@ static void r300ColorMask(GLcontext * ctx, static void r300PointSize(GLcontext * ctx, GLfloat size) { r300ContextPtr r300 = R300_CONTEXT(ctx); - /* same size limits for AA, non-AA points */ + + /* We need to clamp to user defined range here, because + * the HW clamping happens only for per vertex point size. */ + size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize); + + /* same size limits for AA, non-AA points */ size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); R300_STATECHANGE(r300, ps); @@ -830,29 +862,33 @@ static void r300ShadeModel(GLcontext * ctx, GLenum mode) R300_STATECHANGE(rmesa, shade); rmesa->hw.shade.cmd[1] = 0x00000002; + R300_STATECHANGE(rmesa, shade2); switch (mode) { case GL_FLAT: - rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_FLAT; + rmesa->hw.shade2.cmd[1] = R300_RE_SHADE_MODEL_FLAT; break; case GL_SMOOTH: - rmesa->hw.shade.cmd[2] = R300_RE_SHADE_MODEL_SMOOTH; + rmesa->hw.shade2.cmd[1] = R300_RE_SHADE_MODEL_SMOOTH; break; default: return; } - rmesa->hw.shade.cmd[3] = 0x00000000; - rmesa->hw.shade.cmd[4] = 0x00000000; + rmesa->hw.shade2.cmd[2] = 0x00000000; + rmesa->hw.shade2.cmd[3] = 0x00000000; } static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, GLenum func, GLint ref, GLuint mask) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - GLuint refmask = - ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT) - | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT); - const unsigned back = ctx->Stencil._BackFace; + GLuint refmask; GLuint flag; + const unsigned back = ctx->Stencil._BackFace; + + r300CatchStencilFallback(ctx); + + refmask = ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT) + | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT); R300_STATECHANGE(rmesa, zs); rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; @@ -880,6 +916,8 @@ static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) { r300ContextPtr rmesa = R300_CONTEXT(ctx); + r300CatchStencilFallback(ctx); + R300_STATECHANGE(rmesa, zs); rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &= ~(R300_STENCILREF_MASK << @@ -896,6 +934,8 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, r300ContextPtr rmesa = R300_CONTEXT(ctx); const unsigned back = ctx->Stencil._BackFace; + r300CatchStencilFallback(ctx); + R300_STATECHANGE(rmesa, zs); /* It is easier to mask what's left.. */ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= @@ -924,28 +964,32 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, * Window position and viewport transformation */ -/* - * To correctly position primitives: - */ -#define SUBPIXEL_X 0.125 -#define SUBPIXEL_Y 0.125 - static void r300UpdateWindow(GLcontext * ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->radeon.dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; + const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + GLfloat y_scale, y_bias; + + if (render_to_fbo) { + y_scale = 1.0; + y_bias = 0; + } else { + y_scale = -1.0; + y_bias = yoffset; + } GLfloat sx = v[MAT_SX]; - GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X; - GLfloat sy = -v[MAT_SY]; - GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y; - GLfloat sz = v[MAT_SZ] * rmesa->state.depth.scale; - GLfloat tz = v[MAT_TZ] * rmesa->state.depth.scale; + GLfloat tx = v[MAT_TX] + xoffset; + GLfloat sy = v[MAT_SY] * y_scale; + GLfloat ty = (v[MAT_TY] * y_scale) + y_bias; + GLfloat sz = v[MAT_SZ] * depthScale; + GLfloat tz = v[MAT_TZ] * depthScale; - R300_FIREVERTICES(rmesa); R300_STATECHANGE(rmesa, vpt); rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx); @@ -964,6 +1008,8 @@ static void r300Viewport(GLcontext * ctx, GLint x, GLint y, * values, or keep the originals hanging around. */ r300UpdateWindow(ctx); + + radeon_viewport(ctx, x, y, width, height); } static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) @@ -974,13 +1020,13 @@ static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) void r300UpdateViewportOffset(GLcontext * ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = ((radeonContextPtr) rmesa)->dri.drawable; + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = (GLfloat) dPriv->x; GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h; const GLfloat *v = ctx->Viewport._WindowMap.m; - GLfloat tx = v[MAT_TX] + xoffset + SUBPIXEL_X; - GLfloat ty = (-v[MAT_TY]) + yoffset + SUBPIXEL_Y; + GLfloat tx = v[MAT_TX] + xoffset; + GLfloat ty = (-v[MAT_TY]) + yoffset; if (rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] != r300PackFloat32(tx) || rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] != r300PackFloat32(ty)) { @@ -996,64 +1042,6 @@ void r300UpdateViewportOffset(GLcontext * ctx) radeonUpdateScissor(ctx); } -/** - * Tell the card where to render (offset, pitch). - * Effected by glDrawBuffer, etc - */ -void r300UpdateDrawBuffer(GLcontext * ctx) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - r300ContextPtr r300 = rmesa; - struct gl_framebuffer *fb = ctx->DrawBuffer; - driRenderbuffer *drb; - - if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { - /* draw to front */ - drb = - (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT]. - Renderbuffer; - } else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { - /* draw to back */ - drb = - (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT]. - Renderbuffer; - } else { - /* drawing to multiple buffers, or none */ - return; - } - - assert(drb); - assert(drb->flippedPitch); - - R300_STATECHANGE(rmesa, cb); - - r300->hw.cb.cmd[R300_CB_OFFSET] = drb->flippedOffset + //r300->radeon.state.color.drawOffset + - r300->radeon.radeonScreen->fbLocation; - r300->hw.cb.cmd[R300_CB_PITCH] = drb->flippedPitch; //r300->radeon.state.color.drawPitch; - - if (r300->radeon.radeonScreen->cpp == 4) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; - else - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; - - if (r300->radeon.sarea->tiling_enabled) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; -#if 0 - R200_STATECHANGE(rmesa, ctx); - - /* Note: we used the (possibly) page-flipped values */ - rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] - = ((drb->flippedOffset + rmesa->r200Screen->fbLocation) - & R200_COLOROFFSET_MASK); - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch; - - if (rmesa->sarea->tiling_enabled) { - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= - R200_COLOR_TILE_ENABLE; - } -#endif -} - static void r300FetchStateParameter(GLcontext * ctx, const gl_state_index state[STATE_LENGTH], @@ -1064,12 +1052,14 @@ r300FetchStateParameter(GLcontext * ctx, switch (state[0]) { case STATE_INTERNAL: switch (state[1]) { - case STATE_R300_WINDOW_DIMENSION: - value[0] = r300->radeon.dri.drawable->w * 0.5f; /* width*0.5 */ - value[1] = r300->radeon.dri.drawable->h * 0.5f; /* height*0.5 */ - value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ - value[3] = 1.0F; /* not used */ - break; + case STATE_R300_WINDOW_DIMENSION: { + __DRIdrawablePrivate * drawable = radeon_get_drawable(&r300->radeon); + value[0] = drawable->w * 0.5f; /* width*0.5 */ + value[1] = drawable->h * 0.5f; /* height*0.5 */ + value[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */ + value[3] = 1.0F; /* not used */ + break; + } case STATE_R300_TEXRECT_FACTOR:{ struct gl_texture_object *t = @@ -1109,14 +1099,14 @@ void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state) struct gl_program_parameter_list *paramList; GLuint i; - if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM))) + if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS))) return; fp = (struct r300_fragment_program *)ctx->FragmentProgram._Current; if (!fp) return; - paramList = fp->mesa_program.Base.Parameters; + paramList = fp->Base.Base.Parameters; if (!paramList) return; @@ -1235,9 +1225,8 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { r300ContextPtr r300 = R300_CONTEXT(ctx); int i; - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; - struct r300_fragment_program_code *code = &fp->code; + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code = &fp->code.r300; R300_STATECHANGE(r300, fpt); @@ -1271,15 +1260,15 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) } r300->hw.fpt.cmd[R300_FPT_CMD_0] = - cmdpacket0(R300_US_TEX_INST_0, code->tex.length); + cmdpacket0(r300->radeon.radeonScreen, + R300_US_TEX_INST_0, code->tex.length); } static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) { int i; - struct r500_fragment_program *fp = (struct r500_fragment_program *) - (char *)ctx->FragmentProgram._Current; - struct r500_fragment_program_code *code = &fp->code; + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; + struct r500_fragment_program_code *code = &fp->code.r500; /* find all the texture instructions and relocate the texture units */ for (i = 0; i < code->inst_end + 1; i++) { @@ -1322,7 +1311,7 @@ static GLuint translate_lod_bias(GLfloat bias) static void r300SetupTextures(GLcontext * ctx) { int i, mtu; - struct r300_tex_obj *t; + struct radeon_tex_obj *t; r300ContextPtr r300 = R300_CONTEXT(ctx); int hw_tmu = 0; int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */ @@ -1356,21 +1345,16 @@ static void r300SetupTextures(GLcontext * ctx) /* We cannot let disabled tmu offsets pass DRM */ for (i = 0; i < mtu; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { - -#if 0 /* Enables old behaviour */ - hw_tmu = i; -#endif tmu_mappings[i] = hw_tmu; - t = (r300TexObjPtr) r300->state.texture.unit[i].texobj->DriverData; - /* XXX questionable fix for bug 9170: */ + t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); if (!t) continue; - if ((t->format & 0xffffff00) == 0xffffff00) { + if ((t->pp_txformat & 0xffffff00) == 0xffffff00) { WARN_ONCE ("unknown texture format (entry %x) encountered. Help me !\n", - t->format & 0xff); + t->pp_txformat & 0xff); } if (RADEON_DEBUG & DEBUG_STATE) @@ -1381,29 +1365,28 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + hw_tmu] = - gen_fixed_filter(t->filter) | (hw_tmu << 28); + gen_fixed_filter(t->pp_txfilter) | (hw_tmu << 28); /* Note: There is a LOD bias per texture unit and a LOD bias * per texture object. We add them here to get the correct behaviour. * (The per-texture object LOD bias was introduced in OpenGL 1.4 * and is not present in the EXT_texture_object extension). */ r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = - t->filter_1 | - translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.tObj->LodBias); + t->pp_txfilter_1 | + translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias); r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = - t->size; + t->pp_txsize; r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + - hw_tmu] = t->format; + hw_tmu] = t->pp_txformat; r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] = - t->pitch_reg; - r300->hw.tex.offset.cmd[R300_TEX_VALUE_0 + - hw_tmu] = t->offset; + t->pp_txpitch; + r300->hw.textures[hw_tmu] = t; - if (t->offset & R300_TXO_MACRO_TILE) { + if (t->tile_bits & R300_TXO_MACRO_TILE) { WARN_ONCE("macro tiling enabled!\n"); } - if (t->offset & R300_TXO_MICRO_TILE) { + if (t->tile_bits & R300_TXO_MICRO_TILE) { WARN_ONCE("micro tiling enabled!\n"); } @@ -1420,37 +1403,33 @@ static void r300SetupTextures(GLcontext * ctx) } r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_FILTER0_0, last_hw_tmu + 1); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1); r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_FILTER1_0, last_hw_tmu + 1); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1); r300->hw.tex.size.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_SIZE_0, last_hw_tmu + 1); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1); r300->hw.tex.format.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_FORMAT_0, last_hw_tmu + 1); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1); r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_FORMAT2_0, last_hw_tmu + 1); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1); r300->hw.tex.offset.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_OFFSET_0, last_hw_tmu + 1); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1); r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_CHROMA_KEY_0, last_hw_tmu + 1); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1); r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); - - if (!fp) /* should only happenen once, just after context is created */ - return; + cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1); if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) { - if (fp->mesa_program.UsesKill && last_hw_tmu < 0) { + if (fp->Base.UsesKill && last_hw_tmu < 0) { // The KILL operation requires the first texture unit // to be enabled. r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1; r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0; r300->hw.tex.filter.cmd[R300_TEX_CMD_0] = - cmdpacket0(R300_TX_FILTER0_0, 1); + cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 1); } - r300SetupFragmentShaderTextures(ctx, tmu_mappings); - } else - r500SetupFragmentShaderTextures(ctx, tmu_mappings); + } + r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings); if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n", @@ -1469,26 +1448,21 @@ union r300_outputs_written { static void r300SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; union r300_outputs_written OutputsWritten; GLuint InputsRead; int fp_reg, high_rr; int col_ip, tex_ip; int rs_tex_count = 0; - int i, count, col_fmt; + int i, col_fmt, hw_tcl_on; + + hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten; else - RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - if (ctx->FragmentProgram._Current) - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - else { - fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); - return; /* This should only ever happen once.. */ - } + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1507,15 +1481,7 @@ static void r300SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; - if (count == 4) - col_fmt = R300_RS_COL_FMT_RGBA; - else if (count == 3) - col_fmt = R300_RS_COL_FMT_RGB1; - else - col_fmt = R300_RS_COL_FMT_0001; - - r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt); + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_COL0; ++col_ip; @@ -1527,15 +1493,7 @@ static void r300SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; - if (count == 4) - col_fmt = R300_RS_COL_FMT_RGBA; - else if (count == 3) - col_fmt = R300_RS_COL_FMT_RGB1; - else - col_fmt = R300_RS_COL_FMT_0001; - - r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt); + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA); r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_COL1; ++col_ip; @@ -1545,6 +1503,7 @@ static void r300SetupRSUnit(GLcontext * ctx) } } + /* We always route 4 texcoord components */ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) continue; @@ -1554,33 +1513,27 @@ static void r300SetupRSUnit(GLcontext * ctx) continue; } - int swiz; - - /* with TCL we always seem to route 4 components */ - if (hw_tcl_on) - count = 4; - else - count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; - - switch(count) { - case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break; - case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; - default: - case 1: - case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break; - }; - - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); InputsRead &= ~(FRAG_BIT_TEX0 << i); - rs_tex_count += count; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } + + if (InputsRead & FRAG_BIT_WPOS) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_WPOS; + rs_tex_count += 4; ++tex_ip; ++fp_reg; } if (InputsRead & FRAG_BIT_FOGC) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(R300_RS_SEL_K0) | R300_RS_SEL_R(R300_RS_SEL_K0); + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_Q(R300_RS_SEL_K1) | R300_RS_TEX_PTR(rs_tex_count); r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_FOGC; rs_tex_count += 4; @@ -1591,27 +1544,19 @@ static void r300SetupRSUnit(GLcontext * ctx) } } - if (InputsRead & FRAG_BIT_WPOS) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count); - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_WPOS; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } - InputsRead &= ~FRAG_BIT_WPOS; - /* Setup default color if no color or tex was set */ if (rs_tex_count == 0 && col_ip == 0) { - r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001); + r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0); + r300->hw.ri.cmd[R300_RI_INTERP_0] = R300_RS_COL_PTR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001); ++col_ip; } high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; - r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; r300->hw.rc.cmd[2] |= high_rr - 1; - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, high_rr); if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); @@ -1620,26 +1565,21 @@ static void r300SetupRSUnit(GLcontext * ctx) static void r500SetupRSUnit(GLcontext * ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *VB = &tnl->vb; union r300_outputs_written OutputsWritten; GLuint InputsRead; int fp_reg, high_rr; int col_ip, tex_ip; int rs_tex_count = 0; - int i, count, col_fmt; + int i, col_fmt, hw_tcl_on; + + hw_tcl_on = r300->options.hw_tcl_enabled; if (hw_tcl_on) - OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten; + OutputsWritten.vp_outputs = r300->selected_vp->key.OutputsWritten; else - RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->state.render_inputs_bitset); + RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset); - if (ctx->FragmentProgram._Current) - InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; - else { - fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); - return; /* This should only ever happen once.. */ - } + InputsRead = ctx->FragmentProgram._Current->Base.InputsRead; R300_STATECHANGE(r300, ri); R300_STATECHANGE(r300, rc); @@ -1658,15 +1598,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL0) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size; - if (count == 4) - col_fmt = R300_RS_COL_FMT_RGBA; - else if (count == 3) - col_fmt = R300_RS_COL_FMT_RGB1; - else - col_fmt = R300_RS_COL_FMT_0001; - - r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt); + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_COL0; ++col_ip; @@ -1678,15 +1610,7 @@ static void r500SetupRSUnit(GLcontext * ctx) if (InputsRead & FRAG_BIT_COL1) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) { - count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size; - if (count == 4) - col_fmt = R300_RS_COL_FMT_RGBA; - else if (count == 3) - col_fmt = R300_RS_COL_FMT_RGB1; - else - col_fmt = R300_RS_COL_FMT_0001; - - r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt); + r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA); r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_COL1; ++col_ip; @@ -1696,7 +1620,7 @@ static void r500SetupRSUnit(GLcontext * ctx) } } - + /* We always route 4 texcoord components */ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (! ( InputsRead & FRAG_BIT_TEX(i) ) ) continue; @@ -1706,55 +1630,37 @@ static void r500SetupRSUnit(GLcontext * ctx) continue; } - int swiz = 0; - - /* with TCL we always seem to route 4 components */ - if (hw_tcl_on) - count = 4; - else - count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size; - - if (count == 4) { - swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; - swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; - swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; - swiz |= (rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT; - } else if (count == 3) { - swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; - swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; - swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT; - swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - } else if (count == 2) { - swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; - swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT; - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; - swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - } else if (count == 1) { - swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT; - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; - swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - } else { - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT; - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT; - swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT; - swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT; - } + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | + ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | + ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | + ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz; r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); InputsRead &= ~(FRAG_BIT_TEX0 << i); - rs_tex_count += count; + rs_tex_count += 4; + ++tex_ip; + ++fp_reg; + } + + if (InputsRead & FRAG_BIT_WPOS) { + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | + ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | + ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | + ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + + r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); + InputsRead &= ~FRAG_BIT_WPOS; + rs_tex_count += 4; ++tex_ip; ++fp_reg; } if (InputsRead & FRAG_BIT_FOGC) { if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | - ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | - ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | - ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); + r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= (rs_tex_count << R500_RS_IP_TEX_PTR_S_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) | + (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) | + (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT); r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); InputsRead &= ~FRAG_BIT_FOGC; @@ -1766,87 +1672,27 @@ static void r500SetupRSUnit(GLcontext * ctx) } } - if (InputsRead & FRAG_BIT_WPOS) { - r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) | - ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) | - ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) | - ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT); - - r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg); - InputsRead &= ~FRAG_BIT_WPOS; - rs_tex_count += 4; - ++tex_ip; - ++fp_reg; - } - /* Setup default color if no color or tex was set */ if (rs_tex_count == 0 && col_ip == 0) { - r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001); + r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0); + r300->hw.ri.cmd[R300_RI_INTERP_0] = R500_RS_COL_PTR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001); ++col_ip; } high_rr = (col_ip > tex_ip) ? col_ip : tex_ip; - r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; - r300->hw.rc.cmd[2] |= 0xC0 | (high_rr - 1); + r300->hw.rc.cmd[1] = (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN; + r300->hw.rc.cmd[2] = 0xC0 | (high_rr - 1); - r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr); + r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr); + r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, high_rr); if (InputsRead) WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead); } - - - -#define bump_vpu_count(ptr, new_count) do{\ - drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\ - int _nc=(new_count)/4; \ - assert(_nc < 256); \ - if(_nc>_p->vpu.count)_p->vpu.count=_nc;\ - }while(0) - -static INLINE void r300SetupVertexProgramFragment(r300ContextPtr r300, int dest, struct r300_vertex_shader_fragment *vsf) -{ - int i; - - if (vsf->length == 0) - return; - - if (vsf->length & 0x3) { - fprintf(stderr, "VERTEX_SHADER_FRAGMENT must have length divisible by 4\n"); - _mesa_exit(-1); - } - - switch ((dest >> 8) & 0xf) { - case 0: - R300_STATECHANGE(r300, vpi); - for (i = 0; i < vsf->length; i++) - r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vpi.cmd, vsf->length + 4 * (dest & 0xff)); - break; - - case 2: - R300_STATECHANGE(r300, vpp); - for (i = 0; i < vsf->length; i++) - r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vpp.cmd, vsf->length + 4 * (dest & 0xff)); - break; - case 4: - R300_STATECHANGE(r300, vps); - for (i = 0; i < vsf->length; i++) - r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (vsf->body.d[i]); - bump_vpu_count(r300->hw.vps.cmd, vsf->length + 4 * (dest & 0xff)); - break; - default: - fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); - _mesa_exit(-1); - } -} - #define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c)) - -static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, +void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count) { int vtx_mem_size; @@ -1870,7 +1716,7 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count); R300_STATECHANGE(rmesa, vap_cntl); - if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + if (rmesa->options.hw_tcl_enabled) { rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) | (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) | @@ -1900,114 +1746,6 @@ static void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, } -static void r300SetupDefaultVertexProgram(r300ContextPtr rmesa) -{ - struct r300_vertex_shader_state *prog = &(rmesa->state.vertex_shader); - GLuint o_reg = 0; - GLuint i_reg = 0; - int i; - int inst_count = 0; - int param_count = 0; - int program_end = 0; - - for (i = VERT_ATTRIB_POS; i < VERT_ATTRIB_MAX; i++) { - if (rmesa->state.sw_tcl_inputs[i] != -1) { - prog->program.body.i[program_end + 0] = PVS_OP_DST_OPERAND(VE_MULTIPLY, GL_FALSE, GL_FALSE, o_reg++, VSF_FLAG_ALL, PVS_DST_REG_OUT); - prog->program.body.i[program_end + 1] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); - prog->program.body.i[program_end + 2] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); - prog->program.body.i[program_end + 3] = PVS_SRC_OPERAND(rmesa->state.sw_tcl_inputs[i], PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_SELECT_FORCE_1, PVS_SRC_REG_INPUT, VSF_FLAG_NONE); - program_end += 4; - i_reg++; - } - } - - prog->program.length = program_end; - - r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, - &(prog->program)); - inst_count = (prog->program.length / 4) - 1; - - r300VapCntl(rmesa, i_reg, o_reg, 0); - - R300_STATECHANGE(rmesa, pvs); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (0 << R300_PVS_FIRST_INST_SHIFT) | - (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | - (inst_count << R300_PVS_LAST_INST_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | - (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); -} - -static int bit_count (int x) -{ - x = ((x & 0xaaaaaaaaU) >> 1) + (x & 0x55555555U); - x = ((x & 0xccccccccU) >> 2) + (x & 0x33333333U); - x = (x >> 16) + (x & 0xffff); - x = ((x & 0xf0f0) >> 4) + (x & 0x0f0f); - return (x >> 8) + (x & 0x00ff); -} - -static void r300SetupRealVertexProgram(r300ContextPtr rmesa) -{ - GLcontext *ctx = rmesa->radeon.glCtx; - struct r300_vertex_program *prog = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx); - int inst_count = 0; - int param_count = 0; - - /* FIXME: r300SetupVertexProgramFragment */ - R300_STATECHANGE(rmesa, vpp); - param_count = - r300VertexProgUpdateParams(ctx, - (struct r300_vertex_program_cont *) - ctx->VertexProgram._Current, - (float *)&rmesa->hw.vpp. - cmd[R300_VPP_PARAM_0]); - bump_vpu_count(rmesa->hw.vpp.cmd, param_count); - param_count /= 4; - - r300SetupVertexProgramFragment(rmesa, R300_PVS_CODE_START, &(prog->program)); - inst_count = (prog->program.length / 4) - 1; - - r300VapCntl(rmesa, bit_count(prog->key.InputsRead), - bit_count(prog->key.OutputsWritten), prog->num_temporaries); - - R300_STATECHANGE(rmesa, pvs); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = - (0 << R300_PVS_FIRST_INST_SHIFT) | - (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | - (inst_count << R300_PVS_LAST_INST_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = - (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | - (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); - rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = - (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); -} - -static void r300SetupVertexProgram(r300ContextPtr rmesa) -{ - GLcontext *ctx = rmesa->radeon.glCtx; - - /* Reset state, in case we don't use something */ - ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; - ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; - ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; - - /* Not sure why this doesnt work... - 0x400 area might have something to do with pixel shaders as it appears right after pfs programming. - 0x406 is set to { 0.0, 0.0, 1.0, 0.0 } most of the time but should change with smooth points and in other rare cases. */ - //setup_vertex_shader_fragment(rmesa, 0x406, &unk4); - if (hw_tcl_on && ((struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx))->translated) { - r300SetupRealVertexProgram(rmesa); - } else { - /* FIXME: This needs to be replaced by vertex shader generation code. */ - r300SetupDefaultVertexProgram(rmesa); - } - -} - /** * Enable/Disable states. * @@ -2015,20 +1753,13 @@ static void r300SetupVertexProgram(r300ContextPtr rmesa) */ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr(cap), state ? "GL_TRUE" : "GL_FALSE"); switch (cap) { - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_3D: - /* empty */ - break; - case GL_FOG: - /* empty */ - break; case GL_ALPHA_TEST: r300SetAlphaState(ctx); break; @@ -2046,22 +1777,46 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) case GL_CLIP_PLANE5: r300SetClipPlaneState(ctx, cap, state); break; + case GL_CULL_FACE: + r300UpdateCulling(ctx); + break; case GL_DEPTH_TEST: r300SetDepthState(ctx); break; - case GL_STENCIL_TEST: - r300SetStencilState(ctx, state); + case GL_LINE_SMOOTH: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_LINE_SMOOTH, ctx->Line.SmoothFlag); break; - case GL_CULL_FACE: - r300UpdateCulling(ctx); + case GL_LINE_STIPPLE: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_LINE_STIPPLE, ctx->Line.StippleFlag); + break; + case GL_POINT_SMOOTH: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_POINT_SMOOTH, ctx->Point.SmoothFlag); + break; + case GL_POLYGON_SMOOTH: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_SMOOTH, ctx->Polygon.SmoothFlag); + break; + case GL_POLYGON_STIPPLE: + if (rmesa->options.conformance_mode) + r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_STIPPLE, ctx->Polygon.StippleFlag); break; case GL_POLYGON_OFFSET_POINT: case GL_POLYGON_OFFSET_LINE: case GL_POLYGON_OFFSET_FILL: r300SetPolygonOffsetState(ctx, state); break; + case GL_SCISSOR_TEST: + radeon_firevertices(&rmesa->radeon); + rmesa->radeon.state.scissor.enabled = state; + radeonUpdateScissor( ctx ); + break; + case GL_STENCIL_TEST: + r300SetStencilState(ctx, state); + break; default: - radeonEnable(ctx, cap, state); break; } } @@ -2072,15 +1827,14 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state) static void r300ResetHwState(r300ContextPtr r300) { GLcontext *ctx = r300->radeon.glCtx; - int has_tcl = 1; + int has_tcl; - if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) - has_tcl = 0; + has_tcl = r300->options.hw_tcl_enabled; if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "%s\n", __FUNCTION__); - r300UpdateWindow(ctx); + radeon_firevertices(&r300->radeon); r300ColorMask(ctx, ctx->Color.ColorMask[RCOMP], @@ -2102,8 +1856,6 @@ static void r300ResetHwState(r300ContextPtr r300) r300UpdateCulling(ctx); - r300UpdateTextureState(ctx); - r300SetBlendState(ctx); r300SetLogicOpState(ctx); @@ -2182,8 +1934,8 @@ static void r300ResetHwState(r300ContextPtr r300) } /* XXX: Enable anti-aliasing? */ - r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE; - r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = 0; + r300->hw.gb_misc2.cmd[R300_GB_MISC2_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE; + r300->hw.gb_misc2.cmd[R300_GB_MISC2_SELECT] = 0; r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0); r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0); @@ -2242,20 +1994,6 @@ static void r300ResetHwState(r300ContextPtr r300) r300BlendColor(ctx, ctx->Color.BlendColor); - /* Again, r300ClearBuffer uses this */ - r300->hw.cb.cmd[R300_CB_OFFSET] = - r300->radeon.state.color.drawOffset + - r300->radeon.radeonScreen->fbLocation; - r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch; - - if (r300->radeon.radeonScreen->cpp == 4) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; - else - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; - - if (r300->radeon.sarea->tiling_enabled) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; - r300->hw.rb3d_dither_ctl.cmd[1] = 0; r300->hw.rb3d_dither_ctl.cmd[2] = 0; r300->hw.rb3d_dither_ctl.cmd[3] = 0; @@ -2268,44 +2006,18 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.rb3d_aaresolve_ctl.cmd[1] = 0; - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; - r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; - - r300->hw.zb.cmd[R300_ZB_OFFSET] = - r300->radeon.radeonScreen->depthOffset + - r300->radeon.radeonScreen->fbLocation; - r300->hw.zb.cmd[R300_ZB_PITCH] = r300->radeon.radeonScreen->depthPitch; - - if (r300->radeon.sarea->tiling_enabled) { - /* XXX: Turn off when clearing buffers ? */ - r300->hw.zb.cmd[R300_ZB_PITCH] |= R300_DEPTHMACROTILE_ENABLE; - - if (ctx->Visual.depthBits == 24) - r300->hw.zb.cmd[R300_ZB_PITCH] |= - R300_DEPTHMICROTILE_TILED; - } + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000; + r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff; r300->hw.zb_depthclearvalue.cmd[1] = 0; - switch (ctx->Visual.depthBits) { - case 16: - r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_16BIT_INT_Z; - break; - case 24: - r300->hw.zstencil_format.cmd[1] = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; - break; - default: - fprintf(stderr, "Error: Unsupported depth %d... exiting\n", ctx->Visual.depthBits); - _mesa_exit(-1); - } - r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE; r300->hw.zstencil_format.cmd[3] = 0x00000003; r300->hw.zstencil_format.cmd[4] = 0x00000000; r300SetEarlyZState(ctx); - r300->hw.unk4F30.cmd[1] = 0; - r300->hw.unk4F30.cmd[2] = 0; + r300->hw.zb_zmask.cmd[1] = 0; + r300->hw.zb_zmask.cmd[2] = 0; r300->hw.zb_hiz_offset.cmd[1] = 0; @@ -2319,20 +2031,26 @@ static void r300ResetHwState(r300ContextPtr r300) r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0; } - r300->hw.all_dirty = GL_TRUE; + r300->radeon.hw.all_dirty = GL_TRUE; } void r300UpdateShaders(r300ContextPtr rmesa) { GLcontext *ctx; - struct r300_vertex_program *vp; + struct r300_fragment_program *fp; int i; ctx = rmesa->radeon.glCtx; + fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - if (rmesa->NewGLState && hw_tcl_on) { - rmesa->NewGLState = 0; + /* should only happenen once, just after context is created */ + /* TODO: shouldn't we fallback to sw here? */ + if (!fp) { + _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); + return; + } + if (rmesa->radeon.NewGLState && rmesa->options.hw_tcl_enabled) { for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { rmesa->temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; @@ -2348,20 +2066,16 @@ void r300UpdateShaders(r300ContextPtr rmesa) } r300SelectVertexShader(rmesa); - vp = (struct r300_vertex_program *) - CURRENT_VERTEX_SHADER(ctx); - /*if (vp->translated == GL_FALSE) - r300TranslateVertexShader(vp); */ - if (vp->translated == GL_FALSE) { - fprintf(stderr, "Failing back to sw-tcl\n"); - hw_tcl_on = future_hw_tcl_on = 0; - r300ResetHwState(rmesa); - - r300UpdateStateParameters(ctx, _NEW_PROGRAM); - return; - } + r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, rmesa->selected_vp->error); } - r300UpdateStateParameters(ctx, _NEW_PROGRAM); + + if (!fp->translated || rmesa->radeon.NewGLState) + r300TranslateFragmentShader(ctx, ctx->FragmentProgram._Current); + + r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error); + + r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); + rmesa->radeon.NewGLState = 0; } static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, @@ -2385,35 +2099,23 @@ static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, } -static void r300SetupPixelShader(r300ContextPtr rmesa) +static void r300SetupPixelShader(GLcontext *ctx) { - GLcontext *ctx = rmesa->radeon.glCtx; - struct r300_fragment_program *fp = (struct r300_fragment_program *) - (char *)ctx->FragmentProgram._Current; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; struct r300_fragment_program_code *code; int i, k; - if (!fp) /* should only happenen once, just after context is created */ - return; - - r300TranslateFragmentShader(rmesa, fp); - if (!fp->translated) { - fprintf(stderr, "%s: No valid fragment shader, exiting\n", - __FUNCTION__); - return; - } - code = &fp->code; - - r300SetupTextures(ctx); + code = &fp->code.r300; R300_STATECHANGE(rmesa, fpi[0]); R300_STATECHANGE(rmesa, fpi[1]); R300_STATECHANGE(rmesa, fpi[2]); R300_STATECHANGE(rmesa, fpi[3]); - rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu.length); - rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu.length); - rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu.length); - rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); + rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length); + rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length); + rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length); + rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length); for (i = 0; i < code->alu.length; i++) { rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0; rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1; @@ -2444,10 +2146,10 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) } R300_STATECHANGE(rmesa, fpp); - rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4); + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, code->const_nr * 4); for (i = 0; i < code->const_nr; i++) { const GLfloat *constant = get_fragmentprogram_constant(ctx, - &fp->mesa_program.Base, code->constant[i]); + &fp->Base.Base, code->constant[i]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]); rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]); @@ -2469,29 +2171,17 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\ } while(0) -static void r500SetupPixelShader(r300ContextPtr rmesa) +static void r500SetupPixelShader(GLcontext *ctx) { - GLcontext *ctx = rmesa->radeon.glCtx; - struct r500_fragment_program *fp = (struct r500_fragment_program *) - (char *)ctx->FragmentProgram._Current; + r300ContextPtr rmesa = R300_CONTEXT(ctx); + struct r300_fragment_program *fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; int i; struct r500_fragment_program_code *code; - if (!fp) /* should only happenen once, just after context is created */ - return; - ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0; ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0; - r500TranslateFragmentShader(rmesa, fp); - if (!fp->translated) { - fprintf(stderr, "%s: No valid fragment shader, exiting\n", - __FUNCTION__); - return; - } - code = &fp->code; - - r300SetupTextures(ctx); + code = &fp->code.r500; R300_STATECHANGE(rmesa, fp); rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; @@ -2521,58 +2211,96 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) R300_STATECHANGE(rmesa, r500fp_const); for (i = 0; i < code->const_nr; i++) { const GLfloat *constant = get_fragmentprogram_constant(ctx, - &fp->mesa_program.Base, code->constant[i]); + &fp->Base.Base, code->constant[i]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]); rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]); } bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); +} + +void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten) +{ + r300ContextPtr rmesa = R300_CONTEXT( ctx ); + struct vertex_attribute *attrs = rmesa->vbuf.attribs; + int i, j, reg_count; + uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1]; + uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1]; + + for (i = 0; i < R300_VIR_CMDSIZE-1; ++i) + vir0[i] = vir1[i] = 0; + + for (i = 0, j = 0; i < rmesa->vbuf.num_attribs; ++i) { + int tmp; + + tmp = attrs[i].data_type | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT); + if (attrs[i]._signed) + tmp |= R300_SIGNED; + if (attrs[i].normalize) + tmp |= R300_NORMALIZE; + + if (i % 2 == 0) { + vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT; + vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT); + } else { + vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT; + vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT; + ++j; + } + } + + reg_count = (rmesa->vbuf.num_attribs + 1) >> 1; + if (rmesa->vbuf.num_attribs % 2 != 0) { + vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT; + } else { + vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT; + } + + R300_STATECHANGE(rmesa, vir[0]); + R300_STATECHANGE(rmesa, vir[1]); + R300_STATECHANGE(rmesa, vof); + R300_STATECHANGE(rmesa, vic); + + if (rmesa->radeon.radeonScreen->kernel_mm) { + rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF; + rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16; + rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16; + } else { + ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count; + ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count; + } + rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); + rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); + rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead); + rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten, ctx->FragmentProgram._Current->Base.InputsRead); } void r300UpdateShaderStates(r300ContextPtr rmesa) { GLcontext *ctx; ctx = rmesa->radeon.glCtx; + struct r300_fragment_program *r300_fp; - r300UpdateTextureState(ctx); - r300SetEarlyZState(ctx); + r300_fp = (struct r300_fragment_program *) ctx->FragmentProgram._Current; - /* w_fmt value is set to get best performance - * see p.130 R5xx 3D acceleration guide v1.3 */ - GLuint w_fmt, fgdepthsrc; - if (current_fragment_program_writes_depth(ctx)) { - fgdepthsrc = R300_FG_DEPTH_SRC_SHADER; - w_fmt = R300_W_FMT_W24 | R300_W_SRC_US; - } else { - fgdepthsrc = R300_FG_DEPTH_SRC_SCAN; - w_fmt = R300_W_FMT_W0 | R300_W_SRC_US; - } + /* should only happenen once, just after context is created */ + if (!r300_fp) + return; - if (w_fmt != rmesa->hw.us_out_fmt.cmd[5]) { - R300_STATECHANGE(rmesa, us_out_fmt); - rmesa->hw.us_out_fmt.cmd[5] = w_fmt; - } + r300SetEarlyZState(ctx); - if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) { - R300_STATECHANGE(rmesa, fg_depth_src); - rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc; - } + r300SetupTextures(ctx); - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - r500SetupPixelShader(rmesa); - else - r300SetupPixelShader(rmesa); + rmesa->vtbl.SetupPixelShader(ctx); - if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) - r500SetupRSUnit(ctx); - else - r300SetupRSUnit(ctx); + rmesa->vtbl.SetupRSUnit(ctx); - if ((rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) + if (rmesa->options.hw_tcl_enabled) { r300SetupVertexProgram(rmesa); - + } } /** @@ -2586,15 +2314,18 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) _swsetup_InvalidateState(ctx, new_state); _vbo_InvalidateState(ctx, new_state); _tnl_InvalidateState(ctx, new_state); - _ae_invalidate_state(ctx, new_state); - if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { - r300UpdateDrawBuffer(ctx); + if (new_state & _NEW_BUFFERS) { + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + + R300_STATECHANGE(r300, cb); } r300UpdateStateParameters(ctx, new_state); - r300->NewGLState |= new_state; + r300->radeon.NewGLState |= new_state; } /** @@ -2604,58 +2335,12 @@ static void r300InvalidateState(GLcontext * ctx, GLuint new_state) */ void r300InitState(r300ContextPtr r300) { - GLcontext *ctx = r300->radeon.glCtx; - GLuint depth_fmt; - - radeonInitState(&r300->radeon); - - switch (ctx->Visual.depthBits) { - case 16: - r300->state.depth.scale = 1.0 / (GLfloat) 0xffff; - depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z; - break; - case 24: - r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff; - depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; - break; - default: - fprintf(stderr, "Error: Unsupported depth %d... exiting\n", - ctx->Visual.depthBits); - _mesa_exit(-1); - } - - /* Only have hw stencil when depth buffer is 24 bits deep */ - r300->state.stencil.hw_stencil = (ctx->Visual.stencilBits > 0 && - ctx->Visual.depthBits == 24); - - memset(&(r300->state.texture), 0, sizeof(r300->state.texture)); - r300ResetHwState(r300); } static void r300RenderMode(GLcontext * ctx, GLenum mode) { - r300ContextPtr rmesa = R300_CONTEXT(ctx); - (void)rmesa; - (void)mode; -} - -void r300UpdateClipPlanes( GLcontext *ctx ) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - GLuint p; - - for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { - if (ctx->Transform.ClipPlanesEnabled & (1 << p)) { - GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; - - R300_STATECHANGE( rmesa, vpucp[p] ); - rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0]; - rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1]; - rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2]; - rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3]; - } - } + r300SwitchFallback(ctx, R300_FALLBACK_RENDER_MODE, ctx->RenderMode != GL_RENDER); } /** @@ -2663,7 +2348,6 @@ void r300UpdateClipPlanes( GLcontext *ctx ) */ void r300InitStateFuncs(struct dd_function_table *functions) { - radeonInitStateFuncs(functions); functions->UpdateState = r300InvalidateState; functions->AlphaFunc = r300AlphaFunc; @@ -2699,4 +2383,25 @@ void r300InitStateFuncs(struct dd_function_table *functions) functions->RenderMode = r300RenderMode; functions->ClipPlane = r300ClipPlane; + functions->Scissor = radeonScissor; + + functions->DrawBuffer = radeonDrawBuffer; + functions->ReadBuffer = radeonReadBuffer; +} + +void r300InitShaderFunctions(r300ContextPtr r300) +{ + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + r300->vtbl.SetupRSUnit = r500SetupRSUnit; + r300->vtbl.SetupPixelShader = r500SetupPixelShader; + r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures; + r300->vtbl.BuildFragmentProgramHwCode = r500BuildFragmentProgramHwCode; + r300->vtbl.FragmentProgramDump = r500FragmentProgramDump; + } else { + r300->vtbl.SetupRSUnit = r300SetupRSUnit; + r300->vtbl.SetupPixelShader = r300SetupPixelShader; + r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures; + r300->vtbl.BuildFragmentProgramHwCode = r300BuildFragmentProgramHwCode; + r300->vtbl.FragmentProgramDump = r300FragmentProgramDump; + } } diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h index 0589ab7cad..2328289420 100644 --- a/src/mesa/drivers/dri/r300/r300_state.h +++ b/src/mesa/drivers/dri/r300/r300_state.h @@ -39,42 +39,25 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_NEWPRIM( rmesa ) \ do { \ - if ( rmesa->dma.flush ) \ - rmesa->dma.flush( rmesa ); \ + if ( rmesa->radeon.dma.flush ) \ + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ } while (0) #define R300_STATECHANGE(r300, atom) \ do { \ R300_NEWPRIM(r300); \ r300->hw.atom.dirty = GL_TRUE; \ - r300->hw.is_dirty = GL_TRUE; \ + r300->radeon.hw.is_dirty = GL_TRUE; \ } while(0) -#define R300_PRINT_STATE(r300, atom) \ - r300PrintStateAtom(r300, &r300->hw.atom) - -/* Fire the buffered vertices no matter what. - TODO: This has not been implemented yet - */ -#define R300_FIREVERTICES( r300 ) \ -do { \ - \ - if ( (r300)->cmdbuf.count_used || (r300)->dma.flush ) { \ - r300Flush( (r300)->radeon.glCtx ); \ - } \ - \ -} while (0) - -// r300_state.c -extern int future_hw_tcl_on; -void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx); void r300UpdateViewportOffset (GLcontext * ctx); void r300UpdateDrawBuffer (GLcontext * ctx); void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state); void r300UpdateShaders (r300ContextPtr rmesa); void r300UpdateShaderStates (r300ContextPtr rmesa); void r300InitState (r300ContextPtr r300); -void r300UpdateClipPlanes (GLcontext * ctx); void r300InitStateFuncs (struct dd_function_table *functions); +void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count); +void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten); #endif /* __R300_STATE_H__ */ diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c index ba3621b16b..ce4179208e 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.c +++ b/src/mesa/drivers/dri/r300/r300_swtcl.c @@ -28,362 +28,237 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* * Authors: * Dave Airlie <airlied@linux.ie> + * Maciej Cencora <m.cencora@gmail.com> */ -/* derived from r200 swtcl path */ - - - -#include "main/glheader.h" -#include "main/mtypes.h" -#include "main/colormac.h" -#include "main/enums.h" -#include "main/image.h" -#include "main/imports.h" -#include "main/light.h" -#include "main/macros.h" - -#include "swrast/s_context.h" -#include "swrast/s_fog.h" -#include "swrast_setup/swrast_setup.h" -#include "math/m_translate.h" #include "tnl/tnl.h" -#include "tnl/t_context.h" #include "tnl/t_pipeline.h" -#include "r300_context.h" -#include "r300_swtcl.h" #include "r300_state.h" -#include "r300_ioctl.h" +#include "r300_swtcl.h" #include "r300_emit.h" -#include "r300_mem.h" - -static void flush_last_swtcl_prim( r300ContextPtr rmesa ); +#include "r300_tex.h" +#include "r300_render.h" - -void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset); -void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr); #define EMIT_ATTR( ATTR, STYLE ) \ do { \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \ - rmesa->swtcl.vertex_attr_count++; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ } while (0) #define EMIT_PAD( N ) \ do { \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \ - rmesa->swtcl.vertex_attr_count++; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ +} while (0) + +#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask, _normalize) \ +do { \ + attrs[num_attrs].element = (_attr); \ + attrs[num_attrs].data_type = (_format); \ + attrs[num_attrs].dst_loc = (_dst_loc); \ + attrs[num_attrs].swizzle = (_swizzle); \ + attrs[num_attrs].write_mask = (_write_mask); \ + attrs[num_attrs]._signed = 0; \ + attrs[num_attrs].normalize = (_normalize); \ + ++num_attrs; \ } while (0) -static void r300SetVertexFormat( GLcontext *ctx ) +void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_OutputsWritten) { r300ContextPtr rmesa = R300_CONTEXT( ctx ); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; - DECLARE_RENDERINPUTS(index_bitset); - GLuint InputsRead = 0, OutputsWritten = 0; - int vap_fmt_1 = 0; - int offset = 0; - int vte = 0; - int fog_id; - GLint inputs[VERT_ATTRIB_MAX]; - GLint tab[VERT_ATTRIB_MAX]; - int swizzle[VERT_ATTRIB_MAX][4]; - GLuint i, nr; - GLuint sz; - - DECLARE_RENDERINPUTS(render_inputs_bitset); - RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset); - RENDERINPUTS_COPY( index_bitset, tnl->render_inputs_bitset ); - RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset); - - vte = rmesa->hw.vte.cmd[1]; - vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT); - /* Important: - */ - if ( VB->NdcPtr != NULL ) { - VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; - vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT; - } - else { - VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; - vte |= R300_VTX_W0_FMT; - } - - assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); - rmesa->swtcl.vertex_attr_count = 0; - - /* EMIT_ATTR's must be in order as they tell t_vertex.c how to - * build up a hardware vertex. - */ - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS)) { - sz = VB->AttribPtr[VERT_ATTRIB_POS]->size; - InputsRead |= 1 << VERT_ATTRIB_POS; - OutputsWritten |= 1 << VERT_RESULT_HPOS; - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 ); - offset = sz; - } else { - offset = 4; - EMIT_PAD(4 * sizeof(float)); - } -/* - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) { - EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); - offset += 1; - } -*/ - if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_COLOR0)) { - sz = VB->AttribPtr[VERT_ATTRIB_COLOR0]->size; - rmesa->swtcl.coloroffset = offset; + int first_free_tex = 0; + GLuint InputsRead = 0; + GLuint OutputsWritten = 0; + int num_attrs = 0; + GLuint fp_reads = ctx->FragmentProgram._Current->Base.InputsRead; + struct vertex_attribute *attrs = rmesa->vbuf.attribs; + + rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0; + rmesa->radeon.swtcl.vertex_attr_count = 0; + + /* We always want non Ndc coords format */ + VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr; + + /* Always write position vector */ + InputsRead |= 1 << VERT_ATTRIB_POS; + OutputsWritten |= 1 << VERT_RESULT_HPOS; + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); + ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW, 0); + rmesa->swtcl.coloroffset = 4; + + if (fp_reads & FRAG_BIT_COL0) { InputsRead |= 1 << VERT_ATTRIB_COLOR0; OutputsWritten |= 1 << VERT_RESULT_COL0; - EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_1F + sz - 1 ); - offset += sz; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1); +#endif } - rmesa->swtcl.specoffset = 0; - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_COLOR1 )) { - sz = VB->AttribPtr[VERT_ATTRIB_COLOR1]->size; - rmesa->swtcl.specoffset = offset; - EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_1F + sz - 1 ); + if (fp_reads & FRAG_BIT_COL1) { + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); InputsRead |= 1 << VERT_ATTRIB_COLOR1; OutputsWritten |= 1 << VERT_RESULT_COL1; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1); +#endif + rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1; } - fog_id = -1; - if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG)) { - /* find first free tex coord slot */ - if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { - int i; - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { - fog_id = i; - break; - } - } - } else { - fog_id = 0; - } - - if (fog_id == -1) { - fprintf(stderr, "\tout of free texcoords to do fog\n"); - _mesa_exit(-1); + if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { + VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->ColorPtr[1]; + OutputsWritten |= 1 << VERT_RESULT_BFC0; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1); +#endif + if (fp_reads & FRAG_BIT_COL1) { + VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->SecondaryColorPtr[1]; + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + OutputsWritten |= 1 << VERT_RESULT_BFC1; +#if MESA_LITTLE_ENDIAN + EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA ); + ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1); +#else + EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR ); + ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1); +#endif } + } - sz = VB->AttribPtr[VERT_ATTRIB_FOG]->size; - EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F + sz - 1); - InputsRead |= 1 << VERT_ATTRIB_FOG; - OutputsWritten |= 1 << VERT_RESULT_FOGC; - vap_fmt_1 |= sz << (3 * fog_id); + if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) { + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); + InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE; + OutputsWritten |= 1 << VERT_RESULT_PSIZ; + EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F ); + ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0); } - if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { + /** + * Sending only one texcoord component may lead to lock up, + * so for all textures always output 4 texcoord components to RS. + */ + { int i; - + GLuint swiz, format, hw_format; for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { - sz = VB->TexCoordPtr[i]->size; + if (fp_reads & FRAG_BIT_TEX(i)) { + switch (VB->TexCoordPtr[i]->size) { + case 1: + format = EMIT_1F; + hw_format = R300_DATA_TYPE_FLOAT_1; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + case 2: + format = EMIT_2F; + hw_format = R300_DATA_TYPE_FLOAT_2; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE); + break; + case 3: + format = EMIT_3F; + hw_format = R300_DATA_TYPE_FLOAT_3; + swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE); + break; + case 4: + format = EMIT_4F; + hw_format = R300_DATA_TYPE_FLOAT_4; + swiz = SWIZZLE_XYZW; + break; + default: + continue; + } InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i); OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i); - EMIT_ATTR( _TNL_ATTRIB_TEX0+i, EMIT_1F + sz - 1 ); - vap_fmt_1 |= sz << (3 * i); + EMIT_ATTR(_TNL_ATTRIB_TEX(i), format); + ADD_ATTR(VERT_ATTRIB_TEX0 + i, hw_format, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0); + ++first_free_tex; } } } /* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */ - if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) { - int first_free_tex = -1; - if (fog_id >= 0) { - first_free_tex = fog_id+1; - } else { - if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) { - int i; - for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { - if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) { - first_free_tex = i; - break; - } - } - } else { - first_free_tex = 0; - } - } - - if (first_free_tex == -1) { + if (fp_reads & FRAG_BIT_WPOS) { + if (first_free_tex >= ctx->Const.MaxTextureUnits) { fprintf(stderr, "\tout of free texcoords to write w pos\n"); _mesa_exit(-1); } - sz = VB->AttribPtr[VERT_ATTRIB_POS]->size; InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex); OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex); - EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 ); - vap_fmt_1 |= sz << (3 * first_free_tex); - } - - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) { - inputs[i] = nr++; - } else { - inputs[i] = -1; - } + EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F ); + ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW, 0); + ++first_free_tex; } - /* Fixed, apply to vir0 only */ - if (InputsRead & (1 << VERT_ATTRIB_POS)) - inputs[VERT_ATTRIB_POS] = 0; - if (InputsRead & (1 << VERT_ATTRIB_COLOR0)) - inputs[VERT_ATTRIB_COLOR0] = 2; - if (InputsRead & (1 << VERT_ATTRIB_COLOR1)) - inputs[VERT_ATTRIB_COLOR1] = 3; - if (InputsRead & (1 << VERT_ATTRIB_FOG)) - inputs[VERT_ATTRIB_FOG] = 6 + fog_id; - for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++) - if (InputsRead & (1 << i)) - inputs[i] = 6 + (i - VERT_ATTRIB_TEX0); - - for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) { - if (InputsRead & (1 << i)) { - tab[nr++] = i; + if (fp_reads & FRAG_BIT_FOGC) { + if (first_free_tex >= ctx->Const.MaxTextureUnits) { + fprintf(stderr, "\tout of free texcoords to write fog coordinate\n"); + _mesa_exit(-1); } - } - - for (i = 0; i < nr; i++) { - int ci; - swizzle[i][0] = SWIZZLE_ZERO; - swizzle[i][1] = SWIZZLE_ZERO; - swizzle[i][2] = SWIZZLE_ZERO; - swizzle[i][3] = SWIZZLE_ONE; - - for (ci = 0; ci < VB->AttribPtr[tab[i]]->size; ci++) { - swizzle[i][ci] = ci; - } + InputsRead |= 1 << VERT_ATTRIB_FOG; + OutputsWritten |= 1 << VERT_RESULT_FOGC; + GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO); + EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F ); + ADD_ATTR(VERT_ATTRIB_FOG, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0); } R300_NEWPRIM(rmesa); - R300_STATECHANGE(rmesa, vir[0]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = - r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0], - VB->AttribPtr, inputs, tab, nr); - R300_STATECHANGE(rmesa, vir[1]); - ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = - r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle, - nr); - - R300_STATECHANGE(rmesa, vic); - rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead); - rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead); - - R300_STATECHANGE(rmesa, vof); - rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten); - rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = vap_fmt_1; - - rmesa->swtcl.vertex_size = - _tnl_install_attrs( ctx, - rmesa->swtcl.vertex_attrs, - rmesa->swtcl.vertex_attr_count, - NULL, 0 ); - - rmesa->swtcl.vertex_size /= 4; + rmesa->vbuf.num_attribs = num_attrs; + *_InputsRead = InputsRead; + *_OutputsWritten = OutputsWritten; - RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); - - - R300_STATECHANGE(rmesa, vte); - rmesa->hw.vte.cmd[1] = vte; - rmesa->hw.vte.cmd[2] = rmesa->swtcl.vertex_size; + RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset); } - -/* Flush vertices in the current dma region. - */ -static void flush_last_swtcl_prim( r300ContextPtr rmesa ) +static void r300PrepareVertices(GLcontext *ctx) { - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); - - rmesa->dma.flush = NULL; - - if (rmesa->dma.current.buf) { - struct r300_dma_region *current = &rmesa->dma.current; - GLuint current_offset = GET_START(current); - - assert (current->start + - rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == - current->ptr); - - if (rmesa->dma.current.start != rmesa->dma.current.ptr) { - - r300EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size + (12*sizeof(int)), __FUNCTION__); - - r300EmitState(rmesa); - - r300EmitVertexAOS( rmesa, - rmesa->swtcl.vertex_size, - current_offset); + r300ContextPtr rmesa = R300_CONTEXT(ctx); + GLuint InputsRead, OutputsWritten; - r300EmitVbufPrim( rmesa, - rmesa->swtcl.hw_primitive, - rmesa->swtcl.numverts); + r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten); + r300SetupVAP(ctx, InputsRead, OutputsWritten); - r300EmitCacheFlush(rmesa); - } + rmesa->radeon.swtcl.vertex_size = + _tnl_install_attrs( ctx, + rmesa->radeon.swtcl.vertex_attrs, + rmesa->radeon.swtcl.vertex_attr_count, + NULL, 0 ); - rmesa->swtcl.numverts = 0; - current->start = current->ptr; - } + rmesa->radeon.swtcl.vertex_size /= 4; } -/* Alloc space in the current dma region. - */ -static void * -r300AllocDmaLowVerts( r300ContextPtr rmesa, int nverts, int vsize ) -{ - GLuint bytes = vsize * nverts; - - if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) - r300RefillCurrentDmaRegion( rmesa, bytes); - - if (!rmesa->dma.flush) { - rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; - rmesa->dma.flush = flush_last_swtcl_prim; - } - - ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); - ASSERT( rmesa->dma.flush == flush_last_swtcl_prim ); - ASSERT( rmesa->dma.current.start + - rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == - rmesa->dma.current.ptr ); - - { - GLubyte *head = (GLubyte *) (rmesa->dma.current.address + rmesa->dma.current.ptr); - rmesa->dma.current.ptr += bytes; - rmesa->swtcl.numverts += nverts; - return head; - } -} static GLuint reduced_prim[] = { - GL_POINTS, - GL_LINES, - GL_LINES, - GL_LINES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, + GL_POINTS, + GL_LINES, + GL_LINES, + GL_LINES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, }; static void r300RasterPrimitive( GLcontext *ctx, GLuint prim ); -static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); -//static void r300ResetLineStipple( GLcontext *ctx ); /*********************************************************************** * Emit primitives as inline vertices * @@ -405,15 +280,13 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); #undef LOCAL_VARS #undef ALLOC_VERTS #define CTX_ARG r300ContextPtr rmesa -#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size -#define ALLOC_VERTS( n, size ) r300AllocDmaLowVerts( rmesa, n, size * 4 ) +#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size +#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 ) #define LOCAL_VARS \ r300ContextPtr rmesa = R300_CONTEXT(ctx); \ - const char *r300verts = (char *)rmesa->swtcl.verts; + const char *r300verts = (char *)rmesa->radeon.swtcl.verts; #define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int))) #define VERTEX r300Vertex -#define DO_DEBUG_VERTS (1 && (RADEON_DEBUG & DEBUG_VERTS)) -#define PRINT_VERTEX(x) #undef TAG #define TAG(x) r300_##x #include "tnl_dd/t_dd_triemit.h" @@ -433,9 +306,8 @@ static void r300RenderPrimitive( GLcontext *ctx, GLenum prim ); * Build render functions from dd templates * ***********************************************************************/ -#define R300_TWOSIDE_BIT 0x01 -#define R300_UNFILLED_BIT 0x02 -#define R300_MAX_TRIFUNC 0x04 +#define R300_UNFILLED_BIT 0x01 +#define R300_MAX_TRIFUNC 0x02 static struct { tnl_points_func points; @@ -446,9 +318,9 @@ static struct { #define DO_FALLBACK 0 #define DO_UNFILLED (IND & R300_UNFILLED_BIT) -#define DO_TWOSIDE (IND & R300_TWOSIDE_BIT) +#define DO_TWOSIDE 0 #define DO_FLAT 0 -#define DO_OFFSET 0 +#define DO_OFFSET 0 #define DO_TRI 1 #define DO_QUAD 1 #define DO_LINE 1 @@ -468,33 +340,39 @@ static struct { #define VERT_Y(_v) _v->v.y #define VERT_Z(_v) _v->v.z #define AREA_IS_CCW( a ) (a < 0) -#define GET_VERTEX(e) (rmesa->swtcl.verts + (e*rmesa->swtcl.vertex_size*sizeof(int))) - -/* Only used to pull back colors into vertices (ie, we know color is - * floating point). - */ -#define R300_COLOR( dst, src ) \ -do { \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[3], (src)[3]); \ +#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int))) + +#define VERT_SET_RGBA( v, c ) \ +do { \ + r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \ + UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \ } while (0) -#define VERT_SET_RGBA( v, c ) if (coloroffset) R300_COLOR( v->ub4[coloroffset], c ) -#define VERT_COPY_RGBA( v0, v1 ) if (coloroffset) v0->ui[coloroffset] = v1->ui[coloroffset] -#define VERT_SAVE_RGBA( idx ) if (coloroffset) color[idx] = v[idx]->ui[coloroffset] -#define VERT_RESTORE_RGBA( idx ) if (coloroffset) v[idx]->ui[coloroffset] = color[idx] +#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset] + +#define VERT_SET_SPEC( v0, c ) \ +do { \ + if (specoffset) { \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \ + UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \ + } \ +} while (0) -#define R300_SPEC( dst, src ) \ -do { \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[0], (src)[2]); \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[1], (src)[1]); \ - UNCLAMPED_FLOAT_TO_UBYTE((dst)[2], (src)[0]); \ +#define VERT_COPY_SPEC( v0, v1 ) \ +do { \ + if (specoffset) { \ + v0->v.specular.red = v1->v.specular.red; \ + v0->v.specular.green = v1->v.specular.green; \ + v0->v.specular.blue = v1->v.specular.blue; \ + } \ } while (0) -#define VERT_SET_SPEC( v, c ) if (specoffset) R300_SPEC( v->ub4[specoffset], c ) -#define VERT_COPY_SPEC( v0, v1 ) if (specoffset) COPY_3V(v0->ub4[specoffset], v1->ub4[specoffset]) +#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset] +#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx] #define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset] #define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx] @@ -514,7 +392,7 @@ do { \ ***********************************************************************/ #define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] ) -#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive +#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive #undef TAG #define TAG(x) x #include "tnl_dd/t_dd_unfilled.h" @@ -530,26 +408,15 @@ do { \ #define TAG(x) x #include "tnl_dd/t_dd_tritmp.h" -#define IND (R300_TWOSIDE_BIT) -#define TAG(x) x##_twoside -#include "tnl_dd/t_dd_tritmp.h" - #define IND (R300_UNFILLED_BIT) #define TAG(x) x##_unfilled #include "tnl_dd/t_dd_tritmp.h" -#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT) -#define TAG(x) x##_twoside_unfilled -#include "tnl_dd/t_dd_tritmp.h" - - static void init_rast_tab( void ) { init(); - init_twoside(); init_unfilled(); - init_twoside_unfilled(); } /**********************************************************************/ @@ -571,8 +438,8 @@ static void init_rast_tab( void ) #undef LOCAL_VARS #define LOCAL_VARS \ r300ContextPtr rmesa = R300_CONTEXT(ctx); \ - const GLuint vertsize = rmesa->swtcl.vertex_size; \ - const char *r300verts = (char *)rmesa->swtcl.verts; \ + const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \ + const char *r300verts = (char *)rmesa->radeon.swtcl.verts; \ const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ const GLboolean stipple = ctx->Line.StippleFlag; \ (void) elt; (void) stipple; @@ -601,10 +468,9 @@ static void r300ChooseRenderState( GLcontext *ctx ) GLuint index = 0; GLuint flags = ctx->_TriangleCaps; - if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT; if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT; - if (index != rmesa->swtcl.RenderIndex) { + if (index != rmesa->radeon.swtcl.RenderIndex) { tnl->Driver.Render.Points = rast_tab[index].points; tnl->Driver.Render.Line = rast_tab[index].line; tnl->Driver.Render.ClippedLine = rast_tab[index].line; @@ -621,30 +487,32 @@ static void r300ChooseRenderState( GLcontext *ctx ) tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; } - rmesa->swtcl.RenderIndex = index; + rmesa->radeon.swtcl.RenderIndex = index; } } -static void r300RenderStart(GLcontext *ctx) +void r300RenderStart(GLcontext *ctx) { - r300ContextPtr rmesa = R300_CONTEXT( ctx ); + r300ContextPtr rmesa = R300_CONTEXT( ctx ); r300ChooseRenderState(ctx); - r300SetVertexFormat(ctx); + r300PrepareVertices(ctx); + + r300ValidateBuffers(ctx); r300UpdateShaders(rmesa); r300UpdateShaderStates(rmesa); r300EmitCacheFlush(rmesa); - if (rmesa->dma.flush != 0 && - rmesa->dma.flush != flush_last_swtcl_prim) - rmesa->dma.flush( rmesa ); - + /* investigate if we can put back flush optimisation if needed */ + if (rmesa->radeon.dma.flush != NULL) { + rmesa->radeon.dma.flush(ctx); + } } -static void r300RenderFinish(GLcontext *ctx) +void r300RenderFinish(GLcontext *ctx) { } @@ -652,28 +520,26 @@ static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - if (rmesa->swtcl.hw_primitive != hwprim) { - R300_NEWPRIM( rmesa ); - rmesa->swtcl.hw_primitive = hwprim; + if (rmesa->radeon.swtcl.hw_primitive != hwprim) { + R300_NEWPRIM( rmesa ); + rmesa->radeon.swtcl.hw_primitive = hwprim; } } -static void r300RenderPrimitive(GLcontext *ctx, GLenum prim) +void r300RenderPrimitive(GLcontext *ctx, GLenum prim) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - rmesa->swtcl.render_primitive = prim; + rmesa->radeon.swtcl.render_primitive = prim; if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED)) - return; + return; r300RasterPrimitive( ctx, reduced_prim[prim] ); } -static void r300ResetLineStipple(GLcontext *ctx) +void r300ResetLineStipple(GLcontext *ctx) { - - } void r300InitSwtcl(GLcontext *ctx) @@ -699,50 +565,68 @@ void r300InitSwtcl(GLcontext *ctx) _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, 48 * sizeof(GLfloat) ); - rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; - rmesa->swtcl.RenderIndex = ~0; - rmesa->swtcl.render_primitive = GL_TRIANGLES; - rmesa->swtcl.hw_primitive = 0; + rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; + rmesa->radeon.swtcl.RenderIndex = ~0; + rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES; + rmesa->radeon.swtcl.hw_primitive = 0; _tnl_invalidate_vertex_state( ctx, ~0 ); _tnl_invalidate_vertices( ctx, ~0 ); - RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); _tnl_need_projected_coords( ctx, GL_FALSE ); - r300ChooseRenderState(ctx); } void r300DestroySwtcl(GLcontext *ctx) { } -void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, GLuint offset) +static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset) { - int cmd_reserved = 0; - int cmd_written = 0; + BATCH_LOCALS(&rmesa->radeon); - drm_radeon_cmd_header_t *cmd = NULL; if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", - __FUNCTION__, vertex_size, offset); - - start_packet3(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2), 2); - e32(1); - e32(vertex_size | (vertex_size << 8)); - e32(offset); + fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n", + __FUNCTION__, vertex_size, offset); + + BEGIN_BATCH(7); + OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2); + OUT_BATCH(1); + OUT_BATCH(vertex_size | (vertex_size << 8)); + OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); } -void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) +static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr) { - - int cmd_reserved = 0; - int cmd_written = 0; + BATCH_LOCALS(&rmesa->radeon); int type, num_verts; - drm_radeon_cmd_header_t *cmd = NULL; type = r300PrimitiveType(rmesa, primitive); num_verts = r300NumVerts(rmesa, vertex_nr, primitive); - start_packet3(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0), 0); - e32(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); + BEGIN_BATCH(3); + OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0); + OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type); + END_BATCH(); +} + +void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset) +{ + r300ContextPtr rmesa = R300_CONTEXT(ctx); + + rcommonEnsureCmdBufSpace(&rmesa->radeon, + rmesa->radeon.hw.max_state_size + (12*sizeof(int)), + __FUNCTION__); + radeonEmitState(&rmesa->radeon); + r300_emit_scissor(ctx); + r300EmitVertexAOS(rmesa, + rmesa->radeon.swtcl.vertex_size, + rmesa->radeon.dma.current, + current_offset); + + r300EmitVbufPrim(rmesa, + rmesa->radeon.swtcl.hw_primitive, + rmesa->radeon.swtcl.numverts); + r300EmitCacheFlush(rmesa); + COMMIT_BATCH(); } diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h index 55df53c1ad..c271d26546 100644 --- a/src/mesa/drivers/dri/r300/r300_swtcl.h +++ b/src/mesa/drivers/dri/r300/r300_swtcl.h @@ -39,7 +39,27 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast/swrast.h" #include "r300_context.h" +/* + * Here are definitions of OVM locations of vertex attributes for non TCL hw + */ +#define SWTCL_OVM_POS 0 +#define SWTCL_OVM_COLOR0 2 +#define SWTCL_OVM_COLOR1 3 +#define SWTCL_OVM_COLOR2 4 +#define SWTCL_OVM_COLOR3 5 +#define SWTCL_OVM_TEX(n) ((n) + 6) +#define SWTCL_OVM_POINT_SIZE 15 + +extern void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *InputsRead, GLuint *OutputsWritten); + extern void r300InitSwtcl( GLcontext *ctx ); extern void r300DestroySwtcl( GLcontext *ctx ); +extern void r300RenderStart(GLcontext *ctx); +extern void r300RenderFinish(GLcontext *ctx); +extern void r300RenderPrimitive(GLcontext *ctx, GLenum prim); +extern void r300ResetLineStipple(GLcontext *ctx); + +extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset); + #endif diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 7c699ec572..0af5bb4f46 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/enums.h" #include "main/image.h" +#include "main/mipmap.h" #include "main/simple_list.h" #include "main/texformat.h" #include "main/texstore.h" @@ -49,6 +50,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" #include "r300_state.h" #include "r300_ioctl.h" +#include "radeon_mipmap_tree.h" #include "r300_tex.h" #include "xmlpool.h" @@ -77,20 +79,20 @@ static unsigned int translate_wrap_mode(GLenum wrapmode) * * \param t Texture object whose wrap modes are to be set */ -static void r300UpdateTexWrap(r300TexObjPtr t) +static void r300UpdateTexWrap(radeonTexObjPtr t) { - struct gl_texture_object *tObj = t->base.tObj; + struct gl_texture_object *tObj = &t->base; - t->filter &= + t->pp_txfilter &= ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK); - t->filter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT; + t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT; if (tObj->Target != GL_TEXTURE_1D) { - t->filter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT; + t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT; if (tObj->Target == GL_TEXTURE_3D) - t->filter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT; + t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT; } } @@ -117,10 +119,13 @@ static GLuint aniso_filter(GLfloat anisotropy) * \param magf Texture magnification mode * \param anisotropy Maximum anisotropy level */ -static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy) +static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy) { - t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK); - t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY; + /* Force revalidation to account for switches from/to mipmapping. */ + t->validated = GL_FALSE; + + t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK); + t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY; /* Note that EXT_texture_filter_anisotropic is extremely vague about * how anisotropic filtering interacts with the "normal" filter modes. @@ -128,7 +133,7 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat * filter settings completely. This includes driconf's settings. */ if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) { - t->filter |= R300_TX_MAG_FILTER_ANISO + t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO | R300_TX_MIN_FILTER_ANISO | R300_TX_MIN_FILTER_MIP_LINEAR | aniso_filter(anisotropy); @@ -139,22 +144,22 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat switch (minf) { case GL_NEAREST: - t->filter |= R300_TX_MIN_FILTER_NEAREST; + t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST; break; case GL_LINEAR: - t->filter |= R300_TX_MIN_FILTER_LINEAR; + t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR; break; case GL_NEAREST_MIPMAP_NEAREST: - t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST; + t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST; break; case GL_NEAREST_MIPMAP_LINEAR: - t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR; + t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR; break; case GL_LINEAR_MIPMAP_NEAREST: - t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST; + t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST; break; case GL_LINEAR_MIPMAP_LINEAR: - t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR; + t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR; break; } @@ -163,15 +168,15 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat */ switch (magf) { case GL_NEAREST: - t->filter |= R300_TX_MAG_FILTER_NEAREST; + t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST; break; case GL_LINEAR: - t->filter |= R300_TX_MAG_FILTER_LINEAR; + t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR; break; } } -static void r300SetTexBorderColor(r300TexObjPtr t, const GLfloat color[4]) +static void r300SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4]) { GLubyte c[4]; CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); @@ -182,729 +187,6 @@ static void r300SetTexBorderColor(r300TexObjPtr t, const GLfloat color[4]) } /** - * Allocate space for and load the mesa images into the texture memory block. - * This will happen before drawing with a new texture, or drawing with a - * texture after it was swapped out or teximaged again. - */ - -static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj) -{ - r300TexObjPtr t; - - t = CALLOC_STRUCT(r300_tex_obj); - texObj->DriverData = t; - if (t != NULL) { - if (RADEON_DEBUG & DEBUG_TEXTURE) { - fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, - (void *)texObj, (void *)t); - } - - /* Initialize non-image-dependent parts of the state: - */ - t->base.tObj = texObj; - t->border_fallback = GL_FALSE; - - make_empty_list(&t->base); - - r300UpdateTexWrap(t); - r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); - r300SetTexBorderColor(t, texObj->BorderColor); - } - - return t; -} - -/* try to find a format which will only need a memcopy */ -static const struct gl_texture_format *r300Choose8888TexFormat(GLenum srcFormat, - GLenum srcType) -{ - const GLuint ui = 1; - const GLubyte littleEndian = *((const GLubyte *)&ui); - - if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) || - (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) || - (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || - (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) { - return &_mesa_texformat_rgba8888; - } else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || - (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) || - (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) || - (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) { - return &_mesa_texformat_rgba8888_rev; - } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) || - srcType == GL_UNSIGNED_INT_8_8_8_8)) { - return &_mesa_texformat_argb8888_rev; - } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) || - srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) { - return &_mesa_texformat_argb8888; - } else - return _dri_texformat_argb8888; -} - -static const struct gl_texture_format *r300ChooseTextureFormat(GLcontext * ctx, - GLint - internalFormat, - GLenum format, - GLenum type) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - const GLboolean do32bpt = - (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32); - const GLboolean force16bpt = - (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16); - (void)format; - -#if 0 - fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n", - _mesa_lookup_enum_by_nr(internalFormat), internalFormat, - _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format)); - fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt); -#endif - - switch (internalFormat) { - case 4: - case GL_RGBA: - case GL_COMPRESSED_RGBA: - switch (type) { - case GL_UNSIGNED_INT_10_10_10_2: - case GL_UNSIGNED_INT_2_10_10_10_REV: - return do32bpt ? _dri_texformat_argb8888 : - _dri_texformat_argb1555; - case GL_UNSIGNED_SHORT_4_4_4_4: - case GL_UNSIGNED_SHORT_4_4_4_4_REV: - return _dri_texformat_argb4444; - case GL_UNSIGNED_SHORT_5_5_5_1: - case GL_UNSIGNED_SHORT_1_5_5_5_REV: - return _dri_texformat_argb1555; - default: - return do32bpt ? r300Choose8888TexFormat(format, type) : - _dri_texformat_argb4444; - } - - case 3: - case GL_RGB: - case GL_COMPRESSED_RGB: - switch (type) { - case GL_UNSIGNED_SHORT_4_4_4_4: - case GL_UNSIGNED_SHORT_4_4_4_4_REV: - return _dri_texformat_argb4444; - case GL_UNSIGNED_SHORT_5_5_5_1: - case GL_UNSIGNED_SHORT_1_5_5_5_REV: - return _dri_texformat_argb1555; - case GL_UNSIGNED_SHORT_5_6_5: - case GL_UNSIGNED_SHORT_5_6_5_REV: - return _dri_texformat_rgb565; - default: - return do32bpt ? _dri_texformat_argb8888 : - _dri_texformat_rgb565; - } - - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - return !force16bpt ? - r300Choose8888TexFormat(format, - type) : _dri_texformat_argb4444; - - case GL_RGBA4: - case GL_RGBA2: - return _dri_texformat_argb4444; - - case GL_RGB5_A1: - return _dri_texformat_argb1555; - - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - return !force16bpt ? _dri_texformat_argb8888 : - _dri_texformat_rgb565; - - case GL_RGB5: - case GL_RGB4: - case GL_R3_G3_B2: - return _dri_texformat_rgb565; - - case GL_ALPHA: - case GL_ALPHA4: - case GL_ALPHA8: - case GL_ALPHA12: - case GL_ALPHA16: - case GL_COMPRESSED_ALPHA: - return _dri_texformat_a8; - - case 1: - case GL_LUMINANCE: - case GL_LUMINANCE4: - case GL_LUMINANCE8: - case GL_LUMINANCE12: - case GL_LUMINANCE16: - case GL_COMPRESSED_LUMINANCE: - return _dri_texformat_l8; - - case 2: - case GL_LUMINANCE_ALPHA: - case GL_LUMINANCE4_ALPHA4: - case GL_LUMINANCE6_ALPHA2: - case GL_LUMINANCE8_ALPHA8: - case GL_LUMINANCE12_ALPHA4: - case GL_LUMINANCE12_ALPHA12: - case GL_LUMINANCE16_ALPHA16: - case GL_COMPRESSED_LUMINANCE_ALPHA: - return _dri_texformat_al88; - - case GL_INTENSITY: - case GL_INTENSITY4: - case GL_INTENSITY8: - case GL_INTENSITY12: - case GL_INTENSITY16: - case GL_COMPRESSED_INTENSITY: - return _dri_texformat_i8; - - case GL_YCBCR_MESA: - if (type == GL_UNSIGNED_SHORT_8_8_APPLE || - type == GL_UNSIGNED_BYTE) - return &_mesa_texformat_ycbcr; - else - return &_mesa_texformat_ycbcr_rev; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; - - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return &_mesa_texformat_rgba_dxt1; - - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - return &_mesa_texformat_rgba_dxt3; - - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return &_mesa_texformat_rgba_dxt5; - - case GL_ALPHA16F_ARB: - return &_mesa_texformat_alpha_float16; - case GL_ALPHA32F_ARB: - return &_mesa_texformat_alpha_float32; - case GL_LUMINANCE16F_ARB: - return &_mesa_texformat_luminance_float16; - case GL_LUMINANCE32F_ARB: - return &_mesa_texformat_luminance_float32; - case GL_LUMINANCE_ALPHA16F_ARB: - return &_mesa_texformat_luminance_alpha_float16; - case GL_LUMINANCE_ALPHA32F_ARB: - return &_mesa_texformat_luminance_alpha_float32; - case GL_INTENSITY16F_ARB: - return &_mesa_texformat_intensity_float16; - case GL_INTENSITY32F_ARB: - return &_mesa_texformat_intensity_float32; - case GL_RGB16F_ARB: - return &_mesa_texformat_rgba_float16; - case GL_RGB32F_ARB: - return &_mesa_texformat_rgba_float32; - case GL_RGBA16F_ARB: - return &_mesa_texformat_rgba_float16; - case GL_RGBA32F_ARB: - return &_mesa_texformat_rgba_float32; - - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH_COMPONENT32: -#if 0 - switch (type) { - case GL_UNSIGNED_BYTE: - case GL_UNSIGNED_SHORT: - return &_mesa_texformat_z16; - case GL_UNSIGNED_INT: - return &_mesa_texformat_z32; - case GL_UNSIGNED_INT_24_8_EXT: - default: - return &_mesa_texformat_z24_s8; - } -#else - return &_mesa_texformat_z16; -#endif - - default: - _mesa_problem(ctx, - "unexpected internalFormat 0x%x in r300ChooseTextureFormat", - (int)internalFormat); - return NULL; - } - - return NULL; /* never get here */ -} - -static GLboolean -r300ValidateClientStorage(GLcontext * ctx, GLenum target, - GLint internalFormat, - GLint srcWidth, GLint srcHeight, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "intformat %s format %s type %s\n", - _mesa_lookup_enum_by_nr(internalFormat), - _mesa_lookup_enum_by_nr(format), - _mesa_lookup_enum_by_nr(type)); - - if (!ctx->Unpack.ClientStorage) - return 0; - - if (ctx->_ImageTransferState || - texImage->IsCompressed || texObj->GenerateMipmap) - return 0; - - /* This list is incomplete, may be different on ppc??? - */ - switch (internalFormat) { - case GL_RGBA: - if (format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) { - texImage->TexFormat = _dri_texformat_argb8888; - } else - return 0; - break; - - case GL_RGB: - if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { - texImage->TexFormat = _dri_texformat_rgb565; - } else - return 0; - break; - - case GL_YCBCR_MESA: - if (format == GL_YCBCR_MESA && - type == GL_UNSIGNED_SHORT_8_8_REV_APPLE) { - texImage->TexFormat = &_mesa_texformat_ycbcr_rev; - } else if (format == GL_YCBCR_MESA && - (type == GL_UNSIGNED_SHORT_8_8_APPLE || - type == GL_UNSIGNED_BYTE)) { - texImage->TexFormat = &_mesa_texformat_ycbcr; - } else - return 0; - break; - - default: - return 0; - } - - /* Could deal with these packing issues, but currently don't: - */ - if (packing->SkipPixels || - packing->SkipRows || packing->SwapBytes || packing->LsbFirst) { - return 0; - } - - GLint srcRowStride = _mesa_image_row_stride(packing, srcWidth, - format, type); - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: srcRowStride %d/%x\n", - __FUNCTION__, srcRowStride, srcRowStride); - - /* Could check this later in upload, pitch restrictions could be - * relaxed, but would need to store the image pitch somewhere, - * as packing details might change before image is uploaded: - */ - if (!r300IsGartMemory(rmesa, pixels, srcHeight * srcRowStride) - || (srcRowStride & 63)) - return 0; - - /* Have validated that _mesa_transfer_teximage would be a straight - * memcpy at this point. NOTE: future calls to TexSubImage will - * overwrite the client data. This is explicitly mentioned in the - * extension spec. - */ - texImage->Data = (void *)pixels; - texImage->IsClientData = GL_TRUE; - texImage->RowStride = srcRowStride / texImage->TexFormat->TexelBytes; - - return 1; -} - -static void r300TexImage1D(GLcontext * ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint border, - GLenum format, GLenum type, const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) -{ - driTextureObject *t = (driTextureObject *) texObj->DriverData; - - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); - return; - } - } - - /* Note, this will call ChooseTextureFormat */ - _mesa_store_teximage1d(ctx, target, level, internalFormat, - width, border, format, type, pixels, - &ctx->Unpack, texObj, texImage); - - t->dirty_images[0] |= (1 << level); -} - -static void r300TexSubImage1D(GLcontext * ctx, GLenum target, GLint level, - GLint xoffset, - GLsizei width, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) -{ - driTextureObject *t = (driTextureObject *) texObj->DriverData; - - assert(t); /* this _should_ be true */ - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); - return; - } - } - - _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, - format, type, pixels, packing, texObj, - texImage); - - t->dirty_images[0] |= (1 << level); -} - -static void r300TexImage2D(GLcontext * ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLenum format, GLenum type, const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) -{ - driTextureObject *t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = - (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - if (t != NULL) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); - return; - } - } - - texImage->IsClientData = GL_FALSE; - - if (r300ValidateClientStorage(ctx, target, - internalFormat, - width, height, - format, type, pixels, - packing, texObj, texImage)) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using client storage\n", - __FUNCTION__); - } else { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using normal storage\n", - __FUNCTION__); - - /* Normal path: copy (to cached memory) and eventually upload - * via another copy to GART memory and then a blit... Could - * eliminate one copy by going straight to (permanent) GART. - * - * Note, this will call r300ChooseTextureFormat. - */ - _mesa_store_teximage2d(ctx, target, level, internalFormat, - width, height, border, format, type, - pixels, &ctx->Unpack, texObj, texImage); - - t->dirty_images[face] |= (1 << level); - } -} - -static void r300TexSubImage2D(GLcontext * ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) -{ - driTextureObject *t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = - (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - assert(t); /* this _should_ be true */ - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); - return; - } - } - - _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, type, pixels, packing, texObj, - texImage); - - t->dirty_images[face] |= (1 << level); -} - -static void r300CompressedTexImage2D(GLcontext * ctx, GLenum target, - GLint level, GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid * data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) -{ - driTextureObject *t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = - (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - if (t != NULL) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, - "glCompressedTexImage2D"); - return; - } - } - - texImage->IsClientData = GL_FALSE; - - /* can't call this, different parameters. Would never evaluate to true anyway currently */ -#if 0 - if (r300ValidateClientStorage(ctx, target, - internalFormat, - width, height, - format, type, pixels, - packing, texObj, texImage)) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using client storage\n", - __FUNCTION__); - } else -#endif - { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using normal storage\n", - __FUNCTION__); - - /* Normal path: copy (to cached memory) and eventually upload - * via another copy to GART memory and then a blit... Could - * eliminate one copy by going straight to (permanent) GART. - * - * Note, this will call r300ChooseTextureFormat. - */ - _mesa_store_compressed_teximage2d(ctx, target, level, - internalFormat, width, height, - border, imageSize, data, - texObj, texImage); - - t->dirty_images[face] |= (1 << level); - } -} - -static void r300CompressedTexSubImage2D(GLcontext * ctx, GLenum target, - GLint level, GLint xoffset, - GLint yoffset, GLsizei width, - GLsizei height, GLenum format, - GLsizei imageSize, const GLvoid * data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) -{ - driTextureObject *t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = - (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - assert(t); /* this _should_ be true */ - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, - "glCompressedTexSubImage3D"); - return; - } - } - - _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, - yoffset, width, height, format, - imageSize, data, texObj, texImage); - - t->dirty_images[face] |= (1 << level); -} - -static void r300TexImage3D(GLcontext * ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint depth, - GLint border, - GLenum format, GLenum type, const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) -{ - driTextureObject *t = (driTextureObject *) texObj->DriverData; - - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage3D"); - return; - } - } - - texImage->IsClientData = GL_FALSE; - -#if 0 - if (r300ValidateClientStorage(ctx, target, - internalFormat, - width, height, - format, type, pixels, - packing, texObj, texImage)) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using client storage\n", - __FUNCTION__); - } else -#endif - { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: Using normal storage\n", - __FUNCTION__); - - /* Normal path: copy (to cached memory) and eventually upload - * via another copy to GART memory and then a blit... Could - * eliminate one copy by going straight to (permanent) GART. - * - * Note, this will call r300ChooseTextureFormat. - */ - _mesa_store_teximage3d(ctx, target, level, internalFormat, - width, height, depth, border, - format, type, pixels, - &ctx->Unpack, texObj, texImage); - - t->dirty_images[0] |= (1 << level); - } -} - -static void -r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, GLint zoffset, - GLsizei width, GLsizei height, GLsizei depth, - GLenum format, GLenum type, - const GLvoid * pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage) -{ - driTextureObject *t = (driTextureObject *) texObj->DriverData; - -/* fprintf(stderr, "%s\n", __FUNCTION__); */ - - assert(t); /* this _should_ be true */ - if (t) { - driSwapOutTextureObject(t); - } else { - t = (driTextureObject *) r300AllocTexObj(texObj); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage3D"); - return; - } - texObj->DriverData = t; - } - - _mesa_store_texsubimage3d(ctx, target, level, xoffset, yoffset, zoffset, - width, height, depth, - format, type, pixels, packing, texObj, - texImage); - - t->dirty_images[0] |= (1 << level); -} - -/** * Changes variables and flags for a state update, which will happen at the * next UpdateTextureState */ @@ -913,7 +195,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, struct gl_texture_object *texObj, GLenum pname, const GLfloat * params) { - r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; + radeonTexObj* t = radeon_tex_obj(texObj); if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { fprintf(stderr, "%s( %s )\n", __FUNCTION__, @@ -946,7 +228,11 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, * we just have to rely on loading the right subset of mipmap levels * to simulate a clamped LOD. */ - driSwapOutTextureObject((driTextureObject *) t); + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = 0; + t->validated = GL_FALSE; + } break; case GL_DEPTH_TEXTURE_MODE: @@ -969,27 +255,10 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, } } -static void r300BindTexture(GLcontext * ctx, GLenum target, - struct gl_texture_object *texObj) -{ - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { - fprintf(stderr, "%s( %p ) unit=%d\n", __FUNCTION__, - (void *)texObj, ctx->Texture.CurrentUnit); - } - - if ((target == GL_TEXTURE_1D) - || (target == GL_TEXTURE_2D) - || (target == GL_TEXTURE_3D) - || (target == GL_TEXTURE_CUBE_MAP) - || (target == GL_TEXTURE_RECTANGLE_NV)) { - assert(texObj->DriverData != NULL); - } -} - static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - driTextureObject *t = (driTextureObject *) texObj->DriverData; + radeonTexObj* t = radeon_tex_obj(texObj); if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, @@ -997,14 +266,24 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) _mesa_lookup_enum_by_nr(texObj->Target)); } - if (t != NULL) { - if (rmesa) { - R300_FIREVERTICES(rmesa); - } + if (rmesa) { + int i; + radeon_firevertices(&rmesa->radeon); + + for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i) + if (rmesa->hw.textures[i] == t) + rmesa->hw.textures[i] = 0; + } - driDestroyTextureObject(t); + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = 0; } - /* Free mipmap images and the texture object itself */ _mesa_delete_texture_object(ctx, texObj); } @@ -1013,8 +292,6 @@ static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) * Called via ctx->Driver.NewTextureObject. * Note: this function will be called during context creation to * allocate the default texture objects. - * Note: we could use containment here to 'derive' the driver-specific - * texture object from the core mesa gl_texture_object. Not done at this time. * Fixup MaxAnisotropy according to user preference. */ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, @@ -1022,14 +299,23 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, GLenum target) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_object *obj; - obj = _mesa_new_texture_object(ctx, name, target); - if (!obj) - return NULL; - obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; + radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); + - r300AllocTexObj(obj); - return obj; + if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { + fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + t, _mesa_lookup_enum_by_nr(target)); + } + + _mesa_initialize_texture_object(&t->base, name, target); + t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; + + /* Initialize hardware state */ + r300UpdateTexWrap(t); + r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy); + r300SetTexBorderColor(t, t->base.BorderColor); + + return &t->base; } void r300InitTextureFuncs(struct dd_function_table *functions) @@ -1037,22 +323,30 @@ void r300InitTextureFuncs(struct dd_function_table *functions) /* Note: we only plug in the functions we implement in the driver * since _mesa_init_driver_functions() was already called. */ - functions->ChooseTextureFormat = r300ChooseTextureFormat; - functions->TexImage1D = r300TexImage1D; - functions->TexImage2D = r300TexImage2D; - functions->TexImage3D = r300TexImage3D; - functions->TexSubImage1D = r300TexSubImage1D; - functions->TexSubImage2D = r300TexSubImage2D; - functions->TexSubImage3D = r300TexSubImage3D; + functions->NewTextureImage = radeonNewTextureImage; + functions->FreeTexImageData = radeonFreeTexImageData; + functions->MapTexture = radeonMapTexture; + functions->UnmapTexture = radeonUnmapTexture; + + functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa; + functions->TexImage1D = radeonTexImage1D; + functions->TexImage2D = radeonTexImage2D; + functions->TexImage3D = radeonTexImage3D; + functions->TexSubImage1D = radeonTexSubImage1D; + functions->TexSubImage2D = radeonTexSubImage2D; + functions->TexSubImage3D = radeonTexSubImage3D; + functions->GetTexImage = radeonGetTexImage; + functions->GetCompressedTexImage = radeonGetCompressedTexImage; functions->NewTextureObject = r300NewTextureObject; - functions->BindTexture = r300BindTexture; functions->DeleteTexture = r300DeleteTexture; functions->IsTextureResident = driIsTextureResident; functions->TexParameter = r300TexParameter; - functions->CompressedTexImage2D = r300CompressedTexImage2D; - functions->CompressedTexSubImage2D = r300CompressedTexSubImage2D; + functions->CompressedTexImage2D = radeonCompressedTexImage2D; + functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + + functions->GenerateMipmap = radeonGenerateMipmap; driInitTextureFormats(); } diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h index b86d45bfe0..8a653ea2d1 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.h +++ b/src/mesa/drivers/dri/r300/r300_tex.h @@ -37,16 +37,17 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. extern void r300SetDepthTexMode(struct gl_texture_object *tObj); +extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, + __DRIdrawable *dPriv); + +extern void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, + GLint format, __DRIdrawable *dPriv); + extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); -extern void r300UpdateTextureState(GLcontext * ctx); - -extern int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, - GLuint face); - -extern void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t); +extern GLboolean r300ValidateBuffers(GLcontext * ctx); extern void r300InitTextureFuncs(struct dd_function_table *functions); diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c deleted file mode 100644 index a89ab83d94..0000000000 --- a/src/mesa/drivers/dri/r300/r300_texmem.c +++ /dev/null @@ -1,568 +0,0 @@ -/************************************************************************** - -Copyright (C) Tungsten Graphics 2002. All Rights Reserved. -The Weather Channel, Inc. funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 -license. This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation on the rights to use, copy, modify, merge, publish, -distribute, sub license, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR -SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -**************************************************************************/ - -/** - * \file - * - * \author Gareth Hughes <gareth@valinux.com> - * - * \author Kevin E. Martin <martin@valinux.com> - */ - -#include <errno.h> - -#include "main/glheader.h" -#include "main/imports.h" -#include "main/context.h" -#include "main/colormac.h" -#include "main/macros.h" -#include "main/simple_list.h" -#include "main/texobj.h" -#include "radeon_reg.h" /* gets definition for usleep */ -#include "r300_context.h" -#include "r300_state.h" -#include "r300_cmdbuf.h" -#include "radeon_ioctl.h" -#include "r300_tex.h" -#include "r300_ioctl.h" -#include <unistd.h> /* for usleep() */ - -#ifdef USER_BUFFERS -#include "r300_mem.h" -#endif - -/** - * Destroy any device-dependent state associated with the texture. This may - * include NULLing out hardware state that points to the texture. - */ -void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t) -{ - int i; - - if (RADEON_DEBUG & DEBUG_TEXTURE) { - fprintf(stderr, "%s( %p, %p )\n", __FUNCTION__, - (void *)t, (void *)t->base.tObj); - } - - for (i = 0; i < rmesa->radeon.glCtx->Const.MaxTextureUnits; i++) { - if (rmesa->state.texture.unit[i].texobj == t->base.tObj) { - _mesa_reference_texobj(&rmesa->state.texture.unit[i].texobj, NULL); - } - } -} - -/* ------------------------------------------------------------ - * Texture image conversions - */ - -static void r300UploadGARTClientSubImage(r300ContextPtr rmesa, - r300TexObjPtr t, - struct gl_texture_image *texImage, - GLint hwlevel, - GLint x, GLint y, - GLint width, GLint height) -{ - const struct gl_texture_format *texFormat = texImage->TexFormat; - GLuint srcPitch, dstPitch; - int blit_format; - int srcOffset; - - /* - * XXX it appears that we always upload the full image, not a subimage. - * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever - * changed, the src pitch will have to change. - */ - switch (texFormat->TexelBytes) { - case 1: - blit_format = R300_CP_COLOR_FORMAT_CI8; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; - case 2: - blit_format = R300_CP_COLOR_FORMAT_RGB565; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; - case 4: - blit_format = R300_CP_COLOR_FORMAT_ARGB8888; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; - case 8: - case 16: - blit_format = R300_CP_COLOR_FORMAT_CI8; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; - default: - return; - } - - t->image[0][hwlevel].data = texImage->Data; - srcOffset = r300GartOffsetFromVirtual(rmesa, texImage->Data); - - assert(srcOffset != ~0); - - /* Don't currently need to cope with small pitches? - */ - width = texImage->Width; - height = texImage->Height; - - if (texFormat->TexelBytes > 4) { - width *= texFormat->TexelBytes; - } - - r300EmitWait(rmesa, R300_WAIT_3D); - - r300EmitBlit(rmesa, blit_format, - srcPitch, - srcOffset, - dstPitch, - t->bufAddr, - x, - y, - t->image[0][hwlevel].x + x, - t->image[0][hwlevel].y + y, width, height); - - r300EmitWait(rmesa, R300_WAIT_2D); -} - -static void r300UploadRectSubImage(r300ContextPtr rmesa, - r300TexObjPtr t, - struct gl_texture_image *texImage, - GLint x, GLint y, GLint width, GLint height) -{ - const struct gl_texture_format *texFormat = texImage->TexFormat; - int blit_format, dstPitch, done; - - switch (texFormat->TexelBytes) { - case 1: - blit_format = R300_CP_COLOR_FORMAT_CI8; - break; - case 2: - blit_format = R300_CP_COLOR_FORMAT_RGB565; - break; - case 4: - blit_format = R300_CP_COLOR_FORMAT_ARGB8888; - break; - case 8: - case 16: - blit_format = R300_CP_COLOR_FORMAT_CI8; - break; - default: - return; - } - - t->image[0][0].data = texImage->Data; - - /* Currently don't need to cope with small pitches. - */ - width = texImage->Width; - height = texImage->Height; - dstPitch = t->pitch; - - if (texFormat->TexelBytes > 4) { - width *= texFormat->TexelBytes; - } - - if (rmesa->prefer_gart_client_texturing && texImage->IsClientData) { - /* In this case, could also use GART texturing. This is - * currently disabled, but has been tested & works. - */ - t->offset = r300GartOffsetFromVirtual(rmesa, texImage->Data); - t->pitch = texImage->RowStride * texFormat->TexelBytes - 32; - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "Using GART texturing for rectangular client texture\n"); - - /* Release FB memory allocated for this image: - */ - /* FIXME This may not be correct as driSwapOutTextureObject sets - * FIXME dirty_images. It may be fine, though. - */ - if (t->base.memBlock) { - driSwapOutTextureObject((driTextureObject *) t); - } - } else if (texImage->IsClientData) { - /* Data already in GART memory, with usable pitch. - */ - GLuint srcPitch; - srcPitch = texImage->RowStride * texFormat->TexelBytes; - r300EmitBlit(rmesa, - blit_format, - srcPitch, - r300GartOffsetFromVirtual(rmesa, texImage->Data), - dstPitch, t->bufAddr, 0, 0, 0, 0, width, height); - } else { - /* Data not in GART memory, or bad pitch. - */ - for (done = 0; done < height;) { - struct r300_dma_region region; - int lines = - MIN2(height - done, RADEON_BUFFER_SIZE / dstPitch); - int src_pitch; - char *tex; - - src_pitch = texImage->RowStride * texFormat->TexelBytes; - - tex = (char *)texImage->Data + done * src_pitch; - - memset(®ion, 0, sizeof(region)); - r300AllocDmaRegion(rmesa, ®ion, lines * dstPitch, - 1024); - - /* Copy texdata to dma: - */ - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "%s: src_pitch %d dst_pitch %d\n", - __FUNCTION__, src_pitch, dstPitch); - - if (src_pitch == dstPitch) { - memcpy(region.address + region.start, tex, - lines * src_pitch); - } else { - char *buf = region.address + region.start; - int i; - for (i = 0; i < lines; i++) { - memcpy(buf, tex, src_pitch); - buf += dstPitch; - tex += src_pitch; - } - } - - r300EmitWait(rmesa, R300_WAIT_3D); - - /* Blit to framebuffer - */ - r300EmitBlit(rmesa, - blit_format, - dstPitch, GET_START(®ion), - dstPitch | (t->tile_bits >> 16), - t->bufAddr, 0, 0, 0, done, width, lines); - - r300EmitWait(rmesa, R300_WAIT_2D); -#ifdef USER_BUFFERS - r300_mem_use(rmesa, region.buf->id); -#endif - - r300ReleaseDmaRegion(rmesa, ®ion, __FUNCTION__); - done += lines; - } - } -} - -/** - * Upload the texture image associated with texture \a t at the specified - * level at the address relative to \a start. - */ -static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, - GLint hwlevel, - GLint x, GLint y, GLint width, GLint height, - GLuint face) -{ - struct gl_texture_image *texImage = NULL; - GLuint offset; - GLint imageWidth, imageHeight; - GLint ret; - drm_radeon_texture_t tex; - drm_radeon_tex_image_t tmp; - const int level = hwlevel + t->base.firstLevel; - - if (RADEON_DEBUG & DEBUG_TEXTURE) { - fprintf(stderr, - "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", - __FUNCTION__, (void *)t, (void *)t->base.tObj, level, - width, height, face); - } - - ASSERT(face < 6); - - /* Ensure we have a valid texture to upload */ - if ((hwlevel < 0) || (hwlevel >= R300_MAX_TEXTURE_LEVELS)) { - _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); - return; - } - - texImage = t->base.tObj->Image[face][level]; - - if (!texImage) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: texImage %d is NULL!\n", - __FUNCTION__, level); - return; - } - if (!texImage->Data) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: image data is NULL!\n", - __FUNCTION__); - return; - } - - if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - assert(level == 0); - assert(hwlevel == 0); - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: image data is rectangular\n", - __FUNCTION__); - r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height); - return; - } else if (texImage->IsClientData) { - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "%s: image data is in GART client storage\n", - __FUNCTION__); - r300UploadGARTClientSubImage(rmesa, t, texImage, hwlevel, x, y, - width, height); - return; - } else if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: image data is in normal memory\n", - __FUNCTION__); - - imageWidth = texImage->Width; - imageHeight = texImage->Height; - - offset = t->bufAddr; - - if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { - GLint imageX = 0; - GLint imageY = 0; - GLint blitX = t->image[face][hwlevel].x; - GLint blitY = t->image[face][hwlevel].y; - GLint blitWidth = t->image[face][hwlevel].width; - GLint blitHeight = t->image[face][hwlevel].height; - fprintf(stderr, " upload image: %d,%d at %d,%d\n", - imageWidth, imageHeight, imageX, imageY); - fprintf(stderr, " upload blit: %d,%d at %d,%d\n", - blitWidth, blitHeight, blitX, blitY); - fprintf(stderr, " blit ofs: 0x%07x level: %d/%d\n", - (GLuint) offset, hwlevel, level); - } - - t->image[face][hwlevel].data = texImage->Data; - - /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. - * NOTE: we're always use a 1KB-wide blit and I8 texture format. - * We used to use 1, 2 and 4-byte texels and used to use the texture - * width to dictate the blit width - but that won't work for compressed - * textures. (Brian) - * NOTE: can't do that with texture tiling. (sroland) - */ - tex.offset = offset; - tex.image = &tmp; - /* copy (x,y,width,height,data) */ - memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp)); - - if (texImage->TexFormat->TexelBytes > 4) { - const int log2TexelBytes = - (3 + (texImage->TexFormat->TexelBytes >> 4)); - tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ - tex.pitch = - MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / - 64, 1); - tex.height = imageHeight; - tex.width = imageWidth << log2TexelBytes; - tex.offset += (tmp.x << log2TexelBytes) & ~1023; - tmp.x = tmp.x % (1024 >> log2TexelBytes); - tmp.width = tmp.width << log2TexelBytes; - } else if (texImage->TexFormat->TexelBytes) { - /* use multi-byte upload scheme */ - tex.height = imageHeight; - tex.width = imageWidth; - switch (texImage->TexFormat->TexelBytes) { - case 1: - tex.format = RADEON_TXFORMAT_I8; - break; - case 2: - tex.format = RADEON_TXFORMAT_AI88; - break; - case 4: - tex.format = RADEON_TXFORMAT_ARGB8888; - break; - } - tex.pitch = - MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / - 64, 1); - tex.offset += tmp.x & ~1023; - tmp.x = tmp.x % 1024; - - if (t->tile_bits & R300_TXO_MICRO_TILE) { - /* need something like "tiled coordinates" ? */ - tmp.y = tmp.x / (tex.pitch * 128) * 2; - tmp.x = - tmp.x % (tex.pitch * 128) / 2 / - texImage->TexFormat->TexelBytes; - tex.pitch |= RADEON_DST_TILE_MICRO >> 22; - } else { - tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); - } -#if 1 - if ((t->tile_bits & R300_TXO_MACRO_TILE) && - (texImage->Width * texImage->TexFormat->TexelBytes >= 256) - && ((!(t->tile_bits & R300_TXO_MICRO_TILE) - && (texImage->Height >= 8)) - || (texImage->Height >= 16))) { - /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, - OR if height is smaller than 8 automatically, but if micro tiling is active - the limit is height 16 instead ? */ - tex.pitch |= RADEON_DST_TILE_MACRO >> 22; - } -#endif - } else { - /* In case of for instance 8x8 texture (2x2 dxt blocks), - padding after the first two blocks is needed (only - with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ - /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) - has 4 real pixels. Needed so the kernel module reads - the right amount of data. */ - tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ - tex.pitch = (R300_BLIT_WIDTH_BYTES / 64); - tex.height = (imageHeight + 3) / 4; - tex.width = (imageWidth + 3) / 4; - if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) { - tex.width *= 8; - } else { - tex.width *= 16; - } - } - - LOCK_HARDWARE(&rmesa->radeon); - do { - ret = - drmCommandWriteRead(rmesa->radeon.dri.fd, - DRM_RADEON_TEXTURE, &tex, - sizeof(drm_radeon_texture_t)); - if (ret) { - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, - "DRM_RADEON_TEXTURE: again!\n"); - usleep(1); - } - } while (ret == -EAGAIN); - - UNLOCK_HARDWARE(&rmesa->radeon); - - if (ret) { - fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret); - fprintf(stderr, " offset=0x%08x\n", offset); - fprintf(stderr, " image width=%d height=%d\n", - imageWidth, imageHeight); - fprintf(stderr, " blit width=%d height=%d data=%p\n", - t->image[face][hwlevel].width, - t->image[face][hwlevel].height, - t->image[face][hwlevel].data); - _mesa_exit(-1); - } -} - -/** - * Upload the texture images associated with texture \a t. This might - * require the allocation of texture memory. - * - * \param rmesa Context pointer - * \param t Texture to be uploaded - * \param face Cube map face to be uploaded. Zero for non-cube maps. - */ - -int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) -{ - const int numLevels = t->base.lastLevel - t->base.firstLevel + 1; - - if (t->image_override) - return 0; - - if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { - fprintf(stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, - (void *)rmesa->radeon.glCtx, (void *)t->base.tObj, - t->base.totalSize, t->base.firstLevel, - t->base.lastLevel); - } - - if (t->base.totalSize == 0) - return 0; - - if (RADEON_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "%s: Syncing\n", __FUNCTION__); - radeonFinish(rmesa->radeon.glCtx); - } - - LOCK_HARDWARE(&rmesa->radeon); - - if (t->base.memBlock == NULL) { - int heap; - - heap = driAllocateTexture(rmesa->texture_heaps, rmesa->nr_heaps, - (driTextureObject *) t); - if (heap == -1) { - UNLOCK_HARDWARE(&rmesa->radeon); - return -1; - } - - /* Set the base offset of the texture image */ - t->bufAddr = rmesa->radeon.radeonScreen->texOffset[heap] - + t->base.memBlock->ofs; - t->offset = t->bufAddr; - - if (!(t->base.tObj->Image[0][0]->IsClientData)) { - /* hope it's safe to add that here... */ - t->offset |= t->tile_bits; - } - } - - /* Let the world know we've used this memory recently. - */ - driUpdateTextureLRU((driTextureObject *) t); - UNLOCK_HARDWARE(&rmesa->radeon); - - /* Upload any images that are new */ - if (t->base.dirty_images[face]) { - int i; - for (i = 0; i < numLevels; i++) { - if ((t->base. - dirty_images[face] & (1 << - (i + t->base.firstLevel))) != - 0) { - r300UploadSubImage(rmesa, t, i, 0, 0, - t->image[face][i].width, - t->image[face][i].height, - face); - } - } - t->base.dirty_images[face] = 0; - } - - if (RADEON_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "%s: Syncing\n", __FUNCTION__); - radeonFinish(rmesa->radeon.glCtx); - } - - return 0; -} diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index f6ae4b675b..6e47321246 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -47,7 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_context.h" #include "r300_state.h" #include "r300_ioctl.h" -#include "radeon_ioctl.h" +#include "radeon_mipmap_tree.h" #include "r300_tex.h" #include "r300_reg.h" @@ -117,7 +117,12 @@ static const struct tx_table { _ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)), _ASSIGN(Z16, R300_EASY_TX_FORMAT(X, X, X, X, X16)), _ASSIGN(Z24_S8, R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8)), + _ASSIGN(S8_Z24, R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8)), _ASSIGN(Z32, R300_EASY_TX_FORMAT(X, X, X, X, X32)), + /* EXT_texture_sRGB */ + _ASSIGN(SRGBA8, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA), + _ASSIGN(SLA8, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA), + _ASSIGN(SL8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | R300_TX_FORMAT_GAMMA), /* *INDENT-ON* */ }; @@ -143,13 +148,12 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) }, }; const GLuint *format; - r300TexObjPtr t; + radeonTexObjPtr t; if (!tObj) return; - t = (r300TexObjPtr) tObj->DriverData; - + t = radeon_tex_obj(tObj); switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) { case MESA_FORMAT_Z16: @@ -171,13 +175,13 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) switch (tObj->DepthMode) { case GL_LUMINANCE: - t->format = format[0]; + t->pp_txformat = format[0]; break; case GL_INTENSITY: - t->format = format[1]; + t->pp_txformat = format[1]; break; case GL_ALPHA: - t->format = format[2]; + t->pp_txformat = format[2]; break; default: /* Error...which should have already been caught by higher @@ -190,406 +194,134 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) /** - * Compute sizes and fill in offset and blit information for the given - * image (determined by \p face and \p level). - * - * \param curOffset points to the offset at which the image is to be stored - * and is updated by this function according to the size of the image. - */ -static void compute_tex_image_offset( - struct gl_texture_object *tObj, - GLuint face, - GLint level, - GLint* curOffset) -{ - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; - const struct gl_texture_image* texImage; - GLuint blitWidth = R300_BLIT_WIDTH_BYTES; - GLuint texelBytes; - GLuint size; - - texImage = tObj->Image[0][level + t->base.firstLevel]; - if (!texImage) - return; - - texelBytes = texImage->TexFormat->TexelBytes; - - /* find image size in bytes */ - if (texImage->IsCompressed) { - if ((t->format & R300_TX_FORMAT_DXT1) == - R300_TX_FORMAT_DXT1) { - // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format); - if ((texImage->Width + 3) < 8) /* width one block */ - size = texImage->CompressedSize * 4; - else if ((texImage->Width + 3) < 16) - size = texImage->CompressedSize * 2; - else - size = texImage->CompressedSize; - } else { - /* DXT3/5, 16 bytes per block */ - WARN_ONCE - ("DXT 3/5 suffers from multitexturing problems!\n"); - // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width); - if ((texImage->Width + 3) < 8) - size = texImage->CompressedSize * 2; - else - size = texImage->CompressedSize; - } - } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - size = - ((texImage->Width * texelBytes + - 63) & ~63) * texImage->Height; - blitWidth = 64 / texelBytes; - } else if (t->tile_bits & R300_TXO_MICRO_TILE) { - /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, - though the actual offset may be different (if texture is less than - 32 bytes width) to the untiled case */ - int w = (texImage->Width * texelBytes * 2 + 31) & ~31; - size = - (w * ((texImage->Height + 1) / 2)) * - texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } else { - int w = (texImage->Width * texelBytes + 31) & ~31; - size = w * texImage->Height * texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } - assert(size > 0); - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", - texImage->Width, texImage->Height, - texImage->Depth, - texImage->TexFormat->TexelBytes, - texImage->InternalFormat); - - /* All images are aligned to a 32-byte offset */ - *curOffset = (*curOffset + 0x1f) & ~0x1f; - - if (texelBytes) { - /* fix x and y coords up later together with offset */ - t->image[face][level].x = *curOffset; - t->image[face][level].y = 0; - t->image[face][level].width = - MIN2(size / texelBytes, blitWidth); - t->image[face][level].height = - (size / texelBytes) / t->image[face][level].width; - } else { - t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES; - t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES; - t->image[face][level].width = - MIN2(size, R300_BLIT_WIDTH_BYTES); - t->image[face][level].height = size / t->image[face][level].width; - } - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", - level, face, texImage->Width, texImage->Height, - t->image[face][level].x, t->image[face][level].y, - t->image[face][level].width, t->image[face][level].height, - size, *curOffset); - - *curOffset += size; -} - - - -/** - * This function computes the number of bytes of storage needed for - * the given texture object (all mipmap levels, all cube faces). - * The \c image[face][level].x/y/width/height parameters for upload/blitting - * are computed here. \c filter, \c format, etc. will be set here - * too. + * Compute the cached hardware register values for the given texture object. * * \param rmesa Context pointer - * \param tObj GL texture object whose images are to be posted to - * hardware state. + * \param t the r300 texture object */ -static void r300SetTexImages(r300ContextPtr rmesa, - struct gl_texture_object *tObj) +static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t) { - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; - const struct gl_texture_image *baseImage = - tObj->Image[0][tObj->BaseLevel]; - GLint curOffset; - GLint i, texelBytes; - GLint numLevels; - GLint log2Width, log2Height, log2Depth; - - /* Set the hardware texture format - */ + const struct gl_texture_image *firstImage; + int firstlevel = t->mt ? t->mt->firstLevel : 0; + + firstImage = t->base.Image[0][firstlevel]; + if (!t->image_override - && VALID_FORMAT(baseImage->TexFormat->MesaFormat)) { - if (baseImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) { - r300SetDepthTexMode(tObj); + && VALID_FORMAT(firstImage->TexFormat->MesaFormat)) { + if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) { + r300SetDepthTexMode(&t->base); } else { - t->format = tx_table[baseImage->TexFormat->MesaFormat].format; + t->pp_txformat = tx_table[firstImage->TexFormat->MesaFormat].format; } - t->filter |= tx_table[baseImage->TexFormat->MesaFormat].filter; + t->pp_txfilter |= tx_table[firstImage->TexFormat->MesaFormat].filter; } else if (!t->image_override) { _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); return; } - texelBytes = baseImage->TexFormat->TexelBytes; - - /* Compute which mipmap levels we really want to send to the hardware. - */ - driCalculateTextureFirstLastLevel((driTextureObject *) t); - log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; - log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; - log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; - - numLevels = t->base.lastLevel - t->base.firstLevel + 1; + if (t->image_override && t->bo) + return; - assert(numLevels <= R300_MAX_TEXTURE_LEVELS); + t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT) + | ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT) + | ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT) + | ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT)); - /* Calculate mipmap offsets and dimensions for blitting (uploading) - * The idea is that we lay out the mipmap levels within a block of - * memory organized as a rectangle of width BLIT_WIDTH_BYTES. - */ t->tile_bits = 0; - /* figure out if this texture is suitable for tiling. */ -#if 0 /* Disabled for now */ - if (texelBytes) { - if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) && - /* texrect might be able to use micro tiling too in theory? */ - (baseImage->Height > 1)) { - - /* allow 32 (bytes) x 1 mip (which will use two times the space - the non-tiled version would use) max if base texture is large enough */ - if ((numLevels == 1) || - (((baseImage->Width * texelBytes / - baseImage->Height) <= 32) - && (baseImage->Width * texelBytes > 64)) - || - ((baseImage->Width * texelBytes / - baseImage->Height) <= 16)) { - t->tile_bits |= R300_TXO_MICRO_TILE; - } - } - - if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { - /* we can set macro tiling even for small textures, they will be untiled anyway */ - t->tile_bits |= R300_TXO_MACRO_TILE; - } - } -#endif - - curOffset = 0; + if (t->base.Target == GL_TEXTURE_CUBE_MAP) + t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP; + if (t->base.Target == GL_TEXTURE_3D) + t->pp_txformat |= R300_TX_FORMAT_3D; - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - ASSERT(log2Width == log2Height); - t->format |= R300_TX_FORMAT_CUBIC_MAP; - for(i = 0; i < numLevels; i++) { - GLuint face; - for(face = 0; face < 6; face++) - compute_tex_image_offset(tObj, face, i, &curOffset); - } - } else { - if (tObj->Target == GL_TEXTURE_3D) - t->format |= R300_TX_FORMAT_3D; - - for (i = 0; i < numLevels; i++) - compute_tex_image_offset(tObj, 0, i, &curOffset); - } - - /* Align the total size of texture memory block. - */ - t->base.totalSize = - (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; - - t->size = - (((tObj->Image[0][t->base.firstLevel]->Width - - 1) << R300_TX_WIDTHMASK_SHIFT) - | ((tObj->Image[0][t->base.firstLevel]->Height - 1) << - R300_TX_HEIGHTMASK_SHIFT) - | ((tObj->Image[0][t->base.firstLevel]->DepthLog2) << - R300_TX_DEPTHMASK_SHIFT)) - | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT); - - t->pitch = 0; - - /* Only need to round to nearest 32 for textures, but the blitter - * requires 64-byte aligned pitches, and we may/may not need the - * blitter. NPOT only! - */ - if (baseImage->IsCompressed) { - t->pitch |= - (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); - } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - unsigned int align = (64 / texelBytes) - 1; - t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * - texelBytes) + 63) & ~(63); - t->size |= R300_TX_SIZE_TXPITCH_EN; + if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) { + unsigned int align = (64 / t->mt->bpp) - 1; + t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN; if (!t->image_override) - t->pitch_reg = - (((tObj->Image[0][t->base.firstLevel]->Width) + - align) & ~align) - 1; - } else { - t->pitch |= - ((tObj->Image[0][t->base.firstLevel]->Width * - texelBytes) + 63) & ~(63); + t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1; } if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { - if (tObj->Image[0][t->base.firstLevel]->Width > 2048) - t->pitch_reg |= R500_TXWIDTH_BIT11; - if (tObj->Image[0][t->base.firstLevel]->Height > 2048) - t->pitch_reg |= R500_TXHEIGHT_BIT11; + if (firstImage->Width > 2048) + t->pp_txpitch |= R500_TXWIDTH_BIT11; + if (firstImage->Height > 2048) + t->pp_txpitch |= R500_TXHEIGHT_BIT11; } } -/* ================================================================ - * Texture unit state management +/** + * Ensure the given texture is ready for rendering. + * + * Mostly this means populating the texture object's mipmap tree. */ - -static GLboolean r300EnableTexture2D(GLcontext * ctx, int unit) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; - - ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); - - if (t->base.dirty_images[0]) { - R300_FIREVERTICES(rmesa); - - r300SetTexImages(rmesa, tObj); - r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); - if (!t->base.memBlock && !t->image_override) - return GL_FALSE; - } - - return GL_TRUE; -} - -static GLboolean r300EnableTexture3D(GLcontext * ctx, int unit) +static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + radeonTexObj *t = radeon_tex_obj(texObj); - ASSERT(tObj->Target == GL_TEXTURE_3D); - - /* r300 does not support mipmaps for 3D textures. */ - if ((tObj->MinFilter != GL_NEAREST) && (tObj->MinFilter != GL_LINEAR)) { + if (!radeon_validate_texture_miptree(ctx, texObj)) return GL_FALSE; - } - if (t->base.dirty_images[0]) { - R300_FIREVERTICES(rmesa); - r300SetTexImages(rmesa, tObj); - r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); - if (!t->base.memBlock) - return GL_FALSE; - } + /* Configure the hardware registers (more precisely, the cached version + * of the hardware registers). */ + setup_hardware_state(rmesa, t); + t->validated = GL_TRUE; return GL_TRUE; } -static GLboolean r300EnableTextureCube(GLcontext * ctx, int unit) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; - GLuint face; - - ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); - - if (t->base.dirty_images[0] || t->base.dirty_images[1] || - t->base.dirty_images[2] || t->base.dirty_images[3] || - t->base.dirty_images[4] || t->base.dirty_images[5]) { - /* flush */ - R300_FIREVERTICES(rmesa); - /* layout memory space, once for all faces */ - r300SetTexImages(rmesa, tObj); - } - - /* upload (per face) */ - for (face = 0; face < 6; face++) { - if (t->base.dirty_images[face]) { - r300UploadTexImages(rmesa, - (r300TexObjPtr) tObj->DriverData, - face); - } - } - - if (!t->base.memBlock) { - /* texmem alloc failed, use s/w fallback */ - return GL_FALSE; - } - - return GL_TRUE; -} - -static GLboolean r300EnableTextureRect(GLcontext * ctx, int unit) +/** + * Ensure all enabled and complete textures are uploaded along with any buffers being used. + */ +GLboolean r300ValidateBuffers(GLcontext * ctx) { r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; - - ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); + struct radeon_renderbuffer *rrb; + int i; - if (t->base.dirty_images[0]) { - R300_FIREVERTICES(rmesa); + radeon_validate_reset_bos(&rmesa->radeon); - r300SetTexImages(rmesa, tObj); - r300UploadTexImages(rmesa, (r300TexObjPtr) tObj->DriverData, 0); - if (!t->base.memBlock && !t->image_override && - !rmesa->prefer_gart_client_texturing) - return GL_FALSE; + rrb = radeon_get_colorbuffer(&rmesa->radeon); + /* color buffer */ + if (rrb && rrb->bo) { + radeon_validate_bo(&rmesa->radeon, rrb->bo, + 0, RADEON_GEM_DOMAIN_VRAM); } - return GL_TRUE; -} - -static GLboolean r300UpdateTexture(GLcontext * ctx, int unit) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_ReallyEnabled ? - texUnit->_Current : NULL; - r300TexObjPtr t = tObj ? (r300TexObjPtr) tObj->DriverData : NULL; - - /* Fallback if there's a texture border */ - if (tObj && tObj->Image[0][tObj->BaseLevel]->Border > 0) { - tObj = NULL; - t = NULL; + /* depth buffer */ + rrb = radeon_get_depthbuffer(&rmesa->radeon); + if (rrb && rrb->bo) { + radeon_validate_bo(&rmesa->radeon, rrb->bo, + 0, RADEON_GEM_DOMAIN_VRAM); } + + for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) { + radeonTexObj *t; - /* Update state if this is a different texture object to last - * time. - */ - if (rmesa->state.texture.unit[unit].texobj != tObj) { - if (rmesa->state.texture.unit[unit].texobj != NULL) { - r300TexObjPtr t_old = (r300TexObjPtr) rmesa->state.texture.unit[unit].texobj->DriverData; - - /* The old texture is no longer bound to this texture unit. - * Mark it as such. - */ - - t_old->base.bound &= ~(1 << unit); - } - - _mesa_reference_texobj(&rmesa->state.texture.unit[unit].texobj, tObj); + if (!ctx->Texture.Unit[i]._ReallyEnabled) + continue; - if (t) { - t->base.bound |= (1 << unit); - driUpdateTextureLRU(&t->base); /* XXX: should be locked! */ + if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) { + _mesa_warning(ctx, + "failed to validate texture for unit %d.\n", + i); } + t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); + if (t->image_override && t->bo) + radeon_validate_bo(&rmesa->radeon, t->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); + + else if (t->mt->bo) + radeon_validate_bo(&rmesa->radeon, t->mt->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } + if (rmesa->radeon.dma.current) + radeon_validate_bo(&rmesa->radeon, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0); - return !t || !t->border_fallback; + return radeon_revalidate_bos(ctx); } void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, @@ -598,78 +330,166 @@ void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname, r300ContextPtr rmesa = pDRICtx->driverPrivate; struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->radeon.glCtx, texname); - r300TexObjPtr t; + radeonTexObjPtr t = radeon_tex_obj(tObj); uint32_t pitch_val; if (!tObj) return; - t = (r300TexObjPtr) tObj->DriverData; - t->image_override = GL_TRUE; if (!offset) return; - t->offset = offset; - t->pitch_reg &= (1 << 13) -1; + t->bo = NULL; + t->override_offset = offset; + t->pp_txpitch &= (1 << 13) -1; pitch_val = pitch; switch (depth) { case 32: - t->format = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); - t->filter |= tx_table[2].filter; + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + t->pp_txfilter |= tx_table[2].filter; pitch_val /= 4; break; case 24: default: - t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); - t->filter |= tx_table[4].filter; + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + t->pp_txfilter |= tx_table[4].filter; pitch_val /= 4; break; case 16: - t->format = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); - t->filter |= tx_table[5].filter; + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + t->pp_txfilter |= tx_table[5].filter; pitch_val /= 2; break; } pitch_val--; - t->pitch_reg |= pitch_val; + t->pp_txpitch |= pitch_val; } -static GLboolean r300UpdateTextureUnit(GLcontext * ctx, int unit) +void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - - if (texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT)) { - return (r300EnableTextureRect(ctx, unit) && - r300UpdateTexture(ctx, unit)); - } else if (texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT)) { - return (r300EnableTexture2D(ctx, unit) && - r300UpdateTexture(ctx, unit)); - } else if (texUnit->_ReallyEnabled & (TEXTURE_3D_BIT)) { - return (r300EnableTexture3D(ctx, unit) && - r300UpdateTexture(ctx, unit)); - } else if (texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) { - return (r300EnableTextureCube(ctx, unit) && - r300UpdateTexture(ctx, unit)); - } else if (texUnit->_ReallyEnabled) { - return GL_FALSE; - } else { - return r300UpdateTexture(ctx, unit); + struct gl_texture_unit *texUnit; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + struct radeon_renderbuffer *rb; + radeon_texture_image *rImage; + radeonContextPtr radeon; + r300ContextPtr rmesa; + struct radeon_framebuffer *rfb; + radeonTexObjPtr t; + uint32_t pitch_val; + uint32_t internalFormat, type, format; + + type = GL_BGRA; + format = GL_UNSIGNED_BYTE; + internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4); + + radeon = pDRICtx->driverPrivate; + rmesa = pDRICtx->driverPrivate; + + rfb = dPriv->driverPrivate; + texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; + texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target); + texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0); + + rImage = get_radeon_texture_image(texImage); + t = radeon_tex_obj(texObj); + if (t == NULL) { + return; + } + + radeon_update_renderbuffers(pDRICtx, dPriv); + /* back & depth buffer are useless free them right away */ + rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; + if (rb && rb->bo) { + radeon_bo_unref(rb->bo); + rb->bo = NULL; + } + rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; + if (rb && rb->bo) { + radeon_bo_unref(rb->bo); + rb->bo = NULL; } + rb = rfb->color_rb[0]; + if (rb->bo == NULL) { + /* Failed to BO for the buffer */ + return; + } + + _mesa_lock_texture(radeon->glCtx, texObj); + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + if (rImage->bo) { + radeon_bo_unref(rImage->bo); + rImage->bo = NULL; + } + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = NULL; + } + if (rImage->mt) { + radeon_miptree_unreference(rImage->mt); + rImage->mt = NULL; + } + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, + rb->width, rb->height, 1, 0, rb->cpp); + texImage->RowStride = rb->pitch / rb->cpp; + texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx, + internalFormat, + type, format, 0); + rImage->bo = rb->bo; + radeon_bo_ref(rImage->bo); + t->bo = rb->bo; + radeon_bo_ref(t->bo); + t->tile_bits = 0; + t->image_override = GL_TRUE; + t->override_offset = 0; + t->pp_txpitch &= (1 << 13) -1; + pitch_val = rb->pitch; + switch (rb->cpp) { + case 4: + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + else + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8); + t->pp_txfilter |= tx_table[2].filter; + pitch_val /= 4; + break; + case 3: + default: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8); + t->pp_txfilter |= tx_table[4].filter; + pitch_val /= 4; + break; + case 2: + t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5); + t->pp_txfilter |= tx_table[5].filter; + pitch_val /= 2; + break; + } + pitch_val--; + t->pp_txsize = ((rb->width - 1) << R300_TX_WIDTHMASK_SHIFT) | + ((rb->height - 1) << R300_TX_HEIGHTMASK_SHIFT); + t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN; + t->pp_txpitch |= pitch_val; + + if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) { + if (rb->width > 2048) + t->pp_txpitch |= R500_TXWIDTH_BIT11; + if (rb->height > 2048) + t->pp_txpitch |= R500_TXHEIGHT_BIT11; + } + t->validated = GL_TRUE; + _mesa_unlock_texture(radeon->glCtx, texObj); + return; } -void r300UpdateTextureState(GLcontext * ctx) +void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) { - int i; - - for (i = 0; i < 8; i++) { - if (!r300UpdateTextureUnit(ctx, i)) { - _mesa_warning(ctx, - "failed to update texture state for unit %d.\n", - i); - } - } + r300SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); } diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 146daa367c..c41a8fdd62 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -34,10 +34,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "shader/program.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/prog_statevars.h" #include "tnl/tnl.h" #include "r300_context.h" +#include "r300_state.h" /* TODO: Get rid of t_src_class call */ #define CMP_SRCS(a, b) ((a.RelAddr != b.RelAddr) || (a.Index != b.Index && \ @@ -64,7 +66,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. int u_temp_used = (VSF_MAX_FRAGMENT_TEMPS - 1) - u_temp_i; \ if((vp->num_temporaries + u_temp_used) > VSF_MAX_FRAGMENT_TEMPS) { \ WARN_ONCE("Ran out of temps, num temps %d, us %d\n", vp->num_temporaries, u_temp_used); \ - vp->native = GL_FALSE; \ + vp->error = GL_TRUE; \ } \ u_temp_i=VSF_MAX_FRAGMENT_TEMPS-1; \ } while (0) @@ -214,21 +216,8 @@ static void vp_dump_inputs(struct r300_vertex_program *vp, char *caller) static unsigned long t_src_index(struct r300_vertex_program *vp, struct prog_src_register *src) { - int i; - int max_reg = -1; - if (src->File == PROGRAM_INPUT) { - if (vp->inputs[src->Index] != -1) - return vp->inputs[src->Index]; - - for (i = 0; i < VERT_ATTRIB_MAX; i++) - if (vp->inputs[i] > max_reg) - max_reg = vp->inputs[i]; - - vp->inputs[src->Index] = max_reg + 1; - - //vp_dump_inputs(vp, __FUNCTION__); - + assert(vp->inputs[src->Index] != -1); return vp->inputs[src->Index]; } else { if (src->Index < 0) { @@ -943,11 +932,17 @@ static GLuint *r300TranslateOpcodeXPD(struct r300_vertex_program *vp, static void t_inputs_outputs(struct r300_vertex_program *vp) { int i; - int cur_reg = 0; + int cur_reg; - for (i = 0; i < VERT_ATTRIB_MAX; i++) - vp->inputs[i] = -1; + cur_reg = -1; + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + if (vp->key.InputsRead & (1 << i)) + vp->inputs[i] = ++cur_reg; + else + vp->inputs[i] = -1; + } + cur_reg = 0; for (i = 0; i < VERT_RESULT_MAX; i++) vp->outputs[i] = -1; @@ -961,26 +956,36 @@ static void t_inputs_outputs(struct r300_vertex_program *vp) vp->outputs[VERT_RESULT_PSIZ] = cur_reg++; } + /* If we're writing back facing colors we need to send + * four colors to make front/back face colors selection work. + * If the vertex program doesn't write all 4 colors, lets + * pretend it does by skipping output index reg so the colors + * get written into appropriate output vectors. + */ if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL0)) { vp->outputs[VERT_RESULT_COL0] = cur_reg++; + } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) || + vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } if (vp->key.OutputsWritten & (1 << VERT_RESULT_COL1)) { - vp->outputs[VERT_RESULT_COL1] = - vp->outputs[VERT_RESULT_COL0] + 1; - cur_reg = vp->outputs[VERT_RESULT_COL1] + 1; + vp->outputs[VERT_RESULT_COL1] = cur_reg++; + } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0) || + vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { - vp->outputs[VERT_RESULT_BFC0] = - vp->outputs[VERT_RESULT_COL0] + 2; - cur_reg = vp->outputs[VERT_RESULT_BFC0] + 2; + vp->outputs[VERT_RESULT_BFC0] = cur_reg++; + } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { + cur_reg++; } if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC1)) { - vp->outputs[VERT_RESULT_BFC1] = - vp->outputs[VERT_RESULT_COL0] + 3; - cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1; + vp->outputs[VERT_RESULT_BFC1] = cur_reg++; + } else if (vp->key.OutputsWritten & (1 << VERT_RESULT_BFC0)) { + cur_reg++; } for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) { @@ -1007,14 +1012,13 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, struct prog_src_register src[3]; vp->pos_end = 0; /* Not supported yet */ - vp->program.length = 0; - /*vp->num_temporaries=mesa_vp->Base.NumTemporaries; */ + vp->hw_code.length = 0; vp->translated = GL_TRUE; - vp->native = GL_TRUE; + vp->error = GL_FALSE; t_inputs_outputs(vp); - for (inst = vp->program.body.i; vpi->Opcode != OPCODE_END; + for (inst = vp->hw_code.body.d; vpi->Opcode != OPCODE_END; vpi++, inst += 4) { FREE_TEMPS(); @@ -1176,38 +1180,15 @@ static void r300TranslateVertexShader(struct r300_vertex_program *vp, &u_temp_i); break; default: - assert(0); + vp->error = GL_TRUE; break; } } - /* Some outputs may be artificially added, to match the inputs - of the fragment program. Blank the outputs here. */ - for (i = 0; i < VERT_RESULT_MAX; i++) { - if (vp->key.OutputsAdded & (1 << i)) { - inst[0] = PVS_OP_DST_OPERAND(VE_ADD, - GL_FALSE, - GL_FALSE, - vp->outputs[i], - VSF_FLAG_ALL, - PVS_DST_REG_OUT); - inst[1] = __CONST(0, SWIZZLE_ZERO); - inst[2] = __CONST(0, SWIZZLE_ZERO); - inst[3] = __CONST(0, SWIZZLE_ZERO); - inst += 4; - } + vp->hw_code.length = (inst - vp->hw_code.body.d); + if (vp->hw_code.length >= VSF_MAX_FRAGMENT_LENGTH) { + vp->error = GL_TRUE; } - - vp->program.length = (inst - vp->program.body.i); - if (vp->program.length >= VSF_MAX_FRAGMENT_LENGTH) { - vp->program.length = 0; - vp->native = GL_FALSE; - } -#if 0 - fprintf(stderr, "hw program:\n"); - for (i = 0; i < vp->program.length; i++) - fprintf(stderr, "%08x\n", vp->program.body.d[i]); -#endif } /* DP4 version seems to trigger some hw peculiarity */ @@ -1386,6 +1367,49 @@ static struct r300_vertex_program *build_program(struct r300_vertex_program_key pos_as_texcoord(vp, &mesa_vp->Base); } + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Vertex program after native rewrite:\n"); + _mesa_print_program(&mesa_vp->Base); + fflush(stdout); + } + + /* Some outputs may be artificially added, to match the inputs of the fragment program. + * Issue 16 of vertex program spec says that all vertex attributes that are unwritten by + * vertex program are undefined, so just use MOV [vertex_result], CONST[0] + */ + { + int i, count = 0; + for (i = 0; i < VERT_RESULT_MAX; ++i) { + if (vp->key.OutputsAdded & (1 << i)) { + ++count; + } + } + + if (count > 0) { + struct prog_instruction *inst; + + _mesa_insert_instructions(&mesa_vp->Base, mesa_vp->Base.NumInstructions - 1, count); + inst = &mesa_vp->Base.Instructions[mesa_vp->Base.NumInstructions - 1 - count]; + + for (i = 0; i < VERT_RESULT_MAX; ++i) { + if (vp->key.OutputsAdded & (1 << i)) { + inst->Opcode = OPCODE_MOV; + + inst->DstReg.File = PROGRAM_OUTPUT; + inst->DstReg.Index = i; + inst->DstReg.WriteMask = WRITEMASK_XYZW; + inst->DstReg.CondMask = COND_TR; + + inst->SrcReg[0].File = PROGRAM_CONSTANT; + inst->SrcReg[0].Index = 0; + inst->SrcReg[0].Swizzle = SWIZZLE_XYZW; + + ++inst; + } + } + } + } + assert(mesa_vp->Base.NumInstructions); vp->num_temporaries = mesa_vp->Base.NumTemporaries; r300TranslateVertexShader(vp, mesa_vp->Base.Instructions); @@ -1432,7 +1456,12 @@ void r300SelectVertexShader(r300ContextPtr r300) wpos_idx = i; } - add_outputs(&wanted_key, VERT_RESULT_HPOS); + if (vpc->mesa_program.IsPositionInvariant) { + wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); + wanted_key.OutputsWritten |= (1 << VERT_RESULT_HPOS); + } else { + add_outputs(&wanted_key, VERT_RESULT_HPOS); + } if (InputsRead & FRAG_BIT_COL0) { add_outputs(&wanted_key, VERT_RESULT_COL0); @@ -1442,27 +1471,103 @@ void r300SelectVertexShader(r300ContextPtr r300) add_outputs(&wanted_key, VERT_RESULT_COL1); } + if (InputsRead & FRAG_BIT_FOGC) { + add_outputs(&wanted_key, VERT_RESULT_FOGC); + } + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { if (InputsRead & (FRAG_BIT_TEX0 << i)) { add_outputs(&wanted_key, VERT_RESULT_TEX0 + i); } } - if (vpc->mesa_program.IsPositionInvariant) { - /* we wan't position don't we ? */ - wanted_key.InputsRead |= (1 << VERT_ATTRIB_POS); - } - for (vp = vpc->progs; vp; vp = vp->next) if (_mesa_memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) { r300->selected_vp = vp; return; } - //_mesa_print_program(&vpc->mesa_program.Base); + + if (RADEON_DEBUG & DEBUG_VERTS) { + fprintf(stderr, "Initial vertex program:\n"); + _mesa_print_program(&vpc->mesa_program.Base); + fflush(stdout); + } vp = build_program(&wanted_key, &vpc->mesa_program, wpos_idx); vp->next = vpc->progs; vpc->progs = vp; r300->selected_vp = vp; } + +#define bump_vpu_count(ptr, new_count) do { \ + drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \ + int _nc=(new_count)/4; \ + assert(_nc < 256); \ + if(_nc>_p->vpu.count)_p->vpu.count=_nc; \ + } while(0) + +static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_shader_hw_code *code) +{ + int i; + + assert((code->length > 0) && (code->length % 4 == 0)); + + switch ((dest >> 8) & 0xf) { + case 0: + R300_STATECHANGE(r300, vpi); + for (i = 0; i < code->length; i++) + r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff)); + break; + case 2: + R300_STATECHANGE(r300, vpp); + for (i = 0; i < code->length; i++) + r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff)); + break; + case 4: + R300_STATECHANGE(r300, vps); + for (i = 0; i < code->length; i++) + r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]); + bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff)); + break; + default: + fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest); + _mesa_exit(-1); + } +} + +void r300SetupVertexProgram(r300ContextPtr rmesa) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + struct r300_vertex_program *prog = rmesa->selected_vp; + int inst_count = 0; + int param_count = 0; + + /* Reset state, in case we don't use something */ + ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0; + ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0; + + R300_STATECHANGE(rmesa, vpp); + param_count = r300VertexProgUpdateParams(ctx, + (struct r300_vertex_program_cont *) + ctx->VertexProgram._Current, + (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]); + bump_vpu_count(rmesa->hw.vpp.cmd, param_count); + param_count /= 4; + + r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->hw_code)); + inst_count = (prog->hw_code.length / 4) - 1; + + r300VapCntl(rmesa, _mesa_bitcount(prog->key.InputsRead), + _mesa_bitcount(prog->key.OutputsWritten), prog->num_temporaries); + + R300_STATECHANGE(rmesa, pvs); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) | + (inst_count << R300_PVS_LAST_INST_SHIFT); + + rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | (param_count << R300_PVS_MAX_CONST_ADDR_SHIFT); + rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT); +} diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h index 2f35f02bc8..b552e3fb1b 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.h +++ b/src/mesa/drivers/dri/r300/r300_vertprog.h @@ -32,4 +32,6 @@ #endif +void r300SetupVertexProgram(r300ContextPtr rmesa); + #endif diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 292573de89..4d58cf2162 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -27,10 +27,6 @@ #include "r500_fragprog.h" -#include "radeon_nqssadce.h" -#include "radeon_program_alu.h" - - static void reset_srcreg(struct prog_src_register* reg) { _mesa_bzero(reg, sizeof(*reg)); @@ -58,12 +54,12 @@ static struct prog_src_register shadow_ambient(struct gl_program *program, int t * - introduce a temporary register when write masks are needed * */ -static GLboolean transform_TEX( +GLboolean r500_transform_TEX( struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data) { - struct r500_fragment_program_compiler *compiler = - (struct r500_fragment_program_compiler*)data; + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; struct prog_instruction inst = *orig_inst; struct prog_instruction* tgt; GLboolean destredirect = GL_FALSE; @@ -188,121 +184,7 @@ static GLboolean transform_TEX( return GL_TRUE; } - -static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp) -{ - struct gl_fragment_program *mp = &fp->mesa_program; - - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (mp->Base.Parameters) - _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); -} - - -/** - * Transform the program to support fragment.position. - * - * Introduce a small fragment at the start of the program that will be - * the only code that directly reads the FRAG_ATTRIB_WPOS input. - * All other code pieces that reference that input will be rewritten - * to read from a newly allocated temporary. - * - * \todo if/when r5xx supports the radeon_program architecture, this is a - * likely candidate for code sharing. - */ -static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) -{ - GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; - - if (!(InputsRead & FRAG_BIT_WPOS)) - return; - - static gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 - }; - struct prog_instruction *fpi; - GLuint window_index; - int i = 0; - GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY); - - _mesa_insert_instructions(compiler->program, 0, 3); - fpi = compiler->program->Instructions; - - /* perspective divide */ - fpi[i].Opcode = OPCODE_RCP; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_W; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; - i++; - - fpi[i].Opcode = OPCODE_MUL; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_INPUT; - fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; - fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; - - fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[1].Index = tempregi; - fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; - i++; - - /* viewport transformation */ - window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens); - - fpi[i].Opcode = OPCODE_MAD; - - fpi[i].DstReg.File = PROGRAM_TEMPORARY; - fpi[i].DstReg.Index = tempregi; - fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; - fpi[i].DstReg.CondMask = COND_TR; - - fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[0].Index = tempregi; - fpi[i].SrcReg[0].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[1].Index = window_index; - fpi[i].SrcReg[1].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - - fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; - fpi[i].SrcReg[2].Index = window_index; - fpi[i].SrcReg[2].Swizzle = - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); - i++; - - for (; i < compiler->program->NumInstructions; ++i) { - int reg; - for (reg = 0; reg < 3; reg++) { - if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && - fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { - fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; - fpi[i].SrcReg[reg].Index = tempregi; - } - } - } -} - - -static void nqssadce_init(struct nqssadce_state* s) -{ - s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW; - s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W; -} - -static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) +GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg) { GLuint relevant; int i; @@ -314,22 +196,20 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) if (reg.Abs) return GL_FALSE; + if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE)) + return GL_FALSE; + if (reg.Negate) reg.Negate ^= NEGATE_XYZW; - if (opcode == OPCODE_KIL) { - if (reg.Swizzle != SWIZZLE_NOOP) - return GL_FALSE; - } else { - for(i = 0; i < 4; ++i) { - GLuint swz = GET_SWZ(reg.Swizzle, i); - if (swz == SWIZZLE_NIL) { - reg.Negate &= ~(1 << i); - continue; - } - if (swz >= 4) - return GL_FALSE; + for(i = 0; i < 4; ++i) { + GLuint swz = GET_SWZ(reg.Swizzle, i); + if (swz == SWIZZLE_NIL) { + reg.Negate &= ~(1 << i); + continue; } + if (swz >= 4) + return GL_FALSE; } if (reg.Negate) @@ -367,8 +247,7 @@ static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg) * The only thing we *cannot* do in an ALU instruction is per-component * negation. Therefore, we split the MOV into two instructions when necessary. */ -static void nqssadce_build_swizzle(struct nqssadce_state *s, - struct prog_dst_register dst, struct prog_src_register src) +void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src) { struct prog_instruction *inst; GLuint negatebase[2] = { 0, 0 }; @@ -392,129 +271,12 @@ static void nqssadce_build_swizzle(struct nqssadce_state *s, inst->DstReg = dst; inst->DstReg.WriteMask = negatebase[i]; inst->SrcReg[0] = src; + inst->SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW; inst++; s->IP++; } } -static GLuint build_dtm(GLuint depthmode) -{ - switch(depthmode) { - default: - case GL_LUMINANCE: return 0; - case GL_INTENSITY: return 1; - case GL_ALPHA: return 2; - } -} - -static GLuint build_func(GLuint comparefunc) -{ - return comparefunc - GL_NEVER; -} - - -/** - * Collect all external state that is relevant for compiling the given - * fragment program. - */ -static void build_state( - r300ContextPtr r300, - struct r500_fragment_program *fp, - struct r500_fragment_program_external_state *state) -{ - int unit; - - _mesa_bzero(state, sizeof(*state)); - - for(unit = 0; unit < 16; ++unit) { - if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { - struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - - state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); - state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); - } - } -} - -static void dump_program(struct r500_fragment_program_code *code); - -void r500TranslateFragmentShader(r300ContextPtr r300, - struct r500_fragment_program *fp) -{ - struct r500_fragment_program_external_state state; - - build_state(r300, fp, &state); - if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { - /* TODO: cache compiled programs */ - fp->translated = GL_FALSE; - _mesa_memcpy(&fp->state, &state, sizeof(state)); - } - - if (!fp->translated) { - struct r500_fragment_program_compiler compiler; - - compiler.r300 = r300; - compiler.fp = fp; - compiler.code = &fp->code; - compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: Initial program:\n"); - _mesa_print_program(compiler.program); - } - - insert_WPOS_trailer(&compiler); - - struct radeon_program_transformation transformations[] = { - { &transform_TEX, &compiler }, - { &radeonTransformALU, 0 }, - { &radeonTransformDeriv, 0 }, - { &radeonTransformTrigScale, 0 } - }; - radeonLocalTransform(r300->radeon.glCtx, compiler.program, - 4, transformations); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after native rewrite:\n"); - _mesa_print_program(compiler.program); - } - - struct radeon_nqssadce_descr nqssadce = { - .Init = &nqssadce_init, - .IsNativeSwizzle = &is_native_swizzle, - .BuildSwizzle = &nqssadce_build_swizzle, - .RewriteDepthOut = GL_TRUE - }; - radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - _mesa_printf("Compiler: after NqSSA-DCE:\n"); - _mesa_print_program(compiler.program); - } - - fp->translated = r500FragmentProgramEmit(&compiler); - - /* Subtle: Rescue any parameters that have been added during transformations */ - _mesa_free_parameter_list(fp->mesa_program.Base.Parameters); - fp->mesa_program.Base.Parameters = compiler.program->Parameters; - compiler.program->Parameters = 0; - - _mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0); - - r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); - - if (RADEON_DEBUG & DEBUG_PIXEL) { - if (fp->translated) { - _mesa_printf("Machine-readable code:\n"); - dump_program(&fp->code); - } - } - - } - - update_params(r300, fp); - -} static char *toswiz(int swiz_val) { switch(swiz_val) { @@ -613,9 +375,9 @@ static char *to_texop(int val) return NULL; } -static void dump_program(struct r500_fragment_program_code *code) +void r500FragmentProgramDump(union rX00_fragment_program_code *c) { - + struct r500_fragment_program_code *code = &c->r500; fprintf(stderr, "R500 Fragment Program:\n--------\n"); int n; diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 1e45538f80..1179bf6607 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -33,30 +33,20 @@ #ifndef __R500_FRAGPROG_H_ #define __R500_FRAGPROG_H_ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" #include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/program.h" #include "shader/prog_instruction.h" #include "r300_context.h" -#include "r300_state.h" -#include "radeon_program.h" +#include "radeon_nqssadce.h" -struct r500_fragment_program; +extern GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler); -extern void r500TranslateFragmentShader(r300ContextPtr r300, - struct r500_fragment_program *fp); +extern void r500FragmentProgramDump(union rX00_fragment_program_code *c); -struct r500_fragment_program_compiler { - r300ContextPtr r300; - struct r500_fragment_program *fp; - struct r500_fragment_program_code *code; - struct gl_program *program; -}; +extern GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg); -extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler); +extern void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src); + +extern GLboolean r500_transform_TEX(struct radeon_transform_context *t, struct prog_instruction* orig_inst, void* data); #endif diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 4631235f0d..30f4514897 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -49,8 +49,8 @@ #define PROG_CODE \ - struct r500_fragment_program_compiler *c = (struct r500_fragment_program_compiler*)data; \ - struct r500_fragment_program_code *code = c->code + struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ + struct r500_fragment_program_code *code = &c->code->r500 #define error(fmt, args...) do { \ fprintf(stderr, "%s::%s(): " fmt "\n", \ @@ -72,7 +72,7 @@ static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex } if (*hwindex >= code->const_nr) { - if (*hwindex >= PFS_NUM_CONST_REGS) { + if (*hwindex >= R500_PFS_NUM_CONST_REGS) { error("Out of hw constants!\n"); return GL_FALSE; } @@ -299,9 +299,9 @@ static const struct radeon_pair_handler pair_handler = { .MaxHwTemps = 128 }; -GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler) +GLboolean r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { - struct r500_fragment_program_code *code = compiler->code; + struct r500_fragment_program_code *code = &compiler->code->r500; _mesa_bzero(code, sizeof(*code)); code->max_temp_idx = 1; diff --git a/src/mesa/drivers/dri/r300/radeon_context.c b/src/mesa/drivers/dri/r300/radeon_context.c deleted file mode 100644 index 5267fe9a77..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_context.c +++ /dev/null @@ -1,330 +0,0 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/** - * \file radeon_context.c - * Common context initialization. - * - * \author Keith Whitwell <keith@tungstengraphics.com> - */ - -#include <dlfcn.h> - -#include "main/glheader.h" -#include "main/imports.h" -#include "main/context.h" -#include "main/state.h" -#include "main/matrix.h" -#include "main/framebuffer.h" - -#include "drivers/common/driverfuncs.h" -#include "swrast/swrast.h" - -#include "radeon_screen.h" -#include "radeon_ioctl.h" -#include "radeon_macros.h" -#include "radeon_reg.h" - -#include "radeon_state.h" -#include "r300_state.h" - -#include "utils.h" -#include "vblank.h" -#include "xmlpool.h" /* for symbolic values of enum-type options */ - -#define DRIVER_DATE "20060815" - - -/* Return various strings for glGetString(). - */ -static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name) -{ - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - static char buffer[128]; - - switch (name) { - case GL_VENDOR: - if (IS_R300_CLASS(radeon->radeonScreen)) - return (GLubyte *) "DRI R300 Project"; - else - return (GLubyte *) "Tungsten Graphics, Inc."; - - case GL_RENDERER: - { - unsigned offset; - GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 : - radeon->radeonScreen->AGPMode; - const char* chipname; - - if (IS_R300_CLASS(radeon->radeonScreen)) - chipname = "R300"; - else - chipname = "R200"; - - offset = driGetRendererString(buffer, chipname, DRIVER_DATE, - agp_mode); - - if (IS_R300_CLASS(radeon->radeonScreen)) { - sprintf(&buffer[offset], " %sTCL", - (radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) - ? "" : "NO-"); - } else { - sprintf(&buffer[offset], " %sTCL", - !(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE) - ? "" : "NO-"); - } - - return (GLubyte *) buffer; - } - - default: - return NULL; - } -} - -/* Initialize the driver's misc functions. - */ -static void radeonInitDriverFuncs(struct dd_function_table *functions) -{ - functions->GetString = radeonGetString; -} - - -/** - * Create and initialize all common fields of the context, - * including the Mesa context itself. - */ -GLboolean radeonInitContext(radeonContextPtr radeon, - struct dd_function_table* functions, - const __GLcontextModes * glVisual, - __DRIcontextPrivate * driContextPriv, - void *sharedContextPrivate) -{ - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); - GLcontext* ctx; - GLcontext* shareCtx; - int fthrottle_mode; - - /* Fill in additional standard functions. */ - radeonInitDriverFuncs(functions); - - radeon->radeonScreen = screen; - /* Allocate and initialize the Mesa context */ - if (sharedContextPrivate) - shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx; - else - shareCtx = NULL; - radeon->glCtx = _mesa_create_context(glVisual, shareCtx, - functions, (void *)radeon); - if (!radeon->glCtx) - return GL_FALSE; - - ctx = radeon->glCtx; - driContextPriv->driverPrivate = radeon; - - /* DRI fields */ - radeon->dri.context = driContextPriv; - radeon->dri.screen = sPriv; - radeon->dri.drawable = NULL; - radeon->dri.readable = NULL; - radeon->dri.hwContext = driContextPriv->hHWContext; - radeon->dri.hwLock = &sPriv->pSAREA->lock; - radeon->dri.fd = sPriv->fd; - radeon->dri.drmMinor = sPriv->drm_version.minor; - - radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + - screen->sarea_priv_offset); - - /* Setup IRQs */ - fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode"); - radeon->iw.irq_seq = -1; - radeon->irqsEmitted = 0; - radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && - radeon->radeonScreen->irq); - - radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); - - if (!radeon->do_irqs) - fprintf(stderr, - "IRQ's not enabled, falling back to %s: %d %d\n", - radeon->do_usleeps ? "usleeps" : "busy waits", - fthrottle_mode, radeon->radeonScreen->irq); - - (*sPriv->systemTime->getUST) (&radeon->swap_ust); - - return GL_TRUE; -} - - -/** - * Cleanup common context fields. - * Called by r200DestroyContext/r300DestroyContext - */ -void radeonCleanupContext(radeonContextPtr radeon) -{ - /* _mesa_destroy_context() might result in calls to functions that - * depend on the DriverCtx, so don't set it to NULL before. - * - * radeon->glCtx->DriverCtx = NULL; - */ - - /* free the Mesa context */ - _mesa_destroy_context(radeon->glCtx); - - if (radeon->state.scissor.pClipRects) { - FREE(radeon->state.scissor.pClipRects); - radeon->state.scissor.pClipRects = 0; - } -} - - -/** - * Swap front and back buffer. - */ -void radeonSwapBuffers(__DRIdrawablePrivate * dPriv) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - radeonContextPtr radeon; - GLcontext *ctx; - - radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; - ctx = radeon->glCtx; - - if (ctx->Visual.doubleBufferMode) { - _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ - if (radeon->doPageFlip) { - radeonPageFlip(dPriv); - } else { - radeonCopyBuffer(dPriv, NULL); - } - } - } else { - /* XXX this shouldn't be an error but we can't handle it for now */ - _mesa_problem(NULL, "%s: drawable has no context!", - __FUNCTION__); - } -} - -void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, - int x, int y, int w, int h ) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - radeonContextPtr radeon; - GLcontext *ctx; - - radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; - ctx = radeon->glCtx; - - if (ctx->Visual.doubleBufferMode) { - drm_clip_rect_t rect; - rect.x1 = x + dPriv->x; - rect.y1 = (dPriv->h - y - h) + dPriv->y; - rect.x2 = rect.x1 + w; - rect.y2 = rect.y1 + h; - _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ - radeonCopyBuffer(dPriv, &rect); - } - } else { - /* XXX this shouldn't be an error but we can't handle it for now */ - _mesa_problem(NULL, "%s: drawable has no context!", - __FUNCTION__); - } -} - -/* Force the context `c' to be the current context and associate with it - * buffer `b'. - */ -GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv) -{ - if (driContextPriv) { - radeonContextPtr radeon = - (radeonContextPtr) driContextPriv->driverPrivate; - - if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s ctx %p\n", __FUNCTION__, - radeon->glCtx); - - if (radeon->dri.drawable != driDrawPriv) { - if (driDrawPriv->swap_interval == (unsigned)-1) { - driDrawPriv->vblFlags = - (radeon->radeonScreen->irq != 0) - ? driGetDefaultVBlankFlags(&radeon-> - optionCache) - : VBLANK_FLAG_NO_IRQ; - - driDrawableInitVBlank(driDrawPriv); - } - } - - radeon->dri.readable = driReadPriv; - - if (radeon->dri.drawable != driDrawPriv || - radeon->lastStamp != driDrawPriv->lastStamp) { - radeon->dri.drawable = driDrawPriv; - - radeonSetCliprects(radeon); - r300UpdateViewportOffset(radeon->glCtx); - } - - _mesa_make_current(radeon->glCtx, - (GLframebuffer *) driDrawPriv-> - driverPrivate, - (GLframebuffer *) driReadPriv-> - driverPrivate); - - _mesa_update_state(radeon->glCtx); - - radeonUpdatePageFlipping(radeon); - } else { - if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s ctx is null\n", __FUNCTION__); - _mesa_make_current(0, 0, 0); - } - - if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "End %s\n", __FUNCTION__); - return GL_TRUE; -} - -/* Force the context `c' to be unbound from its buffer. - */ -GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv) -{ - radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; - - if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s ctx %p\n", __FUNCTION__, - radeon->glCtx); - - return GL_TRUE; -} - diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h index 47cbc22a72..250570f6b8 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.h +++ b/src/mesa/drivers/dri/r300/radeon_context.h @@ -49,20 +49,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drm.h" #include "dri_util.h" -struct radeon_context; -typedef struct radeon_context radeonContextRec; -typedef struct radeon_context *radeonContextPtr; - -/* Rasterizing fallbacks */ -/* See correponding strings in r200_swtcl.c */ -#define RADEON_FALLBACK_TEXTURE 0x0001 -#define RADEON_FALLBACK_DRAW_BUFFER 0x0002 -#define RADEON_FALLBACK_STENCIL 0x0004 -#define RADEON_FALLBACK_RENDER_MODE 0x0008 -#define RADEON_FALLBACK_BLEND_EQ 0x0010 -#define RADEON_FALLBACK_BLEND_FUNC 0x0020 -#define RADEON_FALLBACK_DISABLE 0x0040 -#define RADEON_FALLBACK_BORDER_MODE 0x0080 +#include "radeon_screen.h" #if R200_MERGED extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode); @@ -79,155 +66,11 @@ extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode); /* TCL fallbacks */ extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode); -#define RADEON_TCL_FALLBACK_RASTER 0x0001 /* rasterization */ -#define RADEON_TCL_FALLBACK_UNFILLED 0x0002 /* unfilled tris */ -#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE 0x0004 /* twoside tris */ -#define RADEON_TCL_FALLBACK_MATERIAL 0x0008 /* material in vb */ -#define RADEON_TCL_FALLBACK_TEXGEN_0 0x0010 /* texgen, unit 0 */ -#define RADEON_TCL_FALLBACK_TEXGEN_1 0x0020 /* texgen, unit 1 */ -#define RADEON_TCL_FALLBACK_TEXGEN_2 0x0040 /* texgen, unit 2 */ -#define RADEON_TCL_FALLBACK_TEXGEN_3 0x0080 /* texgen, unit 3 */ -#define RADEON_TCL_FALLBACK_TEXGEN_4 0x0100 /* texgen, unit 4 */ -#define RADEON_TCL_FALLBACK_TEXGEN_5 0x0200 /* texgen, unit 5 */ -#define RADEON_TCL_FALLBACK_TCL_DISABLE 0x0400 /* user disable */ -#define RADEON_TCL_FALLBACK_BITMAP 0x0800 /* draw bitmap with points */ -#define RADEON_TCL_FALLBACK_VERTEX_PROGRAM 0x1000 /* vertex program active */ - #if R200_MERGED #define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode ) #else #define TCL_FALLBACK( ctx, bit, mode ) ; #endif -struct radeon_dri_mirror { - __DRIcontextPrivate *context; /* DRI context */ - __DRIscreenPrivate *screen; /* DRI screen */ - /** - * DRI drawable bound to this context for drawing. - */ - __DRIdrawablePrivate *drawable; - - /** - * DRI drawable bound to this context for reading. - */ - __DRIdrawablePrivate *readable; - - drm_context_t hwContext; - drm_hw_lock_t *hwLock; - int fd; - int drmMinor; -}; - -/** - * Derived state for internal purposes. - */ -struct radeon_scissor_state { - drm_clip_rect_t rect; - GLboolean enabled; - - GLuint numClipRects; /* Cliprects active */ - GLuint numAllocedClipRects; /* Cliprects available */ - drm_clip_rect_t *pClipRects; -}; - -struct radeon_colorbuffer_state { - GLuint clear; - GLint drawOffset, drawPitch; -}; - -struct radeon_state { - struct radeon_colorbuffer_state color; - struct radeon_scissor_state scissor; -}; - -/** - * Common per-context variables shared by R200 and R300. - * R200- and R300-specific code "derive" their own context from this - * structure. - */ -struct radeon_context { - GLcontext *glCtx; /* Mesa context */ - radeonScreenPtr radeonScreen; /* Screen private DRI data */ - - /* Fallback state */ - GLuint Fallback; - GLuint TclFallback; - - /* Page flipping */ - GLuint doPageFlip; - - /* Drawable, cliprect and scissor information */ - GLuint numClipRects; /* Cliprects for the draw buffer */ - drm_clip_rect_t *pClipRects; - unsigned int lastStamp; - GLboolean lost_context; - drm_radeon_sarea_t *sarea; /* Private SAREA data */ - - /* Mirrors of some DRI state */ - struct radeon_dri_mirror dri; - - /* Busy waiting */ - GLuint do_usleeps; - GLuint do_irqs; - GLuint irqsEmitted; - drm_radeon_irq_wait_t iw; - - /* buffer swap */ - int64_t swap_ust; - int64_t swap_missed_ust; - - GLuint swap_count; - GLuint swap_missed_count; - - /* Derived state */ - struct radeon_state state; - - /* Configuration cache - */ - driOptionCache optionCache; -}; - -#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx)) - -extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv); -extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, - int x, int y, int w, int h); -extern GLboolean radeonInitContext(radeonContextPtr radeon, - struct dd_function_table *functions, - const __GLcontextModes * glVisual, - __DRIcontextPrivate * driContextPriv, - void *sharedContextPrivate); -extern void radeonCleanupContext(radeonContextPtr radeon); -extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv); -extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv); - -/* ================================================================ - * Debugging: - */ -#define DO_DEBUG 1 - -#if DO_DEBUG -extern int RADEON_DEBUG; -#else -#define RADEON_DEBUG 0 -#endif - -#define DEBUG_TEXTURE 0x0001 -#define DEBUG_STATE 0x0002 -#define DEBUG_IOCTL 0x0004 -#define DEBUG_PRIMS 0x0008 -#define DEBUG_VERTS 0x0010 -#define DEBUG_FALLBACKS 0x0020 -#define DEBUG_VFMT 0x0040 -#define DEBUG_CODEGEN 0x0080 -#define DEBUG_VERBOSE 0x0100 -#define DEBUG_DRI 0x0200 -#define DEBUG_DMA 0x0400 -#define DEBUG_SANITY 0x0800 -#define DEBUG_SYNC 0x1000 -#define DEBUG_PIXEL 0x2000 -#define DEBUG_MEMORY 0x4000 #endif /* __RADEON_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c deleted file mode 100644 index f042a7b943..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_ioctl.c +++ /dev/null @@ -1,396 +0,0 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include <sched.h> -#include <errno.h> - -#include "main/glheader.h" -#include "main/imports.h" -#include "main/macros.h" -#include "main/context.h" -#include "swrast/swrast.h" -#include "r300_context.h" -#include "radeon_ioctl.h" -#include "r300_ioctl.h" -#include "r300_state.h" -#include "radeon_reg.h" - -#include "drirenderbuffer.h" -#include "vblank.h" - -static void radeonWaitForIdle(radeonContextPtr radeon); - -/* ================================================================ - * SwapBuffers with client-side throttling - */ - -static uint32_t radeonGetLastFrame(radeonContextPtr radeon) -{ - drm_radeon_getparam_t gp; - int ret; - uint32_t frame = 0; - - gp.param = RADEON_PARAM_LAST_FRAME; - gp.value = (int *)&frame; - ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM, - &gp, sizeof(gp)); - if (ret) { - fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, - ret); - exit(1); - } - - return frame; -} - -uint32_t radeonGetAge(radeonContextPtr radeon) -{ - drm_radeon_getparam_t gp; - int ret; - uint32_t age = 0; - - gp.param = RADEON_PARAM_LAST_CLEAR; - gp.value = (int *)&age; - ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM, - &gp, sizeof(gp)); - if (ret) { - fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, - ret); - exit(1); - } - - return age; -} - -static void radeonEmitIrqLocked(radeonContextPtr radeon) -{ - drm_radeon_irq_emit_t ie; - int ret; - - ie.irq_seq = &radeon->iw.irq_seq; - ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT, - &ie, sizeof(ie)); - if (ret) { - fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, - ret); - exit(1); - } -} - -static void radeonWaitIrq(radeonContextPtr radeon) -{ - int ret; - - do { - ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT, - &radeon->iw, sizeof(radeon->iw)); - } while (ret && (errno == EINTR || errno == EBUSY)); - - if (ret) { - fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, - ret); - exit(1); - } -} - -static void radeonWaitForFrameCompletion(radeonContextPtr radeon) -{ - drm_radeon_sarea_t *sarea = radeon->sarea; - - if (radeon->do_irqs) { - if (radeonGetLastFrame(radeon) < sarea->last_frame) { - if (!radeon->irqsEmitted) { - while (radeonGetLastFrame(radeon) < - sarea->last_frame) ; - } else { - UNLOCK_HARDWARE(radeon); - radeonWaitIrq(radeon); - LOCK_HARDWARE(radeon); - } - radeon->irqsEmitted = 10; - } - - if (radeon->irqsEmitted) { - radeonEmitIrqLocked(radeon); - radeon->irqsEmitted--; - } - } else { - while (radeonGetLastFrame(radeon) < sarea->last_frame) { - UNLOCK_HARDWARE(radeon); - if (radeon->do_usleeps) - DO_USLEEP(1); - LOCK_HARDWARE(radeon); - } - } -} - -/* Copy the back color buffer to the front color buffer. - */ -void radeonCopyBuffer(__DRIdrawablePrivate * dPriv, - const drm_clip_rect_t * rect) -{ - radeonContextPtr radeon; - GLint nbox, i, ret; - GLboolean missed_target; - int64_t ust; - __DRIscreenPrivate *psp = dPriv->driScreenPriv; - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; - - if (RADEON_DEBUG & DEBUG_IOCTL) { - fprintf(stderr, "\n%s( %p )\n\n", __FUNCTION__, - (void *)radeon->glCtx); - } - - r300Flush(radeon->glCtx); - - LOCK_HARDWARE(radeon); - - /* Throttle the frame rate -- only allow one pending swap buffers - * request at a time. - */ - radeonWaitForFrameCompletion(radeon); - if (!rect) - { - UNLOCK_HARDWARE(radeon); - driWaitForVBlank(dPriv, &missed_target); - LOCK_HARDWARE(radeon); - } - - nbox = dPriv->numClipRects; /* must be in locked region */ - - for (i = 0; i < nbox;) { - GLint nr = MIN2(i + RADEON_NR_SAREA_CLIPRECTS, nbox); - drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t *b = radeon->sarea->boxes; - GLint n = 0; - - for ( ; i < nr ; i++ ) { - - *b = box[i]; - - if (rect) - { - if (rect->x1 > b->x1) - b->x1 = rect->x1; - if (rect->y1 > b->y1) - b->y1 = rect->y1; - if (rect->x2 < b->x2) - b->x2 = rect->x2; - if (rect->y2 < b->y2) - b->y2 = rect->y2; - - if (b->x1 >= b->x2 || b->y1 >= b->y2) - continue; - } - - b++; - n++; - } - radeon->sarea->nbox = n; - - if (!n) - continue; - - ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_SWAP); - - if (ret) { - fprintf(stderr, "DRM_RADEON_SWAP: return = %d\n", - ret); - UNLOCK_HARDWARE(radeon); - exit(1); - } - } - - UNLOCK_HARDWARE(radeon); - if (!rect) - { - ((r300ContextPtr)radeon)->hw.all_dirty = GL_TRUE; - - radeon->swap_count++; - (*psp->systemTime->getUST) (&ust); - if (missed_target) { - radeon->swap_missed_count++; - radeon->swap_missed_ust = ust - radeon->swap_ust; - } - - radeon->swap_ust = ust; - - sched_yield(); - } -} - -void radeonPageFlip(__DRIdrawablePrivate * dPriv) -{ - radeonContextPtr radeon; - GLint ret; - GLboolean missed_target; - __DRIscreenPrivate *psp = dPriv->driScreenPriv; - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; - - if (RADEON_DEBUG & DEBUG_IOCTL) { - fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, - radeon->sarea->pfCurrentPage); - } - - r300Flush(radeon->glCtx); - LOCK_HARDWARE(radeon); - - if (!dPriv->numClipRects) { - UNLOCK_HARDWARE(radeon); - usleep(10000); /* throttle invisible client 10ms */ - return; - } - - /* Need to do this for the perf box placement: - */ - { - drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t *b = radeon->sarea->boxes; - b[0] = box[0]; - radeon->sarea->nbox = 1; - } - - /* Throttle the frame rate -- only allow a few pending swap buffers - * request at a time. - */ - radeonWaitForFrameCompletion(radeon); - UNLOCK_HARDWARE(radeon); - driWaitForVBlank(dPriv, &missed_target); - if (missed_target) { - radeon->swap_missed_count++; - (void)(*psp->systemTime->getUST) (&radeon->swap_missed_ust); - } - LOCK_HARDWARE(radeon); - - ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_FLIP); - - UNLOCK_HARDWARE(radeon); - - if (ret) { - fprintf(stderr, "DRM_RADEON_FLIP: return = %d\n", ret); - exit(1); - } - - radeon->swap_count++; - (void)(*psp->systemTime->getUST) (&radeon->swap_ust); - - driFlipRenderbuffers(radeon->glCtx->WinSysDrawBuffer, - radeon->sarea->pfCurrentPage); - - if (radeon->sarea->pfCurrentPage == 1) { - radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; - radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch; - } else { - radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; - radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; - } - - if (IS_R300_CLASS(radeon->radeonScreen)) { - r300ContextPtr r300 = (r300ContextPtr)radeon; - R300_STATECHANGE(r300, cb); - r300->hw.cb.cmd[R300_CB_OFFSET] = r300->radeon.state.color.drawOffset + - r300->radeon.radeonScreen->fbLocation; - r300->hw.cb.cmd[R300_CB_PITCH] = r300->radeon.state.color.drawPitch; - - if (r300->radeon.radeonScreen->cpp == 4) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_ARGB8888; - else - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_FORMAT_RGB565; - - if (r300->radeon.sarea->tiling_enabled) - r300->hw.cb.cmd[R300_CB_PITCH] |= R300_COLOR_TILE_ENABLE; - } -} - -void radeonWaitForIdleLocked(radeonContextPtr radeon) -{ - int ret; - int i = 0; - - do { - ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE); - if (ret) - DO_USLEEP(1); - } while (ret && ++i < 100); - - if (ret < 0) { - UNLOCK_HARDWARE(radeon); - fprintf(stderr, "Error: R300 timed out... exiting\n"); - exit(-1); - } -} - -static void radeonWaitForIdle(radeonContextPtr radeon) -{ - LOCK_HARDWARE(radeon); - radeonWaitForIdleLocked(radeon); - UNLOCK_HARDWARE(radeon); -} - -void radeonFlush(GLcontext * ctx) -{ - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - - if (IS_R300_CLASS(radeon->radeonScreen)) - r300Flush(ctx); -} - - -/* Make sure all commands have been sent to the hardware and have - * completed processing. - */ -void radeonFinish(GLcontext * ctx) -{ - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - - radeonFlush(ctx); - - if (radeon->do_irqs) { - LOCK_HARDWARE(radeon); - radeonEmitIrqLocked(radeon); - UNLOCK_HARDWARE(radeon); - radeonWaitIrq(radeon); - } else - radeonWaitForIdle(radeon); -} diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.h b/src/mesa/drivers/dri/r300/radeon_ioctl.h deleted file mode 100644 index 3add775b82..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_ioctl.h +++ /dev/null @@ -1,57 +0,0 @@ -/* -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#ifndef __RADEON_IOCTL_H__ -#define __RADEON_IOCTL_H__ - -#include "main/simple_list.h" -#include "radeon_dri.h" -#include "radeon_lock.h" - -#include "xf86drm.h" -#include "drm.h" -#if 0 -#include "r200context.h" -#endif -#include "radeon_drm.h" - -extern void radeonCopyBuffer(__DRIdrawablePrivate * drawable, - const drm_clip_rect_t * rect); -extern void radeonPageFlip(__DRIdrawablePrivate * drawable); -extern void radeonFlush(GLcontext * ctx); -extern void radeonFinish(GLcontext * ctx); -extern void radeonWaitForIdleLocked(radeonContextPtr radeon); -extern uint32_t radeonGetAge(radeonContextPtr radeon); - -#endif /* __RADEON_IOCTL_H__ */ diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c deleted file mode 100644 index 4f47afd5dc..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_lock.c +++ /dev/null @@ -1,137 +0,0 @@ -/************************************************************************** - -Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and - VA Linux Systems Inc., Fremont, California. -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Gareth Hughes <gareth@valinux.com> - * Keith Whitwell <keith@tungstengraphics.com> - * Kevin E. Martin <martin@valinux.com> - */ - -#include "radeon_lock.h" -#include "radeon_ioctl.h" -#include "radeon_state.h" -#include "r300_context.h" -#include "r300_state.h" - -#include "main/framebuffer.h" - -#include "drirenderbuffer.h" - -#if DEBUG_LOCKING -char *prevLockFile = NULL; -int prevLockLine = 0; -#endif - -/* Turn on/off page flipping according to the flags in the sarea: - */ -void radeonUpdatePageFlipping(radeonContextPtr rmesa) -{ - int use_back; - - rmesa->doPageFlip = rmesa->sarea->pfState; - if (rmesa->glCtx->WinSysDrawBuffer) { - driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, - rmesa->sarea->pfCurrentPage); - r300UpdateDrawBuffer(rmesa->glCtx); - } - - use_back = rmesa->glCtx->DrawBuffer ? - (rmesa->glCtx->DrawBuffer->_ColorDrawBufferIndexes[0] == - BUFFER_BACK_LEFT) : 1; - use_back ^= (rmesa->sarea->pfCurrentPage == 1); - - if (use_back) { - rmesa->state.color.drawOffset = - rmesa->radeonScreen->backOffset; - rmesa->state.color.drawPitch = rmesa->radeonScreen->backPitch; - } else { - rmesa->state.color.drawOffset = - rmesa->radeonScreen->frontOffset; - rmesa->state.color.drawPitch = - rmesa->radeonScreen->frontPitch; - } -} - -/* Update the hardware state. This is called if another context has - * grabbed the hardware lock, which includes the X server. This - * function also updates the driver's window state after the X server - * moves, resizes or restacks a window -- the change will be reflected - * in the drawable position and clip rects. Since the X server grabs - * the hardware lock when it changes the window state, this routine will - * automatically be called after such a change. - */ -void radeonGetLock(radeonContextPtr rmesa, GLuint flags) -{ - __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; - __DRIdrawablePrivate *const readable = rmesa->dri.readable; - __DRIscreenPrivate *sPriv = rmesa->dri.screen; - drm_radeon_sarea_t *sarea = rmesa->sarea; - r300ContextPtr r300 = (r300ContextPtr) rmesa; - - assert(drawable != NULL); - - drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags); - - /* The window might have moved, so we might need to get new clip - * rects. - * - * NOTE: This releases and regrabs the hw lock to allow the X server - * to respond to the DRI protocol request for new drawable info. - * Since the hardware state depends on having the latest drawable - * clip rects, all state checking must be done _after_ this call. - */ - DRI_VALIDATE_DRAWABLE_INFO(sPriv, drawable); - if (drawable != readable) { - DRI_VALIDATE_DRAWABLE_INFO(sPriv, readable); - } - - if (rmesa->lastStamp != drawable->lastStamp) { - radeonUpdatePageFlipping(rmesa); - radeonSetCliprects(rmesa); - r300UpdateViewportOffset(rmesa->glCtx); - driUpdateFramebufferSize(rmesa->glCtx, drawable); - } - - if (sarea->ctx_owner != rmesa->dri.hwContext) { - int i; - - sarea->ctx_owner = rmesa->dri.hwContext; - for (i = 0; i < r300->nr_heaps; i++) { - DRI_AGE_TEXTURES(r300->texture_heaps[i]); - } - } - - rmesa->lost_context = GL_TRUE; -} diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h deleted file mode 100644 index a344837f47..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_lock.h +++ /dev/null @@ -1,115 +0,0 @@ -/************************************************************************** - -Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and - VA Linux Systems Inc., Fremont, California. -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Gareth Hughes <gareth@valinux.com> - * Keith Whitwell <keith@tungstengraphics.com> - * Kevin E. Martin <martin@valinux.com> - */ - -#ifndef __RADEON_LOCK_H__ -#define __RADEON_LOCK_H__ - -#include "radeon_context.h" - -extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); -extern void radeonUpdatePageFlipping(radeonContextPtr rmesa); - -/* Turn DEBUG_LOCKING on to find locking conflicts. - */ -#define DEBUG_LOCKING 0 - -#if DEBUG_LOCKING -extern char *prevLockFile; -extern int prevLockLine; - -#define DEBUG_LOCK() \ - do { \ - prevLockFile = (__FILE__); \ - prevLockLine = (__LINE__); \ - } while (0) - -#define DEBUG_RESET() \ - do { \ - prevLockFile = 0; \ - prevLockLine = 0; \ - } while (0) - -#define DEBUG_CHECK_LOCK() \ - do { \ - if (prevLockFile) { \ - fprintf(stderr, \ - "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \ - prevLockFile, prevLockLine, __FILE__, __LINE__); \ - exit(1); \ - } \ - } while (0) - -#else - -#define DEBUG_LOCK() -#define DEBUG_RESET() -#define DEBUG_CHECK_LOCK() - -#endif - -/* - * !!! We may want to separate locks from locks with validation. This - * could be used to improve performance for those things commands that - * do not do any drawing !!! - */ - -/* Lock the hardware and validate our state. - */ -#define LOCK_HARDWARE( rmesa ) \ - do { \ - char __ret = 0; \ - DEBUG_CHECK_LOCK(); \ - DRM_CAS((rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \ - (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret); \ - if (__ret) \ - radeonGetLock((rmesa), 0); \ - DEBUG_LOCK(); \ - } while (0) - -#define UNLOCK_HARDWARE( rmesa ) \ - do { \ - DRM_UNLOCK((rmesa)->dri.fd, \ - (rmesa)->dri.hwLock, \ - (rmesa)->dri.hwContext); \ - DEBUG_RESET(); \ - } while (0) - -#endif /* __RADEON_LOCK_H__ */ diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c index 2e21f7bf66..906d36e522 100644 --- a/src/mesa/drivers/dri/r300/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c @@ -35,7 +35,7 @@ #include "radeon_program_pair.h" -#include "radeon_context.h" +#include "radeon_common.h" #include "shader/prog_print.h" @@ -47,7 +47,6 @@ struct pair_state_instruction { GLuint IsTex:1; /**< Is a texture instruction */ - GLuint IsOutput:1; /**< Is output instruction */ GLuint NeedRGB:1; /**< Needs the RGB ALU */ GLuint NeedAlpha:1; /**< Needs the Alpha ALU */ GLuint IsTranscendent:1; /**< Is a special transcendent instruction */ @@ -124,7 +123,6 @@ struct pair_state { GLboolean Debug; GLboolean Verbose; void *UserData; - GLubyte NumKillInsts; /** * Translate Mesa registers to hardware registers @@ -151,11 +149,6 @@ struct pair_state { struct pair_state_instruction *ReadyTEX; /** - * Linked list of deferred instructions - */ - struct pair_state_instruction *DeferredInsts; - - /** * Pool of @ref reg_value structures for fast allocation. */ struct reg_value *ValuePool; @@ -238,9 +231,7 @@ static void instruction_ready(struct pair_state *s, int ip) if (s->Verbose) _mesa_printf("instruction_ready(%i)\n", ip); - if (s->NumKillInsts > 0 && pairinst->IsOutput) - add_pairinst_to_list(&s->DeferredInsts, pairinst); - else if (pairinst->IsTex) + if (pairinst->IsTex) add_pairinst_to_list(&s->ReadyTEX, pairinst); else if (!pairinst->NeedAlpha) add_pairinst_to_list(&s->ReadyRGB, pairinst); @@ -348,8 +339,6 @@ static void classify_instruction(struct pair_state *s, error("Unknown opcode %d\n", inst->Opcode); break; } - - pairinst->IsOutput = (inst->DstReg.File == PROGRAM_OUTPUT); } @@ -613,16 +602,14 @@ static void emit_all_tex(struct pair_state *s) struct prog_instruction *inst = s->Program->Instructions + ip; commit_instruction(s, ip); - if (inst->Opcode == OPCODE_KIL) - --s->NumKillInsts; - else + if (inst->Opcode != OPCODE_KIL) inst->DstReg.Index = get_hw_reg(s, inst->DstReg.File, inst->DstReg.Index); - inst->SrcReg[0].Index = get_hw_reg(s, inst->SrcReg[0].File, inst->SrcReg[0].Index); if (s->Debug) { _mesa_printf(" "); _mesa_print_instruction(inst); + fflush(stdout); } s->Error = s->Error || !s->Handler->EmitTex(s->UserData, inst); } @@ -875,17 +862,6 @@ static void emit_alu(struct pair_state *s) s->Error = s->Error || !s->Handler->EmitPaired(s->UserData, &pair); } -static GLubyte countKillInsts(struct gl_program *prog) -{ - GLubyte i, count = 0; - - for (i = 0; i < prog->NumInstructions; ++i) { - if (prog->Instructions[i].Opcode == OPCODE_KIL) - ++count; - } - - return count; -} GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, const struct radeon_pair_handler* handler, void *userdata) @@ -899,7 +875,6 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, s.UserData = userdata; s.Debug = (RADEON_DEBUG & DEBUG_PIXEL) ? GL_TRUE : GL_FALSE; s.Verbose = GL_FALSE && s.Debug; - s.NumKillInsts = countKillInsts(program); s.Instructions = (struct pair_state_instruction*)_mesa_calloc( sizeof(struct pair_state_instruction)*s.Program->NumInstructions); @@ -918,21 +893,6 @@ GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program, if (s.ReadyTEX) emit_all_tex(&s); - if (!s.NumKillInsts) { - struct pair_state_instruction *pairinst = s.DeferredInsts; - while (pairinst) { - if (!pairinst->NeedAlpha) - add_pairinst_to_list(&s.ReadyRGB, pairinst); - else if (!pairinst->NeedRGB) - add_pairinst_to_list(&s.ReadyAlpha, pairinst); - else - add_pairinst_to_list(&s.ReadyFullALU, pairinst); - - pairinst = pairinst->NextReady; - } - s.DeferredInsts = NULL; - } - while(s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha) emit_alu(&s); } diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c deleted file mode 100644 index 16f9fb99e6..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_span.c +++ /dev/null @@ -1,349 +0,0 @@ -/************************************************************************** - -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. -Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and - VA Linux Systems Inc., Fremont, California. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Kevin E. Martin <martin@valinux.com> - * Gareth Hughes <gareth@valinux.com> - * Keith Whitwell <keith@tungstengraphics.com> - * - */ - -#include "main/glheader.h" -#include "swrast/swrast.h" - -#include "r300_state.h" -#include "radeon_ioctl.h" -#include "r300_ioctl.h" -#include "radeon_span.h" - -#include "drirenderbuffer.h" - -#define DBG 0 - -/* - * Note that all information needed to access pixels in a renderbuffer - * should be obtained through the gl_renderbuffer parameter, not per-context - * information. - */ -#define LOCAL_VARS \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - const __DRIdrawablePrivate *dPriv = drb->dPriv; \ - const GLuint bottom = dPriv->h - 1; \ - GLubyte *buf = (GLubyte *) drb->flippedData \ - + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ - GLuint p; \ - (void) p; - -#define LOCAL_DEPTH_VARS \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - const __DRIdrawablePrivate *dPriv = drb->dPriv; \ - const GLuint bottom = dPriv->h - 1; \ - GLuint xo = dPriv->x; \ - GLuint yo = dPriv->y; \ - GLubyte *buf = (GLubyte *) drb->Base.Data; - -#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS - -#define Y_FLIP(Y) (bottom - (Y)) - -#define HW_LOCK() - -#define HW_UNLOCK() - -/* ================================================================ - * Color buffer - */ - -/* 16 bit, RGB565 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_RGB -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 - -#define TAG(x) radeon##x##_RGB565 -#define TAG2(x,y) radeon##x##_RGB565##y -#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) -#include "spantmp2.h" - -/* 32 bit, ARGB8888 color spanline and pixel functions - */ -#define SPANTMP_PIXEL_FMT GL_BGRA -#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV - -#define TAG(x) radeon##x##_ARGB8888 -#define TAG2(x,y) radeon##x##_ARGB8888##y -#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) -#include "spantmp2.h" - -/* ================================================================ - * Depth buffer - */ - -/* The Radeon family has depth tiling on all the time, so we have to convert - * the x,y coordinates into the memory bus address (mba) in the same - * manner as the engine. In each case, the linear block address (ba) - * is calculated, and then wired with x and y to produce the final - * memory address. - * The chip will do address translation on its own if the surface registers - * are set up correctly. It is not quite enough to get it working with hyperz - * too... - */ - -static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) -{ - GLuint pitch = drb->pitch; - if (drb->depthHasSurface) { - return 4 * (x + y * pitch); - } else { - GLuint ba, address = 0; /* a[0..1] = 0 */ - -#ifdef COMPILE_R300 - ba = (y / 8) * (pitch / 8) + (x / 8); -#else - ba = (y / 16) * (pitch / 16) + (x / 16); -#endif - - address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */ - address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */ - address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */ - address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ - - address |= (y & 0x8) << 7; /* a[10] = y[3] */ - address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */ - address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ - - return address; - } -} - -static INLINE GLuint -radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) -{ - GLuint pitch = drb->pitch; - if (drb->depthHasSurface) { - return 2 * (x + y * pitch); - } else { - GLuint ba, address = 0; /* a[0] = 0 */ - - ba = (y / 16) * (pitch / 32) + (x / 32); - - address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */ - address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */ - address |= (x & 0x8) << 4; /* a[7] = x[3] */ - address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ - address |= (y & 0x8) << 7; /* a[10] = y[3] */ - address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */ - address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ - - return address; - } -} - -/* 16-bit depth buffer functions - */ -#define VALUE_TYPE GLushort - -#define WRITE_DEPTH( _x, _y, d ) \ - *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; - -#define READ_DEPTH( d, _x, _y ) \ - d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )); - -#define TAG(x) radeon##x##_z16 -#include "depthtmp.h" - -/* 24 bit depth, 8 bit stencil depthbuffer functions - * - * Careful: It looks like the R300 uses ZZZS byte order while the R200 - * uses SZZZ for 24 bit depth, 8 bit stencil mode. - */ -#define VALUE_TYPE GLuint - -#ifdef COMPILE_R300 -#define WRITE_DEPTH( _x, _y, d ) \ -do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - tmp &= 0x000000ff; \ - tmp |= ((d << 8) & 0xffffff00); \ - *(GLuint *)(buf + offset) = tmp; \ -} while (0) -#else -#define WRITE_DEPTH( _x, _y, d ) \ -do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - tmp &= 0xff000000; \ - tmp |= ((d) & 0x00ffffff); \ - *(GLuint *)(buf + offset) = tmp; \ -} while (0) -#endif - -#ifdef COMPILE_R300 -#define READ_DEPTH( d, _x, _y ) \ - do { \ - d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ - _y + yo )) & 0xffffff00) >> 8; \ - }while(0) -#else -#define READ_DEPTH( d, _x, _y ) \ - d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ - _y + yo )) & 0x00ffffff; -#endif - -#define TAG(x) radeon##x##_z24_s8 -#include "depthtmp.h" - -/* ================================================================ - * Stencil buffer - */ - -/* 24 bit depth, 8 bit stencil depthbuffer functions - */ -#ifdef COMPILE_R300 -#define WRITE_STENCIL( _x, _y, d ) \ -do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - tmp &= 0xffffff00; \ - tmp |= (d) & 0xff; \ - *(GLuint *)(buf + offset) = tmp; \ -} while (0) -#else -#define WRITE_STENCIL( _x, _y, d ) \ -do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - tmp &= 0x00ffffff; \ - tmp |= (((d) & 0xff) << 24); \ - *(GLuint *)(buf + offset) = tmp; \ -} while (0) -#endif - -#ifdef COMPILE_R300 -#define READ_STENCIL( d, _x, _y ) \ -do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - d = tmp & 0x000000ff; \ -} while (0) -#else -#define READ_STENCIL( d, _x, _y ) \ -do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ - d = (tmp & 0xff000000) >> 24; \ -} while (0) -#endif - -#define TAG(x) radeon##x##_z24_s8 -#include "stenciltmp.h" - -/* Move locking out to get reasonable span performance (10x better - * than doing this in HW_LOCK above). WaitForIdle() is the main - * culprit. - */ - -static void radeonSpanRenderStart(GLcontext * ctx) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); -#ifdef COMPILE_R300 - r300ContextPtr r300 = (r300ContextPtr) rmesa; - R300_FIREVERTICES(r300); -#else - RADEON_FIREVERTICES(rmesa); -#endif - LOCK_HARDWARE(rmesa); - radeonWaitForIdleLocked(rmesa); - - /* Read the first pixel in the frame buffer. This should - * be a noop, right? In fact without this conform fails as reading - * from the framebuffer sometimes produces old results -- the - * on-card read cache gets mixed up and doesn't notice that the - * framebuffer has been updated. - * - * Note that we should probably be reading some otherwise unused - * region of VRAM, otherwise we might get incorrect results when - * reading pixels from the top left of the screen. - * - * I found this problem on an R420 with glean's texCube test. - * Note that the R200 span code also *writes* the first pixel in the - * framebuffer, but I've found this to be unnecessary. - * -- Nicolai Hähnle, June 2008 - */ - { - int p; - driRenderbuffer *drb = - (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; - volatile int *buf = - (volatile int *)(rmesa->dri.screen->pFB + drb->offset); - p = *buf; - } -} - -static void radeonSpanRenderFinish(GLcontext * ctx) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - _swrast_flush(ctx); - UNLOCK_HARDWARE(rmesa); -} - -void radeonInitSpanFuncs(GLcontext * ctx) -{ - struct swrast_device_driver *swdd = - _swrast_GetDeviceDriverReference(ctx); - swdd->SpanRenderStart = radeonSpanRenderStart; - swdd->SpanRenderFinish = radeonSpanRenderFinish; -} - -/** - * Plug in the Get/Put routines for the given driRenderbuffer. - */ -void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis) -{ - if (drb->Base.InternalFormat == GL_RGBA) { - if (vis->redBits == 5 && vis->greenBits == 6 - && vis->blueBits == 5) { - radeonInitPointers_RGB565(&drb->Base); - } else { - radeonInitPointers_ARGB8888(&drb->Base); - } - } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { - radeonInitDepthPointers_z16(&drb->Base); - } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { - radeonInitDepthPointers_z24_s8(&drb->Base); - } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { - radeonInitStencilPointers_z24_s8(&drb->Base); - } -} diff --git a/src/mesa/drivers/dri/r300/radeon_state.c b/src/mesa/drivers/dri/r300/radeon_state.c deleted file mode 100644 index c401da6c54..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_state.c +++ /dev/null @@ -1,244 +0,0 @@ -/************************************************************************** - -Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. - -The Weather Channel (TM) funded Tungsten Graphics to develop the -initial release of the Radeon 8500 driver under the XFree86 license. -This notice must be preserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - */ - -#include "main/glheader.h" -#include "main/imports.h" -#include "main/api_arrayelt.h" -#include "main/enums.h" -#include "main/framebuffer.h" -#include "main/colormac.h" -#include "main/light.h" - -#include "swrast/swrast.h" -#include "vbo/vbo.h" -#include "tnl/tnl.h" -#include "tnl/t_pipeline.h" -#include "swrast_setup/swrast_setup.h" - -#include "radeon_ioctl.h" -#include "radeon_state.h" -#include "r300_ioctl.h" - - -/* ============================================================= - * Scissoring - */ - -static GLboolean intersect_rect(drm_clip_rect_t * out, - drm_clip_rect_t * a, drm_clip_rect_t * b) -{ - *out = *a; - if (b->x1 > out->x1) - out->x1 = b->x1; - if (b->y1 > out->y1) - out->y1 = b->y1; - if (b->x2 < out->x2) - out->x2 = b->x2; - if (b->y2 < out->y2) - out->y2 = b->y2; - if (out->x1 >= out->x2) - return GL_FALSE; - if (out->y1 >= out->y2) - return GL_FALSE; - return GL_TRUE; -} - -void radeonRecalcScissorRects(radeonContextPtr radeon) -{ - drm_clip_rect_t *out; - int i; - - /* Grow cliprect store? - */ - if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) { - while (radeon->state.scissor.numAllocedClipRects < - radeon->numClipRects) { - radeon->state.scissor.numAllocedClipRects += 1; /* zero case */ - radeon->state.scissor.numAllocedClipRects *= 2; - } - - if (radeon->state.scissor.pClipRects) - FREE(radeon->state.scissor.pClipRects); - - radeon->state.scissor.pClipRects = - MALLOC(radeon->state.scissor.numAllocedClipRects * - sizeof(drm_clip_rect_t)); - - if (radeon->state.scissor.pClipRects == NULL) { - radeon->state.scissor.numAllocedClipRects = 0; - return; - } - } - - out = radeon->state.scissor.pClipRects; - radeon->state.scissor.numClipRects = 0; - - for (i = 0; i < radeon->numClipRects; i++) { - if (intersect_rect(out, - &radeon->pClipRects[i], - &radeon->state.scissor.rect)) { - radeon->state.scissor.numClipRects++; - out++; - } - } -} - -void radeonUpdateScissor(GLcontext* ctx) -{ - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - - if (radeon->dri.drawable) { - __DRIdrawablePrivate *dPriv = radeon->dri.drawable; - int x1 = dPriv->x + ctx->Scissor.X; - int y1 = dPriv->y + dPriv->h - (ctx->Scissor.Y + ctx->Scissor.Height); - - radeon->state.scissor.rect.x1 = x1; - radeon->state.scissor.rect.y1 = y1; - radeon->state.scissor.rect.x2 = x1 + ctx->Scissor.Width; - radeon->state.scissor.rect.y2 = y1 + ctx->Scissor.Height; - - radeonRecalcScissorRects(radeon); - } -} - -static void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h) -{ - if (ctx->Scissor.Enabled) { - /* We don't pipeline cliprect changes */ - r300Flush(ctx); - radeonUpdateScissor(ctx); - } -} - - -/** - * Update cliprects and scissors. - */ -void radeonSetCliprects(radeonContextPtr radeon) -{ - __DRIdrawablePrivate *const drawable = radeon->dri.drawable; - __DRIdrawablePrivate *const readable = radeon->dri.readable; - GLframebuffer *const draw_fb = (GLframebuffer*)drawable->driverPrivate; - GLframebuffer *const read_fb = (GLframebuffer*)readable->driverPrivate; - - if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { - /* Can't ignore 2d windows if we are page flipping. */ - if (drawable->numBackClipRects == 0 || radeon->doPageFlip || - radeon->sarea->pfCurrentPage == 1) { - radeon->numClipRects = drawable->numClipRects; - radeon->pClipRects = drawable->pClipRects; - } else { - radeon->numClipRects = drawable->numBackClipRects; - radeon->pClipRects = drawable->pBackClipRects; - } - } else { - /* front buffer (or none, or multiple buffers */ - radeon->numClipRects = drawable->numClipRects; - radeon->pClipRects = drawable->pClipRects; - } - - if ((draw_fb->Width != drawable->w) || - (draw_fb->Height != drawable->h)) { - _mesa_resize_framebuffer(radeon->glCtx, draw_fb, - drawable->w, drawable->h); - draw_fb->Initialized = GL_TRUE; - } - - if (drawable != readable) { - if ((read_fb->Width != readable->w) || - (read_fb->Height != readable->h)) { - _mesa_resize_framebuffer(radeon->glCtx, read_fb, - readable->w, readable->h); - read_fb->Initialized = GL_TRUE; - } - } - - if (radeon->state.scissor.enabled) - radeonRecalcScissorRects(radeon); - - radeon->lastStamp = drawable->lastStamp; -} - - -/** - * Handle common enable bits. - * Called as a fallback by r200Enable/r300Enable. - */ -void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state) -{ - radeonContextPtr radeon = RADEON_CONTEXT(ctx); - - switch(cap) { - case GL_SCISSOR_TEST: - /* We don't pipeline cliprect & scissor changes */ - r300Flush(ctx); - - radeon->state.scissor.enabled = state; - radeonUpdateScissor(ctx); - break; - - default: - return; - } -} - - -/** - * Initialize default state. - * This function is called once at context init time from - * r200InitState/r300InitState - */ -void radeonInitState(radeonContextPtr radeon) -{ - radeon->Fallback = 0; - - if (radeon->glCtx->Visual.doubleBufferMode && radeon->sarea->pfCurrentPage == 0) { - radeon->state.color.drawOffset = radeon->radeonScreen->backOffset; - radeon->state.color.drawPitch = radeon->radeonScreen->backPitch; - } else { - radeon->state.color.drawOffset = radeon->radeonScreen->frontOffset; - radeon->state.color.drawPitch = radeon->radeonScreen->frontPitch; - } -} - - -/** - * Initialize common state functions. - * Called by r200InitStateFuncs/r300InitStateFuncs - */ -void radeonInitStateFuncs(struct dd_function_table *functions) -{ - functions->Scissor = radeonScissor; -} diff --git a/src/mesa/drivers/dri/r300/radeon_state.h b/src/mesa/drivers/dri/r300/radeon_state.h deleted file mode 100644 index 821cb40c7e..0000000000 --- a/src/mesa/drivers/dri/r300/radeon_state.h +++ /dev/null @@ -1,43 +0,0 @@ -/* -Copyright (C) 2004 Nicolai Haehnle. All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Nicolai Haehnle <prefect_@gmx.net> - */ - -#ifndef __RADEON_STATE_H__ -#define __RADEON_STATE_H__ - -extern void radeonRecalcScissorRects(radeonContextPtr radeon); -extern void radeonSetCliprects(radeonContextPtr radeon); -extern void radeonUpdateScissor(GLcontext* ctx); - -extern void radeonEnable(GLcontext* ctx, GLenum cap, GLboolean state); - -extern void radeonInitState(radeonContextPtr radeon); -extern void radeonInitStateFuncs(struct dd_function_table* functions); - -#endif diff --git a/src/mesa/drivers/dri/radeon/Makefile b/src/mesa/drivers/dri/radeon/Makefile index f223b2d922..ba409ba813 100644 --- a/src/mesa/drivers/dri/radeon/Makefile +++ b/src/mesa/drivers/dri/radeon/Makefile @@ -4,25 +4,37 @@ TOP = ../../../../.. include $(TOP)/configs/current +CFLAGS += $(RADEON_CFLAGS) + LIBNAME = radeon_dri.so MINIGLX_SOURCES = server/radeon_dri.c +RADEON_COMMON_SOURCES = \ + radeon_texture.c \ + radeon_common_context.c \ + radeon_common.c \ + radeon_dma.c \ + radeon_lock.c \ + radeon_bo_legacy.c \ + radeon_cs_legacy.c \ + radeon_mipmap_tree.c \ + radeon_span.c \ + radeon_fbo.c + DRIVER_SOURCES = \ radeon_context.c \ radeon_ioctl.c \ - radeon_lock.c \ radeon_screen.c \ radeon_state.c \ radeon_state_init.c \ radeon_tex.c \ - radeon_texmem.c \ radeon_texstate.c \ radeon_tcl.c \ radeon_swtcl.c \ - radeon_span.c \ radeon_maos.c \ - radeon_sanity.c + radeon_sanity.c \ + $(RADEON_COMMON_SOURCES) C_SOURCES = \ $(COMMON_SOURCES) \ @@ -30,6 +42,8 @@ C_SOURCES = \ DRIVER_DEFINES = -DRADEON_COMMON=0 +DRI_LIB_DEPS += $(RADEON_LDFLAGS) + X86_SOURCES = include ../Makefile.template diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_drm.h b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h new file mode 100644 index 0000000000..1ed13f1795 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_bo_drm.h @@ -0,0 +1,182 @@ +/* + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: + * Jérôme Glisse <glisse@freedesktop.org> + */ +#ifndef RADEON_BO_H +#define RADEON_BO_H + +#include <stdio.h> +#include <stdint.h> +//#include "radeon_track.h" + +/* bo object */ +#define RADEON_BO_FLAGS_MACRO_TILE 1 +#define RADEON_BO_FLAGS_MICRO_TILE 2 + +struct radeon_bo_manager; + +struct radeon_bo { + uint32_t alignment; + uint32_t handle; + uint32_t size; + uint32_t domains; + uint32_t flags; + unsigned cref; +#ifdef RADEON_BO_TRACK + struct radeon_track *track; +#endif + void *ptr; + struct radeon_bo_manager *bom; + uint32_t space_accounted; +}; + +/* bo functions */ +struct radeon_bo_funcs { + struct radeon_bo *(*bo_open)(struct radeon_bo_manager *bom, + uint32_t handle, + uint32_t size, + uint32_t alignment, + uint32_t domains, + uint32_t flags); + void (*bo_ref)(struct radeon_bo *bo); + struct radeon_bo *(*bo_unref)(struct radeon_bo *bo); + int (*bo_map)(struct radeon_bo *bo, int write); + int (*bo_unmap)(struct radeon_bo *bo); + int (*bo_wait)(struct radeon_bo *bo); +}; + +struct radeon_bo_manager { + struct radeon_bo_funcs *funcs; + int fd; + +#ifdef RADEON_BO_TRACK + struct radeon_tracker tracker; +#endif +}; + +static inline void _radeon_bo_debug(struct radeon_bo *bo, + const char *op, + const char *file, + const char *func, + int line) +{ + fprintf(stderr, "%s %p 0x%08X 0x%08X 0x%08X [%s %s %d]\n", + op, bo, bo->handle, bo->size, bo->cref, file, func, line); +} + +static inline struct radeon_bo *_radeon_bo_open(struct radeon_bo_manager *bom, + uint32_t handle, + uint32_t size, + uint32_t alignment, + uint32_t domains, + uint32_t flags, + const char *file, + const char *func, + int line) +{ + struct radeon_bo *bo; + + bo = bom->funcs->bo_open(bom, handle, size, alignment, domains, flags); +#ifdef RADEON_BO_TRACK + if (bo) { + bo->track = radeon_tracker_add_track(&bom->tracker, bo->handle); + radeon_track_add_event(bo->track, file, func, "open", line); + } +#endif + return bo; +} + +static inline void _radeon_bo_ref(struct radeon_bo *bo, + const char *file, + const char *func, + int line) +{ + bo->cref++; +#ifdef RADEON_BO_TRACK + radeon_track_add_event(bo->track, file, func, "ref", line); +#endif + bo->bom->funcs->bo_ref(bo); +} + +static inline struct radeon_bo *_radeon_bo_unref(struct radeon_bo *bo, + const char *file, + const char *func, + int line) +{ + bo->cref--; +#ifdef RADEON_BO_TRACK + radeon_track_add_event(bo->track, file, func, "unref", line); + if (bo->cref <= 0) { + radeon_tracker_remove_track(&bo->bom->tracker, bo->track); + bo->track = NULL; + } +#endif + return bo->bom->funcs->bo_unref(bo); +} + +static inline int _radeon_bo_map(struct radeon_bo *bo, + int write, + const char *file, + const char *func, + int line) +{ + return bo->bom->funcs->bo_map(bo, write); +} + +static inline int _radeon_bo_unmap(struct radeon_bo *bo, + const char *file, + const char *func, + int line) +{ + return bo->bom->funcs->bo_unmap(bo); +} + +static inline int _radeon_bo_wait(struct radeon_bo *bo, + const char *file, + const char *func, + int line) +{ + return bo->bom->funcs->bo_wait(bo); +} + +#define radeon_bo_open(bom, h, s, a, d, f)\ + _radeon_bo_open(bom, h, s, a, d, f, __FILE__, __FUNCTION__, __LINE__) +#define radeon_bo_ref(bo)\ + _radeon_bo_ref(bo, __FILE__, __FUNCTION__, __LINE__) +#define radeon_bo_unref(bo)\ + _radeon_bo_unref(bo, __FILE__, __FUNCTION__, __LINE__) +#define radeon_bo_map(bo, w)\ + _radeon_bo_map(bo, w, __FILE__, __FUNCTION__, __LINE__) +#define radeon_bo_unmap(bo)\ + _radeon_bo_unmap(bo, __FILE__, __FUNCTION__, __LINE__) +#define radeon_bo_debug(bo, opcode)\ + _radeon_bo_debug(bo, opcode, __FILE__, __FUNCTION__, __LINE__) +#define radeon_bo_wait(bo) \ + _radeon_bo_wait(bo, __FILE__, __func__, __LINE__) + +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c new file mode 100644 index 0000000000..6a8da402b1 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.c @@ -0,0 +1,830 @@ +/* + * Copyright © 2008 Nicolai Haehnle + * Copyright © 2008 Dave Airlie + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: + * Aapo Tahkola <aet@rasterburn.org> + * Nicolai Haehnle <prefect_@gmx.net> + * Dave Airlie + * Jérôme Glisse <glisse@freedesktop.org> + */ +#include <stdio.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include "xf86drm.h" +#include "texmem.h" +#include "main/simple_list.h" + +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_common.h" +#include "radeon_bocs_wrapper.h" +#include "radeon_macros.h" + +/* no seriously texmem.c is this screwed up */ +struct bo_legacy_texture_object { + driTextureObject base; + struct bo_legacy *parent; +}; + +struct bo_legacy { + struct radeon_bo base; + int map_count; + uint32_t pending; + int is_pending; + int static_bo; + uint32_t offset; + struct bo_legacy_texture_object *tobj; + int validated; + int dirty; + void *ptr; + struct bo_legacy *next, *prev; + struct bo_legacy *pnext, *pprev; +}; + +struct bo_manager_legacy { + struct radeon_bo_manager base; + unsigned nhandle; + unsigned nfree_handles; + unsigned cfree_handles; + uint32_t current_age; + struct bo_legacy bos; + struct bo_legacy pending_bos; + uint32_t fb_location; + uint32_t texture_offset; + unsigned dma_alloc_size; + uint32_t dma_buf_count; + unsigned cpendings; + driTextureObject texture_swapped; + driTexHeap *texture_heap; + struct radeon_screen *screen; + unsigned *free_handles; +}; + +static void bo_legacy_tobj_destroy(void *data, driTextureObject *t) +{ + struct bo_legacy_texture_object *tobj = (struct bo_legacy_texture_object *)t; + + if (tobj->parent) { + tobj->parent->tobj = NULL; + tobj->parent->validated = 0; + } +} + +static void inline clean_handles(struct bo_manager_legacy *bom) +{ + while (bom->cfree_handles > 0 && + !bom->free_handles[bom->cfree_handles - 1]) + bom->cfree_handles--; + +} +static int legacy_new_handle(struct bo_manager_legacy *bom, uint32_t *handle) +{ + uint32_t tmp; + + *handle = 0; + if (bom->nhandle == 0xFFFFFFFF) { + return -EINVAL; + } + if (bom->cfree_handles > 0) { + tmp = bom->free_handles[--bom->cfree_handles]; + clean_handles(bom); + } else { + bom->cfree_handles = 0; + tmp = bom->nhandle++; + } + assert(tmp); + *handle = tmp; + return 0; +} + +static int legacy_free_handle(struct bo_manager_legacy *bom, uint32_t handle) +{ + uint32_t *handles; + + if (!handle) { + return 0; + } + if (handle == (bom->nhandle - 1)) { + int i; + + bom->nhandle--; + for (i = bom->cfree_handles - 1; i >= 0; i--) { + if (bom->free_handles[i] == (bom->nhandle - 1)) { + bom->nhandle--; + bom->free_handles[i] = 0; + } + } + clean_handles(bom); + return 0; + } + if (bom->cfree_handles < bom->nfree_handles) { + bom->free_handles[bom->cfree_handles++] = handle; + return 0; + } + bom->nfree_handles += 0x100; + handles = (uint32_t*)realloc(bom->free_handles, bom->nfree_handles * 4); + if (handles == NULL) { + bom->nfree_handles -= 0x100; + return -ENOMEM; + } + bom->free_handles = handles; + bom->free_handles[bom->cfree_handles++] = handle; + return 0; +} + +static void legacy_get_current_age(struct bo_manager_legacy *boml) +{ + drm_radeon_getparam_t gp; + unsigned char *RADEONMMIO = NULL; + int r; + + if (IS_R300_CLASS(boml->screen)) { + gp.param = RADEON_PARAM_LAST_CLEAR; + gp.value = (int *)&boml->current_age; + r = drmCommandWriteRead(boml->base.fd, DRM_RADEON_GETPARAM, + &gp, sizeof(gp)); + if (r) { + fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, r); + exit(1); + } + } else { + RADEONMMIO = boml->screen->mmio.map; + boml->current_age = boml->screen->scratch[3]; + boml->current_age = INREG(RADEON_GUI_SCRATCH_REG3); + } +} + +static int legacy_is_pending(struct radeon_bo *bo) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + + if (bo_legacy->is_pending <= 0) { + bo_legacy->is_pending = 0; + return 0; + } + if (boml->current_age >= bo_legacy->pending) { + if (boml->pending_bos.pprev == bo_legacy) { + boml->pending_bos.pprev = bo_legacy->pprev; + } + bo_legacy->pprev->pnext = bo_legacy->pnext; + if (bo_legacy->pnext) { + bo_legacy->pnext->pprev = bo_legacy->pprev; + } + assert(bo_legacy->is_pending <= bo->cref); + while (bo_legacy->is_pending--) { + bo = radeon_bo_unref(bo); + if (!bo) + break; + } + if (bo) + bo_legacy->is_pending = 0; + boml->cpendings--; + return 0; + } + return 1; +} + +static int legacy_wait_pending(struct radeon_bo *bo) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + + if (!bo_legacy->is_pending) { + return 0; + } + /* FIXME: lockup and userspace busy looping that's all the folks */ + legacy_get_current_age(boml); + while (legacy_is_pending(bo)) { + usleep(10); + legacy_get_current_age(boml); + } + return 0; +} + +static void legacy_track_pending(struct bo_manager_legacy *boml, int debug) +{ + struct bo_legacy *bo_legacy; + struct bo_legacy *next; + + legacy_get_current_age(boml); + bo_legacy = boml->pending_bos.pnext; + while (bo_legacy) { + if (debug) + fprintf(stderr,"pending %p %d %d %d\n", bo_legacy, bo_legacy->base.size, + boml->current_age, bo_legacy->pending); + next = bo_legacy->pnext; + if (legacy_is_pending(&(bo_legacy->base))) { + } + bo_legacy = next; + } +} + +static int legacy_wait_any_pending(struct bo_manager_legacy *boml) +{ + struct bo_legacy *bo_legacy; + + legacy_get_current_age(boml); + bo_legacy = boml->pending_bos.pnext; + if (!bo_legacy) + return -1; + legacy_wait_pending(&bo_legacy->base); + return 0; +} + +static void legacy_kick_all_buffers(struct bo_manager_legacy *boml) +{ + struct bo_legacy *legacy; + + legacy = boml->bos.next; + while (legacy != &boml->bos) { + if (legacy->tobj) { + if (legacy->validated) { + driDestroyTextureObject(&legacy->tobj->base); + legacy->tobj = 0; + legacy->validated = 0; + } + } + legacy = legacy->next; + } +} + +static struct bo_legacy *bo_allocate(struct bo_manager_legacy *boml, + uint32_t size, + uint32_t alignment, + uint32_t domains, + uint32_t flags) +{ + struct bo_legacy *bo_legacy; + static int pgsize; + + if (pgsize == 0) + pgsize = getpagesize() - 1; + + size = (size + pgsize) & ~pgsize; + + bo_legacy = (struct bo_legacy*)calloc(1, sizeof(struct bo_legacy)); + if (bo_legacy == NULL) { + return NULL; + } + bo_legacy->base.bom = (struct radeon_bo_manager*)boml; + bo_legacy->base.handle = 0; + bo_legacy->base.size = size; + bo_legacy->base.alignment = alignment; + bo_legacy->base.domains = domains; + bo_legacy->base.flags = flags; + bo_legacy->base.ptr = NULL; + bo_legacy->map_count = 0; + bo_legacy->next = NULL; + bo_legacy->prev = NULL; + bo_legacy->pnext = NULL; + bo_legacy->pprev = NULL; + bo_legacy->next = boml->bos.next; + bo_legacy->prev = &boml->bos; + boml->bos.next = bo_legacy; + if (bo_legacy->next) { + bo_legacy->next->prev = bo_legacy; + } + return bo_legacy; +} + +static int bo_dma_alloc(struct radeon_bo *bo) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + drm_radeon_mem_alloc_t alloc; + unsigned size; + int base_offset; + int r; + + /* align size on 4Kb */ + size = (((4 * 1024) - 1) + bo->size) & ~((4 * 1024) - 1); + alloc.region = RADEON_MEM_REGION_GART; + alloc.alignment = bo_legacy->base.alignment; + alloc.size = size; + alloc.region_offset = &base_offset; + r = drmCommandWriteRead(bo->bom->fd, + DRM_RADEON_ALLOC, + &alloc, + sizeof(alloc)); + if (r) { + /* ptr is set to NULL if dma allocation failed */ + bo_legacy->ptr = NULL; + return r; + } + bo_legacy->ptr = boml->screen->gartTextures.map + base_offset; + bo_legacy->offset = boml->screen->gart_texture_offset + base_offset; + bo->size = size; + boml->dma_alloc_size += size; + boml->dma_buf_count++; + return 0; +} + +static int bo_dma_free(struct radeon_bo *bo) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + drm_radeon_mem_free_t memfree; + int r; + + if (bo_legacy->ptr == NULL) { + /* ptr is set to NULL if dma allocation failed */ + return 0; + } + legacy_get_current_age(boml); + memfree.region = RADEON_MEM_REGION_GART; + memfree.region_offset = bo_legacy->offset; + memfree.region_offset -= boml->screen->gart_texture_offset; + r = drmCommandWrite(boml->base.fd, + DRM_RADEON_FREE, + &memfree, + sizeof(memfree)); + if (r) { + fprintf(stderr, "Failed to free bo[%p] at %08x\n", + &bo_legacy->base, memfree.region_offset); + fprintf(stderr, "ret = %s\n", strerror(-r)); + return r; + } + boml->dma_alloc_size -= bo_legacy->base.size; + boml->dma_buf_count--; + return 0; +} + +static void bo_free(struct bo_legacy *bo_legacy) +{ + struct bo_manager_legacy *boml; + + if (bo_legacy == NULL) { + return; + } + boml = (struct bo_manager_legacy *)bo_legacy->base.bom; + bo_legacy->prev->next = bo_legacy->next; + if (bo_legacy->next) { + bo_legacy->next->prev = bo_legacy->prev; + } + if (!bo_legacy->static_bo) { + legacy_free_handle(boml, bo_legacy->base.handle); + if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) { + /* dma buffers */ + bo_dma_free(&bo_legacy->base); + } else { + driDestroyTextureObject(&bo_legacy->tobj->base); + bo_legacy->tobj = NULL; + /* free backing store */ + free(bo_legacy->ptr); + } + } + memset(bo_legacy, 0 , sizeof(struct bo_legacy)); + free(bo_legacy); +} + +static struct radeon_bo *bo_open(struct radeon_bo_manager *bom, + uint32_t handle, + uint32_t size, + uint32_t alignment, + uint32_t domains, + uint32_t flags) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom; + struct bo_legacy *bo_legacy; + int r; + + if (handle) { + bo_legacy = boml->bos.next; + while (bo_legacy) { + if (bo_legacy->base.handle == handle) { + radeon_bo_ref(&(bo_legacy->base)); + return (struct radeon_bo*)bo_legacy; + } + bo_legacy = bo_legacy->next; + } + return NULL; + } + + bo_legacy = bo_allocate(boml, size, alignment, domains, flags); + bo_legacy->static_bo = 0; + r = legacy_new_handle(boml, &bo_legacy->base.handle); + if (r) { + bo_free(bo_legacy); + return NULL; + } + if (bo_legacy->base.domains & RADEON_GEM_DOMAIN_GTT) { + retry: + legacy_track_pending(boml, 0); + /* dma buffers */ + + r = bo_dma_alloc(&(bo_legacy->base)); + if (r) { + if (legacy_wait_any_pending(boml) == -1) { + bo_free(bo_legacy); + return NULL; + } + goto retry; + return NULL; + } + } else { + bo_legacy->ptr = malloc(bo_legacy->base.size); + if (bo_legacy->ptr == NULL) { + bo_free(bo_legacy); + return NULL; + } + } + radeon_bo_ref(&(bo_legacy->base)); + return (struct radeon_bo*)bo_legacy; +} + +static void bo_ref(struct radeon_bo *bo) +{ +} + +static struct radeon_bo *bo_unref(struct radeon_bo *bo) +{ + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + + if (bo->cref <= 0) { + bo_legacy->prev->next = bo_legacy->next; + if (bo_legacy->next) { + bo_legacy->next->prev = bo_legacy->prev; + } + if (!bo_legacy->is_pending) { + bo_free(bo_legacy); + } + return NULL; + } + return bo; +} + +static int bo_map(struct radeon_bo *bo, int write) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + + legacy_wait_pending(bo); + bo_legacy->validated = 0; + bo_legacy->dirty = 1; + bo_legacy->map_count++; + bo->ptr = bo_legacy->ptr; + /* Read the first pixel in the frame buffer. This should + * be a noop, right? In fact without this conform fails as reading + * from the framebuffer sometimes produces old results -- the + * on-card read cache gets mixed up and doesn't notice that the + * framebuffer has been updated. + * + * Note that we should probably be reading some otherwise unused + * region of VRAM, otherwise we might get incorrect results when + * reading pixels from the top left of the screen. + * + * I found this problem on an R420 with glean's texCube test. + * Note that the R200 span code also *writes* the first pixel in the + * framebuffer, but I've found this to be unnecessary. + * -- Nicolai Hähnle, June 2008 + */ + if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) { + int p; + volatile int *buf = (int*)boml->screen->driScreen->pFB; + p = *buf; + } + return 0; +} + +static int bo_unmap(struct radeon_bo *bo) +{ + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + + if (--bo_legacy->map_count > 0) { + return 0; + } + bo->ptr = NULL; + return 0; +} + +static struct radeon_bo_funcs bo_legacy_funcs = { + bo_open, + bo_ref, + bo_unref, + bo_map, + bo_unmap +}; + +static int bo_vram_validate(struct radeon_bo *bo, + uint32_t *soffset, + uint32_t *eoffset) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + int r; + int retry_count = 0, pending_retry = 0; + + if (!bo_legacy->tobj) { + bo_legacy->tobj = CALLOC(sizeof(struct bo_legacy_texture_object)); + bo_legacy->tobj->parent = bo_legacy; + make_empty_list(&bo_legacy->tobj->base); + bo_legacy->tobj->base.totalSize = bo->size; + retry: + r = driAllocateTexture(&boml->texture_heap, 1, + &bo_legacy->tobj->base); + if (r) { + pending_retry = 0; + while(boml->cpendings && pending_retry++ < 10000) { + legacy_track_pending(boml, 0); + retry_count++; + if (retry_count > 2) { + free(bo_legacy->tobj); + bo_legacy->tobj = NULL; + fprintf(stderr, "Ouch! vram_validate failed %d\n", r); + return -1; + } + goto retry; + } + } + bo_legacy->offset = boml->texture_offset + + bo_legacy->tobj->base.memBlock->ofs; + bo_legacy->dirty = 1; + } + + assert(bo_legacy->tobj->base.memBlock); + + if (bo_legacy->tobj) + driUpdateTextureLRU(&bo_legacy->tobj->base); + + if (bo_legacy->dirty || bo_legacy->tobj->base.dirty_images[0]) { + /* Copy to VRAM using a blit. + * All memory is 4K aligned. We're using 1024 pixels wide blits. + */ + drm_radeon_texture_t tex; + drm_radeon_tex_image_t tmp; + int ret; + + tex.offset = bo_legacy->offset; + tex.image = &tmp; + assert(!(tex.offset & 1023)); + + tmp.x = 0; + tmp.y = 0; + if (bo->size < 4096) { + tmp.width = (bo->size + 3) / 4; + tmp.height = 1; + } else { + tmp.width = 1024; + tmp.height = (bo->size + 4095) / 4096; + } + tmp.data = bo_legacy->ptr; + tex.format = RADEON_TXFORMAT_ARGB8888; + tex.width = tmp.width; + tex.height = tmp.height; + tex.pitch = MAX2(tmp.width / 16, 1); + do { + ret = drmCommandWriteRead(bo->bom->fd, + DRM_RADEON_TEXTURE, + &tex, + sizeof(drm_radeon_texture_t)); + if (ret) { + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "DRM_RADEON_TEXTURE: again!\n"); + usleep(1); + } + } while (ret == -EAGAIN); + bo_legacy->dirty = 0; + bo_legacy->tobj->base.dirty_images[0] = 0; + } + return 0; +} + +/* + * radeon_bo_legacy_validate - + * returns: + * 0 - all good + * -EINVAL - mapped buffer can't be validated + * -EAGAIN - restart validation we've kicked all the buffers out + */ +int radeon_bo_legacy_validate(struct radeon_bo *bo, + uint32_t *soffset, + uint32_t *eoffset) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + int r; + int retries = 0; + + if (bo_legacy->map_count) { + fprintf(stderr, "bo(%p, %d) is mapped (%d) can't valide it.\n", + bo, bo->size, bo_legacy->map_count); + return -EINVAL; + } + if (bo_legacy->static_bo || bo_legacy->validated) { + *soffset = bo_legacy->offset; + *eoffset = bo_legacy->offset + bo->size; + return 0; + } + if (!(bo->domains & RADEON_GEM_DOMAIN_GTT)) { + + r = bo_vram_validate(bo, soffset, eoffset); + if (r) { + legacy_track_pending(boml, 0); + legacy_kick_all_buffers(boml); + retries++; + if (retries == 2) { + fprintf(stderr,"legacy bo: failed to get relocations into aperture\n"); + assert(0); + exit(-1); + } + return -EAGAIN; + } + } + *soffset = bo_legacy->offset; + *eoffset = bo_legacy->offset + bo->size; + bo_legacy->validated = 1; + return 0; +} + +void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bo->bom; + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + + bo_legacy->pending = pending; + bo_legacy->is_pending++; + /* add to pending list */ + radeon_bo_ref(bo); + if (bo_legacy->is_pending > 1) { + return; + } + bo_legacy->pprev = boml->pending_bos.pprev; + bo_legacy->pnext = NULL; + bo_legacy->pprev->pnext = bo_legacy; + boml->pending_bos.pprev = bo_legacy; + boml->cpendings++; +} + +void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom; + struct bo_legacy *bo_legacy; + + if (bom == NULL) { + return; + } + bo_legacy = boml->bos.next; + while (bo_legacy) { + struct bo_legacy *next; + + next = bo_legacy->next; + bo_free(bo_legacy); + bo_legacy = next; + } + driDestroyTextureHeap(boml->texture_heap); + free(boml->free_handles); + free(boml); +} + +static struct bo_legacy *radeon_legacy_bo_alloc_static(struct bo_manager_legacy *bom, + int size, uint32_t offset) +{ + struct bo_legacy *bo; + + bo = bo_allocate(bom, size, 0, RADEON_GEM_DOMAIN_VRAM, 0); + if (bo == NULL) + return NULL; + bo->static_bo = 1; + bo->offset = offset + bom->fb_location; + bo->base.handle = bo->offset; + bo->ptr = bom->screen->driScreen->pFB + offset; + if (bo->base.handle > bom->nhandle) { + bom->nhandle = bo->base.handle + 1; + } + radeon_bo_ref(&(bo->base)); + return bo; +} + +struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn) +{ + struct bo_manager_legacy *bom; + struct bo_legacy *bo; + unsigned size; + + bom = (struct bo_manager_legacy*) + calloc(1, sizeof(struct bo_manager_legacy)); + if (bom == NULL) { + return NULL; + } + + make_empty_list(&bom->texture_swapped); + + bom->texture_heap = driCreateTextureHeap(0, + bom, + scrn->texSize[0], + 12, + RADEON_NR_TEX_REGIONS, + (drmTextureRegionPtr)scrn->sarea->tex_list[0], + &scrn->sarea->tex_age[0], + &bom->texture_swapped, + sizeof(struct bo_legacy_texture_object), + &bo_legacy_tobj_destroy); + bom->texture_offset = scrn->texOffset[0]; + + bom->base.funcs = &bo_legacy_funcs; + bom->base.fd = scrn->driScreen->fd; + bom->bos.next = NULL; + bom->bos.prev = NULL; + bom->pending_bos.pprev = &bom->pending_bos; + bom->pending_bos.pnext = NULL; + bom->screen = scrn; + bom->fb_location = scrn->fbLocation; + bom->nhandle = 1; + bom->cfree_handles = 0; + bom->nfree_handles = 0x400; + bom->free_handles = (uint32_t*)malloc(bom->nfree_handles * 4); + if (bom->free_handles == NULL) { + radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom); + return NULL; + } + + /* biggest framebuffer size */ + size = 4096*4096*4; + + /* allocate front */ + bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->frontOffset); + if (!bo) { + radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom); + return NULL; + } + if (scrn->sarea->tiling_enabled) { + bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE; + } + + /* allocate back */ + bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->backOffset); + if (!bo) { + radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom); + return NULL; + } + if (scrn->sarea->tiling_enabled) { + bo->base.flags = RADEON_BO_FLAGS_MACRO_TILE; + } + + /* allocate depth */ + bo = radeon_legacy_bo_alloc_static(bom, size, bom->screen->depthOffset); + if (!bo) { + radeon_bo_manager_legacy_dtor((struct radeon_bo_manager*)bom); + return NULL; + } + bo->base.flags = 0; + if (scrn->sarea->tiling_enabled) { + bo->base.flags |= RADEON_BO_FLAGS_MACRO_TILE; + bo->base.flags |= RADEON_BO_FLAGS_MICRO_TILE; + } + return (struct radeon_bo_manager*)bom; +} + +void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom) +{ + struct bo_manager_legacy *boml = (struct bo_manager_legacy *)bom; + DRI_AGE_TEXTURES(boml->texture_heap); +} + +unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo) +{ + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + + if (bo_legacy->static_bo || (bo->domains & RADEON_GEM_DOMAIN_GTT)) { + return 0; + } + return bo->size; +} + +int radeon_legacy_bo_is_static(struct radeon_bo *bo) +{ + struct bo_legacy *bo_legacy = (struct bo_legacy*)bo; + return bo_legacy->static_bo; +} + diff --git a/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h new file mode 100644 index 0000000000..9187cd7201 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_bo_legacy.h @@ -0,0 +1,47 @@ +/* + * Copyright © 2008 Nicolai Haehnle + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: + * Aapo Tahkola <aet@rasterburn.org> + * Nicolai Haehnle <prefect_@gmx.net> + * Jérôme Glisse <glisse@freedesktop.org> + */ +#ifndef RADEON_BO_LEGACY_H +#define RADEON_BO_LEGACY_H + +#include "radeon_screen.h" + +void radeon_bo_legacy_pending(struct radeon_bo *bo, uint32_t pending); +int radeon_bo_legacy_validate(struct radeon_bo *bo, + uint32_t *soffset, + uint32_t *eoffset); +struct radeon_bo_manager *radeon_bo_manager_legacy_ctor(struct radeon_screen *scrn); +void radeon_bo_manager_legacy_dtor(struct radeon_bo_manager *bom); +void radeon_bo_legacy_texture_age(struct radeon_bo_manager *bom); +unsigned radeon_bo_legacy_relocs_size(struct radeon_bo *bo); + +int radeon_legacy_bo_is_static(struct radeon_bo *bo); +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h new file mode 100644 index 0000000000..e0c70dd9a1 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -0,0 +1,85 @@ +#ifndef RADEON_CS_WRAPPER_H +#define RADEON_CS_WRAPPER_H + +#ifdef HAVE_LIBDRM_RADEON + +#include "radeon_bo.h" +#include "radeon_bo_gem.h" +#include "radeon_cs.h" +#include "radeon_cs_gem.h" + +#else +#include <stdint.h> + +#define RADEON_GEM_DOMAIN_CPU 0x1 // Cached CPU domain +#define RADEON_GEM_DOMAIN_GTT 0x2 // GTT or cache flushed +#define RADEON_GEM_DOMAIN_VRAM 0x4 // VRAM domain + +/* to be used to build locally in mesa with no libdrm bits */ +#include "../radeon/radeon_bo_drm.h" +#include "../radeon/radeon_cs_drm.h" + +#ifndef DRM_RADEON_GEM_INFO +#define DRM_RADEON_GEM_INFO 0x1c + +struct drm_radeon_gem_info { + uint64_t gart_size; + uint64_t vram_size; + uint64_t vram_visible; +}; + +struct drm_radeon_info { + uint32_t request; + uint32_t pad; + uint32_t value; +}; +#endif + +#ifndef RADEON_PARAM_DEVICE_ID +#define RADEON_PARAM_DEVICE_ID 16 +#endif + +#ifndef RADEON_INFO_DEVICE_ID +#define RADEON_INFO_DEVICE_ID 0 +#endif +#ifndef RADEON_INFO_NUM_GB_PIPES +#define RADEON_INFO_NUM_GB_PIPES 0 +#endif + +#ifndef DRM_RADEON_INFO +#define DRM_RADEON_INFO 0x1 +#endif + + +static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy) +{ + return 0; +} + +static inline void *radeon_bo_manager_gem_ctor(int fd) +{ + return NULL; +} + +static inline void radeon_bo_manager_gem_dtor(void *dummy) +{ +} + +static inline void *radeon_cs_manager_gem_ctor(int fd) +{ + return NULL; +} + +static inline void radeon_cs_manager_gem_dtor(void *dummy) +{ +} + +static inline void radeon_tracker_print(void *ptr, int io) +{ +} +#endif + +#include "radeon_bo_legacy.h" +#include "radeon_cs_legacy.h" + +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h new file mode 100644 index 0000000000..4b5116c474 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h @@ -0,0 +1,143 @@ +#ifndef COMMON_CMDBUF_H +#define COMMON_CMDBUF_H + +#include "radeon_bocs_wrapper.h" + +void rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller); +int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller); +int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller); +void rcommonInitCmdBuf(radeonContextPtr rmesa); +void rcommonDestroyCmdBuf(radeonContextPtr rmesa); + +void rcommonBeginBatch(radeonContextPtr rmesa, + int n, + int dostate, + const char *file, + const char *function, + int line); + +#define RADEON_CP_PACKET3_NOP 0xC0001000 +#define RADEON_CP_PACKET3_NEXT_CHAR 0xC0001900 +#define RADEON_CP_PACKET3_PLY_NEXTSCAN 0xC0001D00 +#define RADEON_CP_PACKET3_SET_SCISSORS 0xC0001E00 +#define RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM 0xC0002300 +#define RADEON_CP_PACKET3_LOAD_MICROCODE 0xC0002400 +#define RADEON_CP_PACKET3_WAIT_FOR_IDLE 0xC0002600 +#define RADEON_CP_PACKET3_3D_DRAW_VBUF 0xC0002800 +#define RADEON_CP_PACKET3_3D_DRAW_IMMD 0xC0002900 +#define RADEON_CP_PACKET3_3D_DRAW_INDX 0xC0002A00 +#define RADEON_CP_PACKET3_LOAD_PALETTE 0xC0002C00 +#define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00 +#define RADEON_CP_PACKET3_CNTL_PAINT 0xC0009100 +#define RADEON_CP_PACKET3_CNTL_BITBLT 0xC0009200 +#define RADEON_CP_PACKET3_CNTL_SMALLTEXT 0xC0009300 +#define RADEON_CP_PACKET3_CNTL_HOSTDATA_BLT 0xC0009400 +#define RADEON_CP_PACKET3_CNTL_POLYLINE 0xC0009500 +#define RADEON_CP_PACKET3_CNTL_POLYSCANLINES 0xC0009800 +#define RADEON_CP_PACKET3_CNTL_PAINT_MULTI 0xC0009A00 +#define RADEON_CP_PACKET3_CNTL_BITBLT_MULTI 0xC0009B00 +#define RADEON_CP_PACKET3_CNTL_TRANS_BITBLT 0xC0009C00 + +#define CP_PACKET2 (2 << 30) +#define CP_PACKET0(reg, n) (RADEON_CP_PACKET0 | ((n)<<16) | ((reg)>>2)) +#define CP_PACKET0_ONE(reg, n) (RADEON_CP_PACKET0 | RADEON_CP_PACKET0_ONE_REG_WR | ((n)<<16) | ((reg)>>2)) +#define CP_PACKET3( pkt, n ) \ + (RADEON_CP_PACKET3 | (pkt) | ((n) << 16)) + +/** + * Every function writing to the command buffer needs to declare this + * to get the necessary local variables. + */ +#define BATCH_LOCALS(rmesa) \ + const radeonContextPtr b_l_rmesa = rmesa + +/** + * Prepare writing n dwords to the command buffer, + * including producing any necessary state emits on buffer wraparound. + */ +#define BEGIN_BATCH(n) rcommonBeginBatch(b_l_rmesa, n, 1, __FILE__, __FUNCTION__, __LINE__) + +/** + * Same as BEGIN_BATCH, but do not cause automatic state emits. + */ +#define BEGIN_BATCH_NO_AUTOSTATE(n) rcommonBeginBatch(b_l_rmesa, n, 0, __FILE__, __FUNCTION__, __LINE__) + +/** + * Write one dword to the command buffer. + */ +#define OUT_BATCH(data) \ + do { \ + radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, data);\ + } while(0) + +/** + * Write a relocated dword to the command buffer. + */ +#define OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) \ + do { \ + if (0 && offset) { \ + fprintf(stderr, "(%s:%s:%d) offset : %d\n", \ + __FILE__, __FUNCTION__, __LINE__, offset); \ + } \ + radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, offset); \ + radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \ + bo, rd, wd, flags); \ + if (!b_l_rmesa->radeonScreen->kernel_mm) \ + b_l_rmesa->cmdbuf.cs->section_cdw += 2; \ + } while(0) + + +/** + * Write n dwords from ptr to the command buffer. + */ +#define OUT_BATCH_TABLE(ptr,n) \ + do { \ + int _i; \ + for (_i=0; _i < n; _i++) {\ + radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, ptr[_i]);\ + }\ + } while(0) + +/** + * Finish writing dwords to the command buffer. + * The number of (direct or indirect) OUT_BATCH calls between the previous + * BEGIN_BATCH and END_BATCH must match the number specified at BEGIN_BATCH time. + */ +#define END_BATCH() \ + do { \ + radeon_cs_end(b_l_rmesa->cmdbuf.cs, __FILE__, __FUNCTION__, __LINE__);\ + } while(0) + +/** + * After the last END_BATCH() of rendering, this indicates that flushing + * the command buffer now is okay. + */ +#define COMMIT_BATCH() \ + do { \ + } while(0) + + +/** Single register write to command buffer; requires 2 dwords. */ +#define OUT_BATCH_REGVAL(reg, val) \ + OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), 1)); \ + OUT_BATCH((val)) + +/** Continuous register range write to command buffer; requires 1 dword, + * expects count dwords afterwards for register contents. */ +#define OUT_BATCH_REGSEQ(reg, count) \ + OUT_BATCH(cmdpacket0(b_l_rmesa->radeonScreen, (reg), (count))); + +/** Write a 32 bit float to the ring; requires 1 dword. */ +#define OUT_BATCH_FLOAT32(f) \ + OUT_BATCH(radeonPackFloat32((f))); + + +/* Fire the buffered vertices no matter what. + */ +static INLINE void radeon_firevertices(radeonContextPtr radeon) +{ + if (radeon->cmdbuf.cs->cdw || radeon->dma.flush ) + radeonFlush(radeon->glCtx); +} + +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c new file mode 100644 index 0000000000..7bd4a6f14f --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -0,0 +1,1503 @@ +/************************************************************************** + +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +/* + - Scissor implementation + - buffer swap/copy ioctls + - finish/flush + - state emission + - cmdbuffer management +*/ + +#include <errno.h> +#include "main/glheader.h" +#include "main/imports.h" +#include "main/context.h" +#include "main/api_arrayelt.h" +#include "main/enums.h" +#include "main/colormac.h" +#include "main/light.h" +#include "main/framebuffer.h" +#include "main/simple_list.h" +#include "main/renderbuffer.h" +#include "swrast/swrast.h" +#include "vbo/vbo.h" +#include "tnl/tnl.h" +#include "tnl/t_pipeline.h" +#include "swrast_setup/swrast_setup.h" + +#include "main/blend.h" +#include "main/bufferobj.h" +#include "main/buffers.h" +#include "main/depth.h" +#include "main/polygon.h" +#include "main/shaders.h" +#include "main/texstate.h" +#include "main/varray.h" +#include "glapi/dispatch.h" +#include "swrast/swrast.h" +#include "main/stencil.h" +#include "main/matrix.h" +#include "main/attrib.h" +#include "main/enable.h" +#include "main/viewport.h" + +#include "dri_util.h" +#include "vblank.h" + +#include "radeon_common.h" +#include "radeon_bocs_wrapper.h" +#include "radeon_lock.h" +#include "radeon_drm.h" +#include "radeon_mipmap_tree.h" + +#define DEBUG_CMDBUF 0 + +/* ============================================================= + * Scissoring + */ + +static GLboolean intersect_rect(drm_clip_rect_t * out, + drm_clip_rect_t * a, drm_clip_rect_t * b) +{ + *out = *a; + if (b->x1 > out->x1) + out->x1 = b->x1; + if (b->y1 > out->y1) + out->y1 = b->y1; + if (b->x2 < out->x2) + out->x2 = b->x2; + if (b->y2 < out->y2) + out->y2 = b->y2; + if (out->x1 >= out->x2) + return GL_FALSE; + if (out->y1 >= out->y2) + return GL_FALSE; + return GL_TRUE; +} + +void radeonRecalcScissorRects(radeonContextPtr radeon) +{ + drm_clip_rect_t *out; + int i; + + /* Grow cliprect store? + */ + if (radeon->state.scissor.numAllocedClipRects < radeon->numClipRects) { + while (radeon->state.scissor.numAllocedClipRects < + radeon->numClipRects) { + radeon->state.scissor.numAllocedClipRects += 1; /* zero case */ + radeon->state.scissor.numAllocedClipRects *= 2; + } + + if (radeon->state.scissor.pClipRects) + FREE(radeon->state.scissor.pClipRects); + + radeon->state.scissor.pClipRects = + MALLOC(radeon->state.scissor.numAllocedClipRects * + sizeof(drm_clip_rect_t)); + + if (radeon->state.scissor.pClipRects == NULL) { + radeon->state.scissor.numAllocedClipRects = 0; + return; + } + } + + out = radeon->state.scissor.pClipRects; + radeon->state.scissor.numClipRects = 0; + + for (i = 0; i < radeon->numClipRects; i++) { + if (intersect_rect(out, + &radeon->pClipRects[i], + &radeon->state.scissor.rect)) { + radeon->state.scissor.numClipRects++; + out++; + } + } +} + +void radeon_get_cliprects(radeonContextPtr radeon, + struct drm_clip_rect **cliprects, + unsigned int *num_cliprects, + int *x_off, int *y_off) +{ + __DRIdrawablePrivate *dPriv = radeon_get_drawable(radeon); + struct radeon_framebuffer *rfb = dPriv->driverPrivate; + + if (radeon->constant_cliprect) { + radeon->fboRect.x1 = 0; + radeon->fboRect.y1 = 0; + radeon->fboRect.x2 = radeon->glCtx->DrawBuffer->Width; + radeon->fboRect.y2 = radeon->glCtx->DrawBuffer->Height; + + *cliprects = &radeon->fboRect; + *num_cliprects = 1; + *x_off = 0; + *y_off = 0; + } else if (radeon->front_cliprects || + rfb->pf_active || dPriv->numBackClipRects == 0) { + *cliprects = dPriv->pClipRects; + *num_cliprects = dPriv->numClipRects; + *x_off = dPriv->x; + *y_off = dPriv->y; + } else { + *num_cliprects = dPriv->numBackClipRects; + *cliprects = dPriv->pBackClipRects; + *x_off = dPriv->backX; + *y_off = dPriv->backY; + } +} + +/** + * Update cliprects and scissors. + */ +void radeonSetCliprects(radeonContextPtr radeon) +{ + __DRIdrawablePrivate *const drawable = radeon_get_drawable(radeon); + __DRIdrawablePrivate *const readable = radeon_get_readable(radeon); + struct radeon_framebuffer *const draw_rfb = drawable->driverPrivate; + struct radeon_framebuffer *const read_rfb = readable->driverPrivate; + int x_off, y_off; + + radeon_get_cliprects(radeon, &radeon->pClipRects, + &radeon->numClipRects, &x_off, &y_off); + + if ((draw_rfb->base.Width != drawable->w) || + (draw_rfb->base.Height != drawable->h)) { + _mesa_resize_framebuffer(radeon->glCtx, &draw_rfb->base, + drawable->w, drawable->h); + draw_rfb->base.Initialized = GL_TRUE; + } + + if (drawable != readable) { + if ((read_rfb->base.Width != readable->w) || + (read_rfb->base.Height != readable->h)) { + _mesa_resize_framebuffer(radeon->glCtx, &read_rfb->base, + readable->w, readable->h); + read_rfb->base.Initialized = GL_TRUE; + } + } + + if (radeon->state.scissor.enabled) + radeonRecalcScissorRects(radeon); + +} + + + +void radeonUpdateScissor( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + + if ( radeon_get_drawable(rmesa) ) { + __DRIdrawablePrivate *dPriv = radeon_get_drawable(rmesa); + + int x = ctx->Scissor.X; + int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height; + int w = ctx->Scissor.X + ctx->Scissor.Width - 1; + int h = dPriv->h - ctx->Scissor.Y - 1; + + rmesa->state.scissor.rect.x1 = x + dPriv->x; + rmesa->state.scissor.rect.y1 = y + dPriv->y; + rmesa->state.scissor.rect.x2 = w + dPriv->x + 1; + rmesa->state.scissor.rect.y2 = h + dPriv->y + 1; + + radeonRecalcScissorRects( rmesa ); + } +} + +/* ============================================================= + * Scissoring + */ + +void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + if (ctx->Scissor.Enabled) { + /* We don't pipeline cliprect changes */ + radeon_firevertices(radeon); + radeonUpdateScissor(ctx); + } +} + + +/* ================================================================ + * SwapBuffers with client-side throttling + */ + +static uint32_t radeonGetLastFrame(radeonContextPtr radeon) +{ + drm_radeon_getparam_t gp; + int ret; + uint32_t frame = 0; + + gp.param = RADEON_PARAM_LAST_FRAME; + gp.value = (int *)&frame; + ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM, + &gp, sizeof(gp)); + if (ret) { + fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, + ret); + exit(1); + } + + return frame; +} + +uint32_t radeonGetAge(radeonContextPtr radeon) +{ + drm_radeon_getparam_t gp; + int ret; + uint32_t age; + + gp.param = RADEON_PARAM_LAST_CLEAR; + gp.value = (int *)&age; + ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_GETPARAM, + &gp, sizeof(gp)); + if (ret) { + fprintf(stderr, "%s: drmRadeonGetParam: %d\n", __FUNCTION__, + ret); + exit(1); + } + + return age; +} + +static void radeonEmitIrqLocked(radeonContextPtr radeon) +{ + drm_radeon_irq_emit_t ie; + int ret; + + ie.irq_seq = &radeon->iw.irq_seq; + ret = drmCommandWriteRead(radeon->dri.fd, DRM_RADEON_IRQ_EMIT, + &ie, sizeof(ie)); + if (ret) { + fprintf(stderr, "%s: drmRadeonIrqEmit: %d\n", __FUNCTION__, + ret); + exit(1); + } +} + +static void radeonWaitIrq(radeonContextPtr radeon) +{ + int ret; + + do { + ret = drmCommandWrite(radeon->dri.fd, DRM_RADEON_IRQ_WAIT, + &radeon->iw, sizeof(radeon->iw)); + } while (ret && (errno == EINTR || errno == EBUSY)); + + if (ret) { + fprintf(stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, + ret); + exit(1); + } +} + +static void radeonWaitForFrameCompletion(radeonContextPtr radeon) +{ + drm_radeon_sarea_t *sarea = radeon->sarea; + + if (radeon->do_irqs) { + if (radeonGetLastFrame(radeon) < sarea->last_frame) { + if (!radeon->irqsEmitted) { + while (radeonGetLastFrame(radeon) < + sarea->last_frame) ; + } else { + UNLOCK_HARDWARE(radeon); + radeonWaitIrq(radeon); + LOCK_HARDWARE(radeon); + } + radeon->irqsEmitted = 10; + } + + if (radeon->irqsEmitted) { + radeonEmitIrqLocked(radeon); + radeon->irqsEmitted--; + } + } else { + while (radeonGetLastFrame(radeon) < sarea->last_frame) { + UNLOCK_HARDWARE(radeon); + if (radeon->do_usleeps) + DO_USLEEP(1); + LOCK_HARDWARE(radeon); + } + } +} + +/* wait for idle */ +void radeonWaitForIdleLocked(radeonContextPtr radeon) +{ + int ret; + int i = 0; + + do { + ret = drmCommandNone(radeon->dri.fd, DRM_RADEON_CP_IDLE); + if (ret) + DO_USLEEP(1); + } while (ret && ++i < 100); + + if (ret < 0) { + UNLOCK_HARDWARE(radeon); + fprintf(stderr, "Error: R300 timed out... exiting\n"); + exit(-1); + } +} + +static void radeonWaitForIdle(radeonContextPtr radeon) +{ + if (!radeon->radeonScreen->driScreen->dri2.enabled) { + LOCK_HARDWARE(radeon); + radeonWaitForIdleLocked(radeon); + UNLOCK_HARDWARE(radeon); + } +} + +static void radeon_flip_renderbuffers(struct radeon_framebuffer *rfb) +{ + int current_page = rfb->pf_current_page; + int next_page = (current_page + 1) % rfb->pf_num_pages; + struct gl_renderbuffer *tmp_rb; + + /* Exchange renderbuffers if necessary but make sure their + * reference counts are preserved. + */ + if (rfb->color_rb[current_page] && + rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer != + &rfb->color_rb[current_page]->base) { + tmp_rb = NULL; + _mesa_reference_renderbuffer(&tmp_rb, + rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + tmp_rb = &rfb->color_rb[current_page]->base; + _mesa_reference_renderbuffer(&rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer, tmp_rb); + _mesa_reference_renderbuffer(&tmp_rb, NULL); + } + + if (rfb->color_rb[next_page] && + rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer != + &rfb->color_rb[next_page]->base) { + tmp_rb = NULL; + _mesa_reference_renderbuffer(&tmp_rb, + rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer); + tmp_rb = &rfb->color_rb[next_page]->base; + _mesa_reference_renderbuffer(&rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer, tmp_rb); + _mesa_reference_renderbuffer(&tmp_rb, NULL); + } +} + +/* Copy the back color buffer to the front color buffer. + */ +void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, + const drm_clip_rect_t *rect) +{ + radeonContextPtr rmesa; + struct radeon_framebuffer *rfb; + GLint nbox, i, ret; + + assert(dPriv); + assert(dPriv->driContextPriv); + assert(dPriv->driContextPriv->driverPrivate); + + rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + + LOCK_HARDWARE(rmesa); + + rfb = dPriv->driverPrivate; + + if ( RADEON_DEBUG & DEBUG_IOCTL ) { + fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx ); + } + + nbox = dPriv->numClipRects; /* must be in locked region */ + + for ( i = 0 ; i < nbox ; ) { + GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox ); + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t *b = rmesa->sarea->boxes; + GLint n = 0; + + for ( ; i < nr ; i++ ) { + + *b = box[i]; + + if (rect) + { + if (rect->x1 > b->x1) + b->x1 = rect->x1; + if (rect->y1 > b->y1) + b->y1 = rect->y1; + if (rect->x2 < b->x2) + b->x2 = rect->x2; + if (rect->y2 < b->y2) + b->y2 = rect->y2; + + if (b->x1 >= b->x2 || b->y1 >= b->y2) + continue; + } + + b++; + n++; + } + rmesa->sarea->nbox = n; + + if (!n) + continue; + + ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); + + if ( ret ) { + fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret ); + UNLOCK_HARDWARE( rmesa ); + exit( 1 ); + } + } + + UNLOCK_HARDWARE( rmesa ); +} + +static int radeonScheduleSwap(__DRIdrawablePrivate *dPriv, GLboolean *missed_target) +{ + radeonContextPtr rmesa; + + rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + radeon_firevertices(rmesa); + + LOCK_HARDWARE( rmesa ); + + if (!dPriv->numClipRects) { + UNLOCK_HARDWARE(rmesa); + usleep(10000); /* throttle invisible client 10ms */ + return 0; + } + + radeonWaitForFrameCompletion(rmesa); + + UNLOCK_HARDWARE(rmesa); + driWaitForVBlank(dPriv, missed_target); + + return 0; +} + +static GLboolean radeonPageFlip( __DRIdrawablePrivate *dPriv ) +{ + radeonContextPtr radeon; + GLint ret; + __DRIscreenPrivate *psp; + struct radeon_renderbuffer *rrb; + struct radeon_framebuffer *rfb; + + assert(dPriv); + assert(dPriv->driContextPriv); + assert(dPriv->driContextPriv->driverPrivate); + + radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + rfb = dPriv->driverPrivate; + rrb = (void *)rfb->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer; + + psp = dPriv->driScreenPriv; + + LOCK_HARDWARE(radeon); + + if ( RADEON_DEBUG & DEBUG_IOCTL ) { + fprintf(stderr, "%s: pfCurrentPage: %d %d\n", __FUNCTION__, + radeon->sarea->pfCurrentPage, radeon->sarea->pfState); + } + drm_clip_rect_t *box = dPriv->pClipRects; + drm_clip_rect_t *b = radeon->sarea->boxes; + b[0] = box[0]; + radeon->sarea->nbox = 1; + + ret = drmCommandNone( radeon->dri.fd, DRM_RADEON_FLIP ); + + UNLOCK_HARDWARE(radeon); + + if ( ret ) { + fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret ); + return GL_FALSE; + } + + if (!rfb->pf_active) + return GL_FALSE; + + rfb->pf_current_page = radeon->sarea->pfCurrentPage; + radeon_flip_renderbuffers(rfb); + radeon_draw_buffer(radeon->glCtx, &rfb->base); + + return GL_TRUE; +} + + +/** + * Swap front and back buffer. + */ +void radeonSwapBuffers(__DRIdrawablePrivate * dPriv) +{ + int64_t ust; + __DRIscreenPrivate *psp; + + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + radeonContextPtr radeon; + GLcontext *ctx; + + radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + ctx = radeon->glCtx; + + if (ctx->Visual.doubleBufferMode) { + GLboolean missed_target; + struct radeon_framebuffer *rfb = dPriv->driverPrivate; + _mesa_notifySwapBuffers(ctx);/* flush pending rendering comands */ + + radeonScheduleSwap(dPriv, &missed_target); + + if (rfb->pf_active) { + radeonPageFlip(dPriv); + } else { + radeonCopyBuffer(dPriv, NULL); + } + + psp = dPriv->driScreenPriv; + + rfb->swap_count++; + (*psp->systemTime->getUST)( & ust ); + if ( missed_target ) { + rfb->swap_missed_count++; + rfb->swap_missed_ust = ust - rfb->swap_ust; + } + + rfb->swap_ust = ust; + radeon->hw.all_dirty = GL_TRUE; + } + } else { + /* XXX this shouldn't be an error but we can't handle it for now */ + _mesa_problem(NULL, "%s: drawable has no context!", + __FUNCTION__); + } +} + +void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, + int x, int y, int w, int h ) +{ + if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { + radeonContextPtr radeon; + GLcontext *ctx; + + radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; + ctx = radeon->glCtx; + + if (ctx->Visual.doubleBufferMode) { + drm_clip_rect_t rect; + rect.x1 = x + dPriv->x; + rect.y1 = (dPriv->h - y - h) + dPriv->y; + rect.x2 = rect.x1 + w; + rect.y2 = rect.y1 + h; + _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ + radeonCopyBuffer(dPriv, &rect); + } + } else { + /* XXX this shouldn't be an error but we can't handle it for now */ + _mesa_problem(NULL, "%s: drawable has no context!", + __FUNCTION__); + } +} + +void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_renderbuffer *rrbDepth = NULL, *rrbStencil = NULL, + *rrbColor = NULL; + uint32_t offset = 0; + + + if (!fb) { + /* this can happen during the initial context initialization */ + return; + } + + /* radeons only handle 1 color draw so far */ + if (fb->_NumColorDrawBuffers != 1) { + radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE); + return; + } + + /* Do this here, note core Mesa, since this function is called from + * many places within the driver. + */ + if (ctx->NewState & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { + /* this updates the DrawBuffer->_NumColorDrawBuffers fields, etc */ + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + } + + if (fb->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) { + /* this may occur when we're called by glBindFrameBuffer() during + * the process of someone setting up renderbuffers, etc. + */ + /*_mesa_debug(ctx, "DrawBuffer: incomplete user FBO\n");*/ + return; + } + + if (fb->Name) + ;/* do something depthy/stencily TODO */ + + + /* none */ + if (fb->Name == 0) { + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { + rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + radeon->front_cliprects = GL_TRUE; + radeon->front_buffer_dirty = GL_TRUE; + } else { + rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer); + radeon->front_cliprects = GL_FALSE; + } + } else { + /* user FBO in theory */ + struct radeon_renderbuffer *rrb; + rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[0]); + if (rrb) { + offset = rrb->draw_offset; + rrbColor = rrb; + } + radeon->constant_cliprect = GL_TRUE; + } + + if (rrbColor == NULL) + radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE); + else + radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE); + + + if (fb->_DepthBuffer && fb->_DepthBuffer->Wrapped) { + rrbDepth = radeon_renderbuffer(fb->_DepthBuffer->Wrapped); + if (rrbDepth && rrbDepth->bo) { + radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE); + } else { + radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_TRUE); + } + } else { + radeon->vtbl.fallback(ctx, RADEON_FALLBACK_DEPTH_BUFFER, GL_FALSE); + rrbDepth = NULL; + } + + if (fb->_StencilBuffer && fb->_StencilBuffer->Wrapped) { + rrbStencil = radeon_renderbuffer(fb->_DepthBuffer->Wrapped); + if (rrbStencil && rrbStencil->bo) { + radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE); + /* need to re-compute stencil hw state */ + if (!rrbDepth) + rrbDepth = rrbStencil; + } else { + radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_TRUE); + } + } else { + radeon->vtbl.fallback(ctx, RADEON_FALLBACK_STENCIL_BUFFER, GL_FALSE); + if (ctx->Driver.Enable != NULL) + ctx->Driver.Enable(ctx, GL_STENCIL_TEST, ctx->Stencil.Enabled); + else + ctx->NewState |= _NEW_STENCIL; + } + + /* Update culling direction which changes depending on the + * orientation of the buffer: + */ + if (ctx->Driver.FrontFace) + ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace); + else + ctx->NewState |= _NEW_POLYGON; + + /* + * Update depth test state + */ + if (ctx->Driver.Enable) { + ctx->Driver.Enable(ctx, GL_DEPTH_TEST, + (ctx->Depth.Test && fb->Visual.depthBits > 0)); + /* Need to update the derived ctx->Stencil._Enabled first */ + _mesa_update_stencil(ctx); + ctx->Driver.Enable(ctx, GL_STENCIL_TEST, + (ctx->Stencil._Enabled && fb->Visual.stencilBits > 0)); + } else { + ctx->NewState |= (_NEW_DEPTH | _NEW_STENCIL); + } + + _mesa_reference_renderbuffer(&radeon->state.depth.rb, &rrbDepth->base); + _mesa_reference_renderbuffer(&radeon->state.color.rb, &rrbColor->base); + radeon->state.color.draw_offset = offset; + +#if 0 + /* update viewport since it depends on window size */ + if (ctx->Driver.Viewport) { + ctx->Driver.Viewport(ctx, ctx->Viewport.X, ctx->Viewport.Y, + ctx->Viewport.Width, ctx->Viewport.Height); + } else { + + } +#endif + ctx->NewState |= _NEW_VIEWPORT; + + /* Set state we know depends on drawable parameters: + */ + radeonUpdateScissor(ctx); + radeon->NewGLState |= _NEW_SCISSOR; + + if (ctx->Driver.DepthRange) + ctx->Driver.DepthRange(ctx, + ctx->Viewport.Near, + ctx->Viewport.Far); + + /* Update culling direction which changes depending on the + * orientation of the buffer: + */ + if (ctx->Driver.FrontFace) + ctx->Driver.FrontFace(ctx, ctx->Polygon.FrontFace); + else + ctx->NewState |= _NEW_POLYGON; +} + +/** + * Called via glDrawBuffer. + */ +void radeonDrawBuffer( GLcontext *ctx, GLenum mode ) +{ + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s %s\n", __FUNCTION__, + _mesa_lookup_enum_by_nr( mode )); + + if (ctx->DrawBuffer->Name == 0) { + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + + const GLboolean was_front_buffer_rendering = + radeon->is_front_buffer_rendering; + + radeon->is_front_buffer_rendering = (mode == GL_FRONT_LEFT) || + (mode == GL_FRONT); + + /* If we weren't front-buffer rendering before but we are now, make sure + * that the front-buffer has actually been allocated. + */ + if (!was_front_buffer_rendering && radeon->is_front_buffer_rendering) { + radeon_update_renderbuffers(radeon->dri.context, + radeon->dri.context->driDrawablePriv); + } + } + + radeon_draw_buffer(ctx, ctx->DrawBuffer); +} + +void radeonReadBuffer( GLcontext *ctx, GLenum mode ) +{ + /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ + if (ctx->ReadBuffer == ctx->DrawBuffer) { + /* This will update FBO completeness status. + * A framebuffer will be incomplete if the GL_READ_BUFFER setting + * refers to a missing renderbuffer. Calling glReadBuffer can set + * that straight and can make the drawing buffer complete. + */ + radeon_draw_buffer(ctx, ctx->DrawBuffer); + } +} + + +/* Turn on/off page flipping according to the flags in the sarea: + */ +void radeonUpdatePageFlipping(radeonContextPtr radeon) +{ + struct radeon_framebuffer *rfb = radeon_get_drawable(radeon)->driverPrivate; + + rfb->pf_active = radeon->sarea->pfState; + rfb->pf_current_page = radeon->sarea->pfCurrentPage; + rfb->pf_num_pages = 2; + radeon_flip_renderbuffers(rfb); + radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer); +} + +void radeon_window_moved(radeonContextPtr radeon) +{ + if (!radeon->radeonScreen->driScreen->dri2.enabled) { + radeonUpdatePageFlipping(radeon); + } + radeonSetCliprects(radeon); +} + +void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + __DRIcontext *driContext = radeon->dri.context; + void (*old_viewport)(GLcontext *ctx, GLint x, GLint y, + GLsizei w, GLsizei h); + + if (!driContext->driScreenPriv->dri2.enabled) + return; + + radeonFlush(ctx); + radeon_update_renderbuffers(driContext, driContext->driDrawablePriv); + if (driContext->driDrawablePriv != driContext->driReadablePriv) + radeon_update_renderbuffers(driContext, driContext->driReadablePriv); + + old_viewport = ctx->Driver.Viewport; + ctx->Driver.Viewport = NULL; + radeon_window_moved(radeon); + radeon_draw_buffer(ctx, radeon->glCtx->DrawBuffer); + ctx->Driver.Viewport = old_viewport; +} + +static void radeon_print_state_atom(radeonContextPtr radeon, struct radeon_state_atom *state) +{ + int i, j, reg; + int dwords = (*state->check) (radeon->glCtx, state); + drm_r300_cmd_header_t cmd; + + fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); + + if (RADEON_DEBUG & DEBUG_VERBOSE) { + for (i = 0; i < dwords;) { + cmd = *((drm_r300_cmd_header_t *) &state->cmd[i]); + reg = (cmd.packet0.reghi << 8) | cmd.packet0.reglo; + fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n", + state->name, i, reg, cmd.packet0.count); + ++i; + for (j = 0; j < cmd.packet0.count && i < dwords; j++) { + fprintf(stderr, " %s[%d]: 0x%04x = %08x\n", + state->name, i, reg, state->cmd[i]); + reg += 4; + ++i; + } + } + } +} + +static void radeon_print_state_atom_kmm(radeonContextPtr radeon, struct radeon_state_atom *state) +{ + int i, j, reg, count; + int dwords = (*state->check) (radeon->glCtx, state); + uint32_t packet0; + + fprintf(stderr, " emit %s %d/%d\n", state->name, dwords, state->cmd_size); + + if (RADEON_DEBUG & DEBUG_VERBOSE) { + for (i = 0; i < dwords;) { + packet0 = state->cmd[i]; + reg = (packet0 & 0x1FFF) << 2; + count = ((packet0 & 0x3FFF0000) >> 16) + 1; + fprintf(stderr, " %s[%d]: cmdpacket0 (first reg=0x%04x, count=%d)\n", + state->name, i, reg, count); + ++i; + for (j = 0; j < count && i < dwords; j++) { + fprintf(stderr, " %s[%d]: 0x%04x = %08x\n", + state->name, i, reg, state->cmd[i]); + reg += 4; + ++i; + } + } + } +} + +static INLINE void radeonEmitAtoms(radeonContextPtr radeon, GLboolean dirty) +{ + BATCH_LOCALS(radeon); + struct radeon_state_atom *atom; + int dwords; + + if (radeon->vtbl.pre_emit_atoms) + radeon->vtbl.pre_emit_atoms(radeon); + + /* Emit actual atoms */ + foreach(atom, &radeon->hw.atomlist) { + if ((atom->dirty || radeon->hw.all_dirty) == dirty) { + dwords = (*atom->check) (radeon->glCtx, atom); + if (dwords) { + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { + if (radeon->radeonScreen->kernel_mm) + radeon_print_state_atom_kmm(radeon, atom); + else + radeon_print_state_atom(radeon, atom); + } + if (atom->emit) { + (*atom->emit)(radeon->glCtx, atom); + } else { + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_BATCH_TABLE(atom->cmd, dwords); + END_BATCH(); + } + atom->dirty = GL_FALSE; + } else { + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_STATE) { + fprintf(stderr, " skip state %s\n", + atom->name); + } + } + } + } + + COMMIT_BATCH(); +} + +GLboolean radeon_revalidate_bos(GLcontext *ctx) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + int flushed = 0; + int ret; +again: + ret = radeon_cs_space_check(radeon->cmdbuf.cs, radeon->state.bos, radeon->state.validated_bo_count); + if (ret == RADEON_CS_SPACE_OP_TO_BIG) + return GL_FALSE; + if (ret == RADEON_CS_SPACE_FLUSH) { + radeonFlush(ctx); + if (flushed) + return GL_FALSE; + flushed = 1; + goto again; + } + return GL_TRUE; +} + +void radeon_validate_reset_bos(radeonContextPtr radeon) +{ + int i; + + for (i = 0; i < radeon->state.validated_bo_count; i++) { + radeon_bo_unref(radeon->state.bos[i].bo); + radeon->state.bos[i].bo = NULL; + radeon->state.bos[i].read_domains = 0; + radeon->state.bos[i].write_domain = 0; + radeon->state.bos[i].new_accounted = 0; + } + radeon->state.validated_bo_count = 0; +} + +void radeon_validate_bo(radeonContextPtr radeon, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain) +{ + int i; + for (i = 0; i < radeon->state.validated_bo_count; i++) { + if (radeon->state.bos[i].bo == bo && + radeon->state.bos[i].read_domains == read_domains && + radeon->state.bos[i].write_domain == write_domain) + return; + } + radeon_bo_ref(bo); + radeon->state.bos[radeon->state.validated_bo_count].bo = bo; + radeon->state.bos[radeon->state.validated_bo_count].read_domains = read_domains; + radeon->state.bos[radeon->state.validated_bo_count].write_domain = write_domain; + radeon->state.bos[radeon->state.validated_bo_count].new_accounted = 0; + radeon->state.validated_bo_count++; + + assert(radeon->state.validated_bo_count < RADEON_MAX_BOS); +} + +void radeonEmitState(radeonContextPtr radeon) +{ + if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (radeon->vtbl.pre_emit_state) + radeon->vtbl.pre_emit_state(radeon); + + /* this code used to return here but now it emits zbs */ + if (radeon->cmdbuf.cs->cdw && !radeon->hw.is_dirty && !radeon->hw.all_dirty) + return; + + /* To avoid going across the entire set of states multiple times, just check + * for enough space for the case of emitting all state, and inline the + * radeonAllocCmdBuf code here without all the checks. + */ + rcommonEnsureCmdBufSpace(radeon, radeon->hw.max_state_size, __FUNCTION__); + + if (!radeon->cmdbuf.cs->cdw) { + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "Begin reemit state\n"); + + radeonEmitAtoms(radeon, GL_FALSE); + } + + if (RADEON_DEBUG & DEBUG_STATE) + fprintf(stderr, "Begin dirty state\n"); + + radeonEmitAtoms(radeon, GL_TRUE); + radeon->hw.is_dirty = GL_FALSE; + radeon->hw.all_dirty = GL_FALSE; + +} + + +void radeonFlush(GLcontext *ctx) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %d\n", __FUNCTION__, radeon->cmdbuf.cs->cdw); + + /* okay if we have no cmds in the buffer && + we have no DMA flush && + we have no DMA buffer allocated. + then no point flushing anything at all. + */ + if (!radeon->dma.flush && !radeon->cmdbuf.cs->cdw && !radeon->dma.current) + return; + + if (radeon->dma.flush) + radeon->dma.flush( ctx ); + + radeonEmitState(radeon); + + if (radeon->cmdbuf.cs->cdw) + rcommonFlushCmdBuf(radeon, __FUNCTION__); + + if ((ctx->DrawBuffer->Name == 0) && radeon->front_buffer_dirty) { + __DRIscreen *const screen = radeon->radeonScreen->driScreen; + + if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2) + && (screen->dri2.loader->flushFrontBuffer != NULL)) { + __DRIdrawablePrivate * drawable = radeon_get_drawable(radeon); + (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate); + + /* Only clear the dirty bit if front-buffer rendering is no longer + * enabled. This is done so that the dirty bit can only be set in + * glDrawBuffer. Otherwise the dirty bit would have to be set at + * each of N places that do rendering. This has worse performances, + * but it is much easier to get correct. + */ + if (radeon->is_front_buffer_rendering) { + radeon->front_buffer_dirty = GL_FALSE; + } + } + } +} + +/* Make sure all commands have been sent to the hardware and have + * completed processing. + */ +void radeonFinish(GLcontext * ctx) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct gl_framebuffer *fb = ctx->DrawBuffer; + int i; + + radeonFlush(ctx); + + if (radeon->radeonScreen->kernel_mm) { + for (i = 0; i < fb->_NumColorDrawBuffers; i++) { + struct radeon_renderbuffer *rrb; + rrb = radeon_renderbuffer(fb->_ColorDrawBuffers[i]); + if (rrb && rrb->bo) + radeon_bo_wait(rrb->bo); + } + { + struct radeon_renderbuffer *rrb; + rrb = radeon_get_depthbuffer(radeon); + if (rrb && rrb->bo) + radeon_bo_wait(rrb->bo); + } + } else if (radeon->do_irqs) { + LOCK_HARDWARE(radeon); + radeonEmitIrqLocked(radeon); + UNLOCK_HARDWARE(radeon); + radeonWaitIrq(radeon); + } else { + radeonWaitForIdle(radeon); + } +} + +/* cmdbuffer */ +/** + * Send the current command buffer via ioctl to the hardware. + */ +int rcommonFlushCmdBufLocked(radeonContextPtr rmesa, const char *caller) +{ + int ret = 0; + + if (rmesa->cmdbuf.flushing) { + fprintf(stderr, "Recursive call into r300FlushCmdBufLocked!\n"); + exit(-1); + } + rmesa->cmdbuf.flushing = 1; + + if (RADEON_DEBUG & DEBUG_IOCTL) { + fprintf(stderr, "%s from %s - %i cliprects\n", + __FUNCTION__, caller, rmesa->numClipRects); + } + + if (rmesa->cmdbuf.cs->cdw) { + ret = radeon_cs_emit(rmesa->cmdbuf.cs); + rmesa->hw.all_dirty = GL_TRUE; + } + radeon_cs_erase(rmesa->cmdbuf.cs); + rmesa->cmdbuf.flushing = 0; + + if (radeon_revalidate_bos(rmesa->glCtx) == GL_FALSE) { + fprintf(stderr,"failed to revalidate buffers\n"); + } + + return ret; +} + +int rcommonFlushCmdBuf(radeonContextPtr rmesa, const char *caller) +{ + int ret; + + radeonReleaseDmaRegion(rmesa); + + LOCK_HARDWARE(rmesa); + ret = rcommonFlushCmdBufLocked(rmesa, caller); + UNLOCK_HARDWARE(rmesa); + + if (ret) { + fprintf(stderr, "drmRadeonCmdBuffer: %d\n", ret); + _mesa_exit(ret); + } + + return ret; +} + +/** + * Make sure that enough space is available in the command buffer + * by flushing if necessary. + * + * \param dwords The number of dwords we need to be free on the command buffer + */ +void rcommonEnsureCmdBufSpace(radeonContextPtr rmesa, int dwords, const char *caller) +{ + if ((rmesa->cmdbuf.cs->cdw + dwords + 128) > rmesa->cmdbuf.size || + radeon_cs_need_flush(rmesa->cmdbuf.cs)) { + rcommonFlushCmdBuf(rmesa, caller); + } +} + +void rcommonInitCmdBuf(radeonContextPtr rmesa) +{ + GLuint size; + /* Initialize command buffer */ + size = 256 * driQueryOptioni(&rmesa->optionCache, + "command_buffer_size"); + if (size < 2 * rmesa->hw.max_state_size) { + size = 2 * rmesa->hw.max_state_size + 65535; + } + if (size > 64 * 256) + size = 64 * 256; + + if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) { + fprintf(stderr, "sizeof(drm_r300_cmd_header_t)=%zd\n", + sizeof(drm_r300_cmd_header_t)); + fprintf(stderr, "sizeof(drm_radeon_cmd_buffer_t)=%zd\n", + sizeof(drm_radeon_cmd_buffer_t)); + fprintf(stderr, + "Allocating %d bytes command buffer (max state is %d bytes)\n", + size * 4, rmesa->hw.max_state_size * 4); + } + + if (rmesa->radeonScreen->kernel_mm) { + int fd = rmesa->radeonScreen->driScreen->fd; + rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd); + } else { + rmesa->cmdbuf.csm = radeon_cs_manager_legacy_ctor(rmesa); + } + if (rmesa->cmdbuf.csm == NULL) { + /* FIXME: fatal error */ + return; + } + rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size); + assert(rmesa->cmdbuf.cs != NULL); + rmesa->cmdbuf.size = size; + + if (!rmesa->radeonScreen->kernel_mm) { + radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]); + radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size); + } else { + struct drm_radeon_gem_info mminfo = { 0 }; + + if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo))) + { + radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible); + radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size); + } + } + +} +/** + * Destroy the command buffer + */ +void rcommonDestroyCmdBuf(radeonContextPtr rmesa) +{ + radeon_cs_destroy(rmesa->cmdbuf.cs); + if (rmesa->radeonScreen->driScreen->dri2.enabled || rmesa->radeonScreen->kernel_mm) { + radeon_cs_manager_gem_dtor(rmesa->cmdbuf.csm); + } else { + radeon_cs_manager_legacy_dtor(rmesa->cmdbuf.csm); + } +} + +void rcommonBeginBatch(radeonContextPtr rmesa, int n, + int dostate, + const char *file, + const char *function, + int line) +{ + rcommonEnsureCmdBufSpace(rmesa, n, function); + if (!rmesa->cmdbuf.cs->cdw && dostate) { + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "Reemit state after flush (from %s)\n", function); + radeonEmitState(rmesa); + } + radeon_cs_begin(rmesa->cmdbuf.cs, n, file, function, line); + + if (DEBUG_CMDBUF && RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "BEGIN_BATCH(%d) at %d, from %s:%i\n", + n, rmesa->cmdbuf.cs->cdw, function, line); + +} + + + +static void +radeon_meta_set_passthrough_transform(radeonContextPtr radeon) +{ + GLcontext *ctx = radeon->glCtx; + + radeon->meta.saved_vp_x = ctx->Viewport.X; + radeon->meta.saved_vp_y = ctx->Viewport.Y; + radeon->meta.saved_vp_width = ctx->Viewport.Width; + radeon->meta.saved_vp_height = ctx->Viewport.Height; + radeon->meta.saved_matrix_mode = ctx->Transform.MatrixMode; + + _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height); + + _mesa_MatrixMode(GL_PROJECTION); + _mesa_PushMatrix(); + _mesa_LoadIdentity(); + _mesa_Ortho(0, ctx->DrawBuffer->Width, 0, ctx->DrawBuffer->Height, 1, -1); + + _mesa_MatrixMode(GL_MODELVIEW); + _mesa_PushMatrix(); + _mesa_LoadIdentity(); +} + +static void +radeon_meta_restore_transform(radeonContextPtr radeon) +{ + _mesa_MatrixMode(GL_PROJECTION); + _mesa_PopMatrix(); + _mesa_MatrixMode(GL_MODELVIEW); + _mesa_PopMatrix(); + + _mesa_MatrixMode(radeon->meta.saved_matrix_mode); + + _mesa_Viewport(radeon->meta.saved_vp_x, radeon->meta.saved_vp_y, + radeon->meta.saved_vp_width, radeon->meta.saved_vp_height); +} + + +/** + * Perform glClear where mask contains only color, depth, and/or stencil. + * + * The implementation is based on calling into Mesa to set GL state and + * performing normal triangle rendering. The intent of this path is to + * have as generic a path as possible, so that any driver could make use of + * it. + */ + + +void radeon_clear_tris(GLcontext *ctx, GLbitfield mask) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + GLfloat vertices[4][3]; + GLfloat color[4][4]; + GLfloat dst_z; + struct gl_framebuffer *fb = ctx->DrawBuffer; + int i; + GLboolean saved_fp_enable = GL_FALSE, saved_vp_enable = GL_FALSE; + GLboolean saved_shader_program = 0; + unsigned int saved_active_texture; + + assert((mask & ~(TRI_CLEAR_COLOR_BITS | BUFFER_BIT_DEPTH | + BUFFER_BIT_STENCIL)) == 0); + + _mesa_PushAttrib(GL_COLOR_BUFFER_BIT | + GL_CURRENT_BIT | + GL_DEPTH_BUFFER_BIT | + GL_ENABLE_BIT | + GL_POLYGON_BIT | + GL_STENCIL_BUFFER_BIT | + GL_TRANSFORM_BIT | + GL_CURRENT_BIT); + _mesa_PushClientAttrib(GL_CLIENT_VERTEX_ARRAY_BIT); + saved_active_texture = ctx->Texture.CurrentUnit; + + /* Disable existing GL state we don't want to apply to a clear. */ + _mesa_Disable(GL_ALPHA_TEST); + _mesa_Disable(GL_BLEND); + _mesa_Disable(GL_CULL_FACE); + _mesa_Disable(GL_FOG); + _mesa_Disable(GL_POLYGON_SMOOTH); + _mesa_Disable(GL_POLYGON_STIPPLE); + _mesa_Disable(GL_POLYGON_OFFSET_FILL); + _mesa_Disable(GL_LIGHTING); + _mesa_Disable(GL_CLIP_PLANE0); + _mesa_Disable(GL_CLIP_PLANE1); + _mesa_Disable(GL_CLIP_PLANE2); + _mesa_Disable(GL_CLIP_PLANE3); + _mesa_Disable(GL_CLIP_PLANE4); + _mesa_Disable(GL_CLIP_PLANE5); + _mesa_PolygonMode(GL_FRONT_AND_BACK, GL_FILL); + if (ctx->Extensions.ARB_fragment_program && ctx->FragmentProgram.Enabled) { + saved_fp_enable = GL_TRUE; + _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); + } + if (ctx->Extensions.ARB_vertex_program && ctx->VertexProgram.Enabled) { + saved_vp_enable = GL_TRUE; + _mesa_Disable(GL_VERTEX_PROGRAM_ARB); + } + if (ctx->Extensions.ARB_shader_objects && ctx->Shader.CurrentProgram) { + saved_shader_program = ctx->Shader.CurrentProgram->Name; + _mesa_UseProgramObjectARB(0); + } + + if (ctx->Texture._EnabledUnits != 0) { + int i; + + for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { + _mesa_ActiveTextureARB(GL_TEXTURE0 + i); + _mesa_Disable(GL_TEXTURE_1D); + _mesa_Disable(GL_TEXTURE_2D); + _mesa_Disable(GL_TEXTURE_3D); + if (ctx->Extensions.ARB_texture_cube_map) + _mesa_Disable(GL_TEXTURE_CUBE_MAP_ARB); + if (ctx->Extensions.NV_texture_rectangle) + _mesa_Disable(GL_TEXTURE_RECTANGLE_NV); + if (ctx->Extensions.MESA_texture_array) { + _mesa_Disable(GL_TEXTURE_1D_ARRAY_EXT); + _mesa_Disable(GL_TEXTURE_2D_ARRAY_EXT); + } + } + } + +#if FEATURE_ARB_vertex_buffer_object + _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + _mesa_BindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0); +#endif + + radeon_meta_set_passthrough_transform(rmesa); + + for (i = 0; i < 4; i++) { + color[i][0] = ctx->Color.ClearColor[0]; + color[i][1] = ctx->Color.ClearColor[1]; + color[i][2] = ctx->Color.ClearColor[2]; + color[i][3] = ctx->Color.ClearColor[3]; + } + + /* convert clear Z from [0,1] to NDC coord in [-1,1] */ + + dst_z = -1.0 + 2.0 * ctx->Depth.Clear; + /* Prepare the vertices, which are the same regardless of which buffer we're + * drawing to. + */ + vertices[0][0] = fb->_Xmin; + vertices[0][1] = fb->_Ymin; + vertices[0][2] = dst_z; + vertices[1][0] = fb->_Xmax; + vertices[1][1] = fb->_Ymin; + vertices[1][2] = dst_z; + vertices[2][0] = fb->_Xmax; + vertices[2][1] = fb->_Ymax; + vertices[2][2] = dst_z; + vertices[3][0] = fb->_Xmin; + vertices[3][1] = fb->_Ymax; + vertices[3][2] = dst_z; + + _mesa_ColorPointer(4, GL_FLOAT, 4 * sizeof(GLfloat), &color); + _mesa_VertexPointer(3, GL_FLOAT, 3 * sizeof(GLfloat), &vertices); + _mesa_Enable(GL_COLOR_ARRAY); + _mesa_Enable(GL_VERTEX_ARRAY); + + while (mask != 0) { + GLuint this_mask = 0; + GLuint color_bit; + + color_bit = _mesa_ffs(mask & TRI_CLEAR_COLOR_BITS); + if (color_bit != 0) + this_mask |= (1 << (color_bit - 1)); + + /* Clear depth/stencil in the same pass as color. */ + this_mask |= (mask & (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL)); + + /* Select the current color buffer and use the color write mask if + * we have one, otherwise don't write any color channels. + */ + if (this_mask & BUFFER_BIT_FRONT_LEFT) + _mesa_DrawBuffer(GL_FRONT_LEFT); + else if (this_mask & BUFFER_BIT_BACK_LEFT) + _mesa_DrawBuffer(GL_BACK_LEFT); + else if (color_bit != 0) + _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0 + + (color_bit - BUFFER_COLOR0 - 1)); + else + _mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + + /* Control writing of the depth clear value to depth. */ + if (this_mask & BUFFER_BIT_DEPTH) { + _mesa_DepthFunc(GL_ALWAYS); + _mesa_DepthMask(GL_TRUE); + _mesa_Enable(GL_DEPTH_TEST); + } else { + _mesa_Disable(GL_DEPTH_TEST); + _mesa_DepthMask(GL_FALSE); + } + + /* Control writing of the stencil clear value to stencil. */ + if (this_mask & BUFFER_BIT_STENCIL) { + _mesa_Enable(GL_STENCIL_TEST); + _mesa_StencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); + _mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS, ctx->Stencil.Clear, + ctx->Stencil.WriteMask[0]); + } else { + _mesa_Disable(GL_STENCIL_TEST); + } + + CALL_DrawArrays(ctx->Exec, (GL_TRIANGLE_FAN, 0, 4)); + + mask &= ~this_mask; + } + + radeon_meta_restore_transform(rmesa); + + _mesa_ActiveTextureARB(GL_TEXTURE0 + saved_active_texture); + if (saved_fp_enable) + _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB); + if (saved_vp_enable) + _mesa_Enable(GL_VERTEX_PROGRAM_ARB); + + if (saved_shader_program) + _mesa_UseProgramObjectARB(saved_shader_program); + + _mesa_PopClientAttrib(); + _mesa_PopAttrib(); +} diff --git a/src/mesa/drivers/dri/radeon/radeon_common.h b/src/mesa/drivers/dri/radeon/radeon_common.h new file mode 100644 index 0000000000..b60792df0b --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_common.h @@ -0,0 +1,100 @@ +#ifndef COMMON_MISC_H +#define COMMON_MISC_H + +#include "radeon_common_context.h" +#include "radeon_dma.h" +#include "radeon_texture.h" + + +#define TRI_CLEAR_COLOR_BITS (BUFFER_BIT_BACK_LEFT | \ + BUFFER_BIT_FRONT_LEFT | \ + BUFFER_BIT_COLOR0 | \ + BUFFER_BIT_COLOR1 | \ + BUFFER_BIT_COLOR2 | \ + BUFFER_BIT_COLOR3 | \ + BUFFER_BIT_COLOR4 | \ + BUFFER_BIT_COLOR5 | \ + BUFFER_BIT_COLOR6 | \ + BUFFER_BIT_COLOR7) + +void radeonRecalcScissorRects(radeonContextPtr radeon); +void radeonSetCliprects(radeonContextPtr radeon); +void radeonUpdateScissor( GLcontext *ctx ); +void radeonScissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h); + +void radeonWaitForIdleLocked(radeonContextPtr radeon); +extern uint32_t radeonGetAge(radeonContextPtr radeon); +void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, + const drm_clip_rect_t *rect); +void radeonSwapBuffers(__DRIdrawablePrivate * dPriv); +void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, + int x, int y, int w, int h ); + +void radeonUpdatePageFlipping(radeonContextPtr rmesa); + +void radeonFlush(GLcontext *ctx); +void radeonFinish(GLcontext * ctx); +void radeonEmitState(radeonContextPtr radeon); + +void radeon_clear_tris(GLcontext *ctx, GLbitfield mask); + +void radeon_window_moved(radeonContextPtr radeon); +void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb); +void radeonDrawBuffer( GLcontext *ctx, GLenum mode ); +void radeonReadBuffer( GLcontext *ctx, GLenum mode ); +void radeon_viewport(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height); +void radeon_get_cliprects(radeonContextPtr radeon, + struct drm_clip_rect **cliprects, + unsigned int *num_cliprects, + int *x_off, int *y_off); +GLboolean radeon_revalidate_bos(GLcontext *ctx); +void radeon_validate_bo(radeonContextPtr radeon, struct radeon_bo *bo, uint32_t read_domains, uint32_t write_domain); +void radeon_validate_reset_bos(radeonContextPtr radeon); + +void radeon_fbo_init(struct radeon_context *radeon); +void +radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb, + struct radeon_bo *bo); +struct radeon_renderbuffer * +radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv); +static inline struct radeon_renderbuffer *radeon_renderbuffer(struct gl_renderbuffer *rb) +{ + struct radeon_renderbuffer *rrb = (struct radeon_renderbuffer *)rb; + if (rrb && rrb->base.ClassID == RADEON_RB_CLASS) + return rrb; + else + return NULL; +} + +static inline struct radeon_renderbuffer *radeon_get_renderbuffer(struct gl_framebuffer *fb, int att_index) +{ + if (att_index >= 0) + return radeon_renderbuffer(fb->Attachment[att_index].Renderbuffer); + else + return NULL; +} + +static inline struct radeon_renderbuffer *radeon_get_depthbuffer(radeonContextPtr rmesa) +{ + struct radeon_renderbuffer *rrb; + rrb = radeon_renderbuffer(rmesa->state.depth.rb); + if (!rrb) + return NULL; + + return rrb; +} + +static inline struct radeon_renderbuffer *radeon_get_colorbuffer(radeonContextPtr rmesa) +{ + struct radeon_renderbuffer *rrb; + + rrb = radeon_renderbuffer(rmesa->state.color.rb); + if (!rrb) + return NULL; + return rrb; +} + +#include "radeon_cmdbuf.h" + + +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c new file mode 100644 index 0000000000..009859feca --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -0,0 +1,737 @@ +/************************************************************************** + +Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and + VA Linux Systems Inc., Fremont, California. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +#include "radeon_common.h" +#include "xmlpool.h" /* for symbolic values of enum-type options */ +#include "utils.h" +#include "vblank.h" +#include "drirenderbuffer.h" +#include "main/context.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" +#include "main/state.h" +#include "main/simple_list.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" +#include "tnl/tnl.h" + +#define DRIVER_DATE "20090101" + +#ifndef RADEON_DEBUG +int RADEON_DEBUG = (0); +#endif + + +static const char* get_chip_family_name(int chip_family) +{ + switch(chip_family) { + case CHIP_FAMILY_R100: return "R100"; + case CHIP_FAMILY_RV100: return "RV100"; + case CHIP_FAMILY_RS100: return "RS100"; + case CHIP_FAMILY_RV200: return "RV200"; + case CHIP_FAMILY_RS200: return "RS200"; + case CHIP_FAMILY_R200: return "R200"; + case CHIP_FAMILY_RV250: return "RV250"; + case CHIP_FAMILY_RS300: return "RS300"; + case CHIP_FAMILY_RV280: return "RV280"; + case CHIP_FAMILY_R300: return "R300"; + case CHIP_FAMILY_R350: return "R350"; + case CHIP_FAMILY_RV350: return "RV350"; + case CHIP_FAMILY_RV380: return "RV380"; + case CHIP_FAMILY_R420: return "R420"; + case CHIP_FAMILY_RV410: return "RV410"; + case CHIP_FAMILY_RS400: return "RS400"; + case CHIP_FAMILY_RS600: return "RS600"; + case CHIP_FAMILY_RS690: return "RS690"; + case CHIP_FAMILY_RS740: return "RS740"; + case CHIP_FAMILY_RV515: return "RV515"; + case CHIP_FAMILY_R520: return "R520"; + case CHIP_FAMILY_RV530: return "RV530"; + case CHIP_FAMILY_R580: return "R580"; + case CHIP_FAMILY_RV560: return "RV560"; + case CHIP_FAMILY_RV570: return "RV570"; + default: return "unknown"; + } +} + + +/* Return various strings for glGetString(). + */ +static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + static char buffer[128]; + + switch (name) { + case GL_VENDOR: + if (IS_R300_CLASS(radeon->radeonScreen)) + return (GLubyte *) "DRI R300 Project"; + else + return (GLubyte *) "Tungsten Graphics, Inc."; + + case GL_RENDERER: + { + unsigned offset; + GLuint agp_mode = (radeon->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 : + radeon->radeonScreen->AGPMode; + const char* chipclass; + char hardwarename[32]; + + if (IS_R300_CLASS(radeon->radeonScreen)) + chipclass = "R300"; + else if (IS_R200_CLASS(radeon->radeonScreen)) + chipclass = "R200"; + else + chipclass = "R100"; + + sprintf(hardwarename, "%s (%s %04X)", + chipclass, + get_chip_family_name(radeon->radeonScreen->chip_family), + radeon->radeonScreen->device_id); + + offset = driGetRendererString(buffer, hardwarename, DRIVER_DATE, + agp_mode); + + if (IS_R300_CLASS(radeon->radeonScreen)) { + sprintf(&buffer[offset], " %sTCL", + (radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) + ? "" : "NO-"); + } else { + sprintf(&buffer[offset], " %sTCL", + !(radeon->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE) + ? "" : "NO-"); + } + + if (radeon->radeonScreen->driScreen->dri2.enabled) + strcat(buffer, " DRI2"); + + return (GLubyte *) buffer; + } + + default: + return NULL; + } +} + +/* Initialize the driver's misc functions. + */ +static void radeonInitDriverFuncs(struct dd_function_table *functions) +{ + functions->GetString = radeonGetString; +} + +/** + * Create and initialize all common fields of the context, + * including the Mesa context itself. + */ +GLboolean radeonInitContext(radeonContextPtr radeon, + struct dd_function_table* functions, + const __GLcontextModes * glVisual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); + GLcontext* ctx; + GLcontext* shareCtx; + int fthrottle_mode; + + /* Fill in additional standard functions. */ + radeonInitDriverFuncs(functions); + + radeon->radeonScreen = screen; + /* Allocate and initialize the Mesa context */ + if (sharedContextPrivate) + shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx; + else + shareCtx = NULL; + radeon->glCtx = _mesa_create_context(glVisual, shareCtx, + functions, (void *)radeon); + if (!radeon->glCtx) + return GL_FALSE; + + ctx = radeon->glCtx; + driContextPriv->driverPrivate = radeon; + + /* DRI fields */ + radeon->dri.context = driContextPriv; + radeon->dri.screen = sPriv; + radeon->dri.hwContext = driContextPriv->hHWContext; + radeon->dri.hwLock = &sPriv->pSAREA->lock; + radeon->dri.fd = sPriv->fd; + radeon->dri.drmMinor = sPriv->drm_version.minor; + + radeon->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + + screen->sarea_priv_offset); + + /* Setup IRQs */ + fthrottle_mode = driQueryOptioni(&radeon->optionCache, "fthrottle_mode"); + radeon->iw.irq_seq = -1; + radeon->irqsEmitted = 0; + radeon->do_irqs = (fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS && + radeon->radeonScreen->irq); + + radeon->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); + + if (!radeon->do_irqs) + fprintf(stderr, + "IRQ's not enabled, falling back to %s: %d %d\n", + radeon->do_usleeps ? "usleeps" : "busy waits", + fthrottle_mode, radeon->radeonScreen->irq); + + radeon->texture_depth = driQueryOptioni (&radeon->optionCache, + "texture_depth"); + if (radeon->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) + radeon->texture_depth = ( glVisual->rgbBits > 16 ) ? + DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; + + radeon->texture_row_align = 32; + + return GL_TRUE; +} + + + +/** + * Destroy the command buffer and state atoms. + */ +static void radeon_destroy_atom_list(radeonContextPtr radeon) +{ + struct radeon_state_atom *atom; + + foreach(atom, &radeon->hw.atomlist) { + FREE(atom->cmd); + if (atom->lastcmd) + FREE(atom->lastcmd); + } + +} + +/** + * Cleanup common context fields. + * Called by r200DestroyContext/r300DestroyContext + */ +void radeonDestroyContext(__DRIcontextPrivate *driContextPriv ) +{ +#ifdef RADEON_BO_TRACK + FILE *track; +#endif + GET_CURRENT_CONTEXT(ctx); + radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; + radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL; + + if (radeon == current) { + radeon_firevertices(radeon); + _mesa_make_current(NULL, NULL, NULL); + } + + assert(radeon); + if (radeon) { + + if (radeon->dma.current) { + rcommonFlushCmdBuf( radeon, __FUNCTION__ ); + } + + radeonReleaseArrays(radeon->glCtx, ~0); + + if (radeon->vtbl.free_context) + radeon->vtbl.free_context(radeon->glCtx); + _swsetup_DestroyContext( radeon->glCtx ); + _tnl_DestroyContext( radeon->glCtx ); + _vbo_DestroyContext( radeon->glCtx ); + _swrast_DestroyContext( radeon->glCtx ); + + /* free atom list */ + /* free the Mesa context */ + _mesa_destroy_context(radeon->glCtx); + + /* _mesa_destroy_context() might result in calls to functions that + * depend on the DriverCtx, so don't set it to NULL before. + * + * radeon->glCtx->DriverCtx = NULL; + */ + /* free the option cache */ + driDestroyOptionCache(&radeon->optionCache); + + rcommonDestroyCmdBuf(radeon); + + radeon_destroy_atom_list(radeon); + + if (radeon->state.scissor.pClipRects) { + FREE(radeon->state.scissor.pClipRects); + radeon->state.scissor.pClipRects = 0; + } + } +#ifdef RADEON_BO_TRACK + track = fopen("/tmp/tracklog", "w"); + if (track) { + radeon_tracker_print(&radeon->radeonScreen->bom->tracker, track); + fclose(track); + } +#endif + FREE(radeon); +} + +/* Force the context `c' to be unbound from its buffer. + */ +GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv) +{ + radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx %p\n", __FUNCTION__, + radeon->glCtx); + + return GL_TRUE; +} + + +static void +radeon_make_kernel_renderbuffer_current(radeonContextPtr radeon, + struct radeon_framebuffer *draw) +{ + /* if radeon->fake */ + struct radeon_renderbuffer *rb; + + if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { + if (!rb->bo) { + rb->bo = radeon_bo_open(radeon->radeonScreen->bom, + radeon->radeonScreen->frontOffset, + 0, + 0, + RADEON_GEM_DOMAIN_VRAM, + 0); + } + rb->cpp = radeon->radeonScreen->cpp; + rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp; + } + if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { + if (!rb->bo) { + rb->bo = radeon_bo_open(radeon->radeonScreen->bom, + radeon->radeonScreen->backOffset, + 0, + 0, + RADEON_GEM_DOMAIN_VRAM, + 0); + } + rb->cpp = radeon->radeonScreen->cpp; + rb->pitch = radeon->radeonScreen->backPitch * rb->cpp; + } + if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) { + if (!rb->bo) { + rb->bo = radeon_bo_open(radeon->radeonScreen->bom, + radeon->radeonScreen->depthOffset, + 0, + 0, + RADEON_GEM_DOMAIN_VRAM, + 0); + } + rb->cpp = radeon->radeonScreen->cpp; + rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; + } + if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) { + if (!rb->bo) { + rb->bo = radeon_bo_open(radeon->radeonScreen->bom, + radeon->radeonScreen->depthOffset, + 0, + 0, + RADEON_GEM_DOMAIN_VRAM, + 0); + } + rb->cpp = radeon->radeonScreen->cpp; + rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; + } +} + +static void +radeon_make_renderbuffer_current(radeonContextPtr radeon, + struct radeon_framebuffer *draw) +{ + int size = 4096*4096*4; + /* if radeon->fake */ + struct radeon_renderbuffer *rb; + + if (radeon->radeonScreen->kernel_mm) { + radeon_make_kernel_renderbuffer_current(radeon, draw); + return; + } + + + if ((rb = (void *)draw->base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer)) { + if (!rb->bo) { + rb->bo = radeon_bo_open(radeon->radeonScreen->bom, + radeon->radeonScreen->frontOffset + + radeon->radeonScreen->fbLocation, + size, + 4096, + RADEON_GEM_DOMAIN_VRAM, + 0); + } + rb->cpp = radeon->radeonScreen->cpp; + rb->pitch = radeon->radeonScreen->frontPitch * rb->cpp; + } + if ((rb = (void *)draw->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer)) { + if (!rb->bo) { + rb->bo = radeon_bo_open(radeon->radeonScreen->bom, + radeon->radeonScreen->backOffset + + radeon->radeonScreen->fbLocation, + size, + 4096, + RADEON_GEM_DOMAIN_VRAM, + 0); + } + rb->cpp = radeon->radeonScreen->cpp; + rb->pitch = radeon->radeonScreen->backPitch * rb->cpp; + } + if ((rb = (void *)draw->base.Attachment[BUFFER_DEPTH].Renderbuffer)) { + if (!rb->bo) { + rb->bo = radeon_bo_open(radeon->radeonScreen->bom, + radeon->radeonScreen->depthOffset + + radeon->radeonScreen->fbLocation, + size, + 4096, + RADEON_GEM_DOMAIN_VRAM, + 0); + } + rb->cpp = radeon->radeonScreen->cpp; + rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; + } + if ((rb = (void *)draw->base.Attachment[BUFFER_STENCIL].Renderbuffer)) { + if (!rb->bo) { + rb->bo = radeon_bo_open(radeon->radeonScreen->bom, + radeon->radeonScreen->depthOffset + + radeon->radeonScreen->fbLocation, + size, + 4096, + RADEON_GEM_DOMAIN_VRAM, + 0); + } + rb->cpp = radeon->radeonScreen->cpp; + rb->pitch = radeon->radeonScreen->depthPitch * rb->cpp; + } +} + +static unsigned +radeon_bits_per_pixel(const struct radeon_renderbuffer *rb) +{ + switch (rb->base._ActualFormat) { + case GL_RGB5: + case GL_DEPTH_COMPONENT16: + return 16; + case GL_RGB8: + case GL_RGBA8: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH24_STENCIL8_EXT: + case GL_STENCIL_INDEX8_EXT: + return 32; + default: + return 0; + } +} + +void +radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable) +{ + unsigned int attachments[10]; + __DRIbuffer *buffers = NULL; + __DRIscreen *screen; + struct radeon_renderbuffer *rb; + int i, count; + struct radeon_framebuffer *draw; + radeonContextPtr radeon; + char *regname; + struct radeon_bo *depth_bo = NULL, *bo; + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable); + + draw = drawable->driverPrivate; + screen = context->driScreenPriv; + radeon = (radeonContextPtr) context->driverPrivate; + + if (screen->dri2.loader + && (screen->dri2.loader->base.version > 2) + && (screen->dri2.loader->getBuffersWithFormat != NULL)) { + struct radeon_renderbuffer *depth_rb; + struct radeon_renderbuffer *stencil_rb; + + i = 0; + if (draw->color_rb[0]) { + attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + attachments[i++] = radeon_bits_per_pixel(draw->color_rb[0]); + } + + if (draw->color_rb[1]) { + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + attachments[i++] = radeon_bits_per_pixel(draw->color_rb[1]); + } + + depth_rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH); + stencil_rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL); + + if ((depth_rb != NULL) && (stencil_rb != NULL)) { + attachments[i++] = __DRI_BUFFER_DEPTH_STENCIL; + attachments[i++] = radeon_bits_per_pixel(depth_rb); + } else if (depth_rb != NULL) { + attachments[i++] = __DRI_BUFFER_DEPTH; + attachments[i++] = radeon_bits_per_pixel(depth_rb); + } else if (stencil_rb != NULL) { + attachments[i++] = __DRI_BUFFER_STENCIL; + attachments[i++] = radeon_bits_per_pixel(stencil_rb); + } + + buffers = (*screen->dri2.loader->getBuffersWithFormat)(drawable, + &drawable->w, + &drawable->h, + attachments, i / 2, + &count, + drawable->loaderPrivate); + } else if (screen->dri2.loader) { + i = 0; + if (draw->color_rb[0]) + attachments[i++] = __DRI_BUFFER_FRONT_LEFT; + if (draw->color_rb[1]) + attachments[i++] = __DRI_BUFFER_BACK_LEFT; + if (radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH)) + attachments[i++] = __DRI_BUFFER_DEPTH; + if (radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL)) + attachments[i++] = __DRI_BUFFER_STENCIL; + + buffers = (*screen->dri2.loader->getBuffers)(drawable, + &drawable->w, + &drawable->h, + attachments, i, + &count, + drawable->loaderPrivate); + } + + if (buffers == NULL) + return; + + /* set one cliprect to cover the whole drawable */ + drawable->x = 0; + drawable->y = 0; + drawable->backX = 0; + drawable->backY = 0; + drawable->numClipRects = 1; + drawable->pClipRects[0].x1 = 0; + drawable->pClipRects[0].y1 = 0; + drawable->pClipRects[0].x2 = drawable->w; + drawable->pClipRects[0].y2 = drawable->h; + drawable->numBackClipRects = 1; + drawable->pBackClipRects[0].x1 = 0; + drawable->pBackClipRects[0].y1 = 0; + drawable->pBackClipRects[0].x2 = drawable->w; + drawable->pBackClipRects[0].y2 = drawable->h; + for (i = 0; i < count; i++) { + switch (buffers[i].attachment) { + case __DRI_BUFFER_FRONT_LEFT: + rb = draw->color_rb[0]; + regname = "dri2 front buffer"; + break; + case __DRI_BUFFER_FAKE_FRONT_LEFT: + rb = draw->color_rb[0]; + regname = "dri2 fake front buffer"; + break; + case __DRI_BUFFER_BACK_LEFT: + rb = draw->color_rb[1]; + regname = "dri2 back buffer"; + break; + case __DRI_BUFFER_DEPTH: + rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH); + regname = "dri2 depth buffer"; + break; + case __DRI_BUFFER_DEPTH_STENCIL: + rb = radeon_get_renderbuffer(&draw->base, BUFFER_DEPTH); + regname = "dri2 depth / stencil buffer"; + break; + case __DRI_BUFFER_STENCIL: + rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL); + regname = "dri2 stencil buffer"; + break; + case __DRI_BUFFER_ACCUM: + default: + fprintf(stderr, + "unhandled buffer attach event, attacment type %d\n", + buffers[i].attachment); + return; + } + + if (rb == NULL) + continue; + + if (rb->bo) { + uint32_t name = radeon_gem_name_bo(rb->bo); + if (name == buffers[i].name) + continue; + } + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, + "attaching buffer %s, %d, at %d, cpp %d, pitch %d\n", + regname, buffers[i].name, buffers[i].attachment, + buffers[i].cpp, buffers[i].pitch); + + rb->cpp = buffers[i].cpp; + rb->pitch = buffers[i].pitch; + rb->width = drawable->w; + rb->height = drawable->h; + rb->has_surface = 0; + + if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) { + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "(reusing depth buffer as stencil)\n"); + bo = depth_bo; + radeon_bo_ref(bo); + } else { + bo = radeon_bo_open(radeon->radeonScreen->bom, + buffers[i].name, + 0, + 0, + RADEON_GEM_DOMAIN_VRAM, + buffers[i].flags); + if (bo == NULL) { + + fprintf(stderr, "failed to attach %s %d\n", + regname, buffers[i].name); + + } + } + + if (buffers[i].attachment == __DRI_BUFFER_DEPTH) { + if (draw->base.Visual.depthBits == 16) + rb->cpp = 2; + depth_bo = bo; + } + + radeon_renderbuffer_set_bo(rb, bo); + radeon_bo_unref(bo); + + if (buffers[i].attachment == __DRI_BUFFER_DEPTH_STENCIL) { + rb = radeon_get_renderbuffer(&draw->base, BUFFER_STENCIL); + if (rb != NULL) { + struct radeon_bo *stencil_bo = NULL; + + if (rb->bo) { + uint32_t name = radeon_gem_name_bo(rb->bo); + if (name == buffers[i].name) + continue; + } + + stencil_bo = bo; + radeon_bo_ref(stencil_bo); + radeon_renderbuffer_set_bo(rb, stencil_bo); + radeon_bo_unref(stencil_bo); + } + } + } + + driUpdateFramebufferSize(radeon->glCtx, drawable); +} + +/* Force the context `c' to be the current context and associate with it + * buffer `b'. + */ +GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv) +{ + radeonContextPtr radeon; + struct radeon_framebuffer *drfb; + struct gl_framebuffer *readfb; + + if (!driContextPriv) { + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx is null\n", __FUNCTION__); + _mesa_make_current(NULL, NULL, NULL); + return GL_TRUE; + } + + radeon = (radeonContextPtr) driContextPriv->driverPrivate; + drfb = driDrawPriv->driverPrivate; + readfb = driReadPriv->driverPrivate; + + if (driContextPriv->driScreenPriv->dri2.enabled) { + radeon_update_renderbuffers(driContextPriv, driDrawPriv); + if (driDrawPriv != driReadPriv) + radeon_update_renderbuffers(driContextPriv, driReadPriv); + _mesa_reference_renderbuffer(&radeon->state.color.rb, + &(radeon_get_renderbuffer(&drfb->base, BUFFER_BACK_LEFT)->base)); + _mesa_reference_renderbuffer(&radeon->state.depth.rb, + &(radeon_get_renderbuffer(&drfb->base, BUFFER_DEPTH)->base)); + } else { + radeon_make_renderbuffer_current(radeon, drfb); + } + + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "%s ctx %p dfb %p rfb %p\n", __FUNCTION__, radeon->glCtx, drfb, readfb); + + driUpdateFramebufferSize(radeon->glCtx, driDrawPriv); + if (driReadPriv != driDrawPriv) + driUpdateFramebufferSize(radeon->glCtx, driReadPriv); + + _mesa_make_current(radeon->glCtx, &drfb->base, readfb); + + _mesa_update_state(radeon->glCtx); + + if (radeon->glCtx->DrawBuffer == &drfb->base) { + if (driDrawPriv->swap_interval == (unsigned)-1) { + int i; + driDrawPriv->vblFlags = + (radeon->radeonScreen->irq != 0) + ? driGetDefaultVBlankFlags(&radeon-> + optionCache) + : VBLANK_FLAG_NO_IRQ; + + driDrawableInitVBlank(driDrawPriv); + drfb->vbl_waited = driDrawPriv->vblSeq; + + for (i = 0; i < 2; i++) { + if (drfb->color_rb[i]) + drfb->color_rb[i]->vbl_pending = driDrawPriv->vblSeq; + } + + } + + radeon_window_moved(radeon); + radeon_draw_buffer(radeon->glCtx, &drfb->base); + } + + + if (RADEON_DEBUG & DEBUG_DRI) + fprintf(stderr, "End %s\n", __FUNCTION__); + return GL_TRUE; +} + diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h new file mode 100644 index 0000000000..061168fe96 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -0,0 +1,581 @@ + +#ifndef COMMON_CONTEXT_H +#define COMMON_CONTEXT_H + +#include "main/mm.h" +#include "math/m_vector.h" +#include "texmem.h" +#include "tnl/t_context.h" +#include "main/colormac.h" + +#include "radeon_screen.h" +#include "radeon_drm.h" +#include "dri_util.h" +#include "tnl/t_vertex.h" + +struct radeon_context; + +#include "radeon_bocs_wrapper.h" + +/* This union is used to avoid warnings/miscompilation + with float to uint32_t casts due to strict-aliasing */ +typedef union { GLfloat f; uint32_t ui32; } float_ui32_type; + +struct radeon_context; +typedef struct radeon_context radeonContextRec; +typedef struct radeon_context *radeonContextPtr; + + +#define TEX_0 0x1 +#define TEX_1 0x2 +#define TEX_2 0x4 +#define TEX_3 0x8 +#define TEX_4 0x10 +#define TEX_5 0x20 + +/* Rasterizing fallbacks */ +/* See correponding strings in r200_swtcl.c */ +#define RADEON_FALLBACK_TEXTURE 0x0001 +#define RADEON_FALLBACK_DRAW_BUFFER 0x0002 +#define RADEON_FALLBACK_STENCIL 0x0004 +#define RADEON_FALLBACK_RENDER_MODE 0x0008 +#define RADEON_FALLBACK_BLEND_EQ 0x0010 +#define RADEON_FALLBACK_BLEND_FUNC 0x0020 +#define RADEON_FALLBACK_DISABLE 0x0040 +#define RADEON_FALLBACK_BORDER_MODE 0x0080 +#define RADEON_FALLBACK_DEPTH_BUFFER 0x0100 +#define RADEON_FALLBACK_STENCIL_BUFFER 0x0200 + +#define R200_FALLBACK_TEXTURE 0x01 +#define R200_FALLBACK_DRAW_BUFFER 0x02 +#define R200_FALLBACK_STENCIL 0x04 +#define R200_FALLBACK_RENDER_MODE 0x08 +#define R200_FALLBACK_DISABLE 0x10 +#define R200_FALLBACK_BORDER_MODE 0x20 + +#define RADEON_TCL_FALLBACK_RASTER 0x1 /* rasterization */ +#define RADEON_TCL_FALLBACK_UNFILLED 0x2 /* unfilled tris */ +#define RADEON_TCL_FALLBACK_LIGHT_TWOSIDE 0x4 /* twoside tris */ +#define RADEON_TCL_FALLBACK_MATERIAL 0x8 /* material in vb */ +#define RADEON_TCL_FALLBACK_TEXGEN_0 0x10 /* texgen, unit 0 */ +#define RADEON_TCL_FALLBACK_TEXGEN_1 0x20 /* texgen, unit 1 */ +#define RADEON_TCL_FALLBACK_TEXGEN_2 0x40 /* texgen, unit 2 */ +#define RADEON_TCL_FALLBACK_TCL_DISABLE 0x80 /* user disable */ +#define RADEON_TCL_FALLBACK_FOGCOORDSPEC 0x100 /* fogcoord, sep. spec light */ + +/* The blit width for texture uploads + */ +#define BLIT_WIDTH_BYTES 1024 + +/* Use the templated vertex format: + */ +#define COLOR_IS_RGBA +#define TAG(x) radeon##x +#include "tnl_dd/t_dd_vertex.h" +#undef TAG + +#define RADEON_RB_CLASS 0xdeadbeef + +struct radeon_renderbuffer +{ + struct gl_renderbuffer base; + struct radeon_bo *bo; + unsigned int cpp; + /* unsigned int offset; */ + unsigned int pitch; + unsigned int width; + unsigned int height; + + uint32_t draw_offset; /* FBO */ + /* boo Xorg 6.8.2 compat */ + int has_surface; + + GLuint pf_pending; /**< sequence number of pending flip */ + GLuint vbl_pending; /**< vblank sequence number of pending flip */ + __DRIdrawablePrivate *dPriv; +}; + +struct radeon_framebuffer +{ + struct gl_framebuffer base; + + struct radeon_renderbuffer *color_rb[2]; + + GLuint vbl_waited; + + /* buffer swap */ + int64_t swap_ust; + int64_t swap_missed_ust; + + GLuint swap_count; + GLuint swap_missed_count; + + /* Drawable page flipping state */ + GLboolean pf_active; + GLint pf_current_page; + GLint pf_num_pages; + +}; + + +struct radeon_colorbuffer_state { + GLuint clear; + int roundEnable; + struct gl_renderbuffer *rb; + uint32_t draw_offset; /* offset into color renderbuffer - FBOs */ +}; + +struct radeon_depthbuffer_state { + GLuint clear; + struct gl_renderbuffer *rb; +}; + +struct radeon_scissor_state { + drm_clip_rect_t rect; + GLboolean enabled; + + GLuint numClipRects; /* Cliprects active */ + GLuint numAllocedClipRects; /* Cliprects available */ + drm_clip_rect_t *pClipRects; +}; + +struct radeon_stencilbuffer_state { + GLuint clear; /* rb3d_stencilrefmask value */ +}; + +struct radeon_stipple_state { + GLuint mask[32]; +}; + +struct radeon_state_atom { + struct radeon_state_atom *next, *prev; + const char *name; /* for debug */ + int cmd_size; /* size in bytes */ + GLuint idx; + GLuint is_tcl; + GLuint *cmd; /* one or more cmd's */ + GLuint *lastcmd; /* one or more cmd's */ + GLboolean dirty; /* dirty-mark in emit_state_list */ + int (*check) (GLcontext *, struct radeon_state_atom *atom); /* is this state active? */ + void (*emit) (GLcontext *, struct radeon_state_atom *atom); +}; + +struct radeon_hw_state { + /* Head of the linked list of state atoms. */ + struct radeon_state_atom atomlist; + int max_state_size; /* Number of bytes necessary for a full state emit. */ + GLboolean is_dirty, all_dirty; +}; + + +/* Texture related */ +typedef struct _radeon_texture_image radeon_texture_image; + +struct _radeon_texture_image { + struct gl_texture_image base; + + /** + * If mt != 0, the image is stored in hardware format in the + * given mipmap tree. In this case, base.Data may point into the + * mapping of the buffer object that contains the mipmap tree. + * + * If mt == 0, the image is stored in normal memory pointed to + * by base.Data. + */ + struct _radeon_mipmap_tree *mt; + struct radeon_bo *bo; + + int mtlevel; /** if mt != 0, this is the image's level in the mipmap tree */ + int mtface; /** if mt != 0, this is the image's face in the mipmap tree */ +}; + + +static INLINE radeon_texture_image *get_radeon_texture_image(struct gl_texture_image *image) +{ + return (radeon_texture_image*)image; +} + + +typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr; + +#define RADEON_TXO_MICRO_TILE (1 << 3) + +/* Texture object in locally shared texture space. + */ +struct radeon_tex_obj { + struct gl_texture_object base; + struct _radeon_mipmap_tree *mt; + + /** + * This is true if we've verified that the mipmap tree above is complete + * and so on. + */ + GLboolean validated; + + GLuint override_offset; + GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ + GLuint tile_bits; /* hw texture tile bits used on this texture */ + struct radeon_bo *bo; + + GLuint pp_txfilter; /* hardware register values */ + GLuint pp_txformat; + GLuint pp_txformat_x; + GLuint pp_txsize; /* npot only */ + GLuint pp_txpitch; /* npot only */ + GLuint pp_border_color; + GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ + + GLuint pp_txfilter_1; /* r300 */ + + GLboolean border_fallback; + + +}; + +static INLINE radeonTexObj* radeon_tex_obj(struct gl_texture_object *texObj) +{ + return (radeonTexObj*)texObj; +} + +/* Need refcounting on dma buffers: + */ +struct radeon_dma_buffer { + int refcount; /* the number of retained regions in buf */ + drmBufPtr buf; +}; + +struct radeon_aos { + struct radeon_bo *bo; /** Buffer object where vertex data is stored */ + int offset; /** Offset into buffer object, in bytes */ + int components; /** Number of components per vertex */ + int stride; /** Stride in dwords (may be 0 for repeating) */ + int count; /** Number of vertices */ +}; + +struct radeon_dma { + /* Active dma region. Allocations for vertices and retained + * regions come from here. Also used for emitting random vertices, + * these may be flushed by calling flush_current(); + */ + struct radeon_bo *current; /** Buffer that DMA memory is allocated from */ + int current_used; /** Number of bytes allocated and forgotten about */ + int current_vertexptr; /** End of active vertex region */ + + /** + * If current_vertexptr != current_used then flush must be non-zero. + * flush must be called before non-active vertex allocations can be + * performed. + */ + void (*flush) (GLcontext *); + + /* Number of "in-flight" DMA buffers, i.e. the number of buffers + * for which a DISCARD command is currently queued in the command buffer +. + */ + GLuint nr_released_bufs; +}; + +/* radeon_swtcl.c + */ +struct radeon_swtcl_info { + + GLuint RenderIndex; + GLuint vertex_size; + GLubyte *verts; + + /* Fallback rasterization functions + */ + GLuint hw_primitive; + GLenum render_primitive; + GLuint numverts; + + struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; + GLuint vertex_attr_count; + +}; + +#define RADEON_MAX_AOS_ARRAYS 16 +struct radeon_tcl_info { + struct radeon_aos aos[RADEON_MAX_AOS_ARRAYS]; + GLuint aos_count; + struct radeon_bo *elt_dma_bo; /** Buffer object that contains element indices */ + int elt_dma_offset; /** Offset into this buffer object, in bytes */ +}; + +struct radeon_ioctl { + GLuint vertex_offset; + struct radeon_bo *bo; + GLuint vertex_size; +}; + +#define RADEON_MAX_PRIMS 64 + +struct radeon_prim { + GLuint start; + GLuint end; + GLuint prim; +}; + +static INLINE GLuint radeonPackColor(GLuint cpp, + GLubyte r, GLubyte g, + GLubyte b, GLubyte a) +{ + switch (cpp) { + case 2: + return PACK_COLOR_565(r, g, b); + case 4: + return PACK_COLOR_8888(a, r, g, b); + default: + return 0; + } +} + +#define MAX_CMD_BUF_SZ (16*1024) + +#define MAX_DMA_BUF_SZ (64*1024) + +struct radeon_store { + GLuint statenr; + GLuint primnr; + char cmd_buf[MAX_CMD_BUF_SZ]; + int cmd_used; + int elts_start; +}; + +struct radeon_dri_mirror { + __DRIcontextPrivate *context; /* DRI context */ + __DRIscreenPrivate *screen; /* DRI screen */ + + drm_context_t hwContext; + drm_hw_lock_t *hwLock; + int fd; + int drmMinor; +}; + +#define DEBUG_TEXTURE 0x001 +#define DEBUG_STATE 0x002 +#define DEBUG_IOCTL 0x004 +#define DEBUG_PRIMS 0x008 +#define DEBUG_VERTS 0x010 +#define DEBUG_FALLBACKS 0x020 +#define DEBUG_VFMT 0x040 +#define DEBUG_CODEGEN 0x080 +#define DEBUG_VERBOSE 0x100 +#define DEBUG_DRI 0x200 +#define DEBUG_DMA 0x400 +#define DEBUG_SANITY 0x800 +#define DEBUG_SYNC 0x1000 +#define DEBUG_PIXEL 0x2000 +#define DEBUG_MEMORY 0x4000 + + +typedef void (*radeon_tri_func) (radeonContextPtr, + radeonVertex *, + radeonVertex *, radeonVertex *); + +typedef void (*radeon_line_func) (radeonContextPtr, + radeonVertex *, radeonVertex *); + +typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *); + +#define RADEON_MAX_BOS 32 +struct radeon_state { + struct radeon_colorbuffer_state color; + struct radeon_depthbuffer_state depth; + struct radeon_scissor_state scissor; + struct radeon_stencilbuffer_state stencil; + + struct radeon_cs_space_check bos[RADEON_MAX_BOS]; + int validated_bo_count; +}; + +/** + * This structure holds the command buffer while it is being constructed. + * + * The first batch of commands in the buffer is always the state that needs + * to be re-emitted when the context is lost. This batch can be skipped + * otherwise. + */ +struct radeon_cmdbuf { + struct radeon_cs_manager *csm; + struct radeon_cs *cs; + int size; /** # of dwords total */ + unsigned int flushing:1; /** whether we're currently in FlushCmdBufLocked */ +}; + +struct radeon_context { + GLcontext *glCtx; + radeonScreenPtr radeonScreen; /* Screen private DRI data */ + + /* Texture object bookkeeping + */ + int texture_depth; + float initialMaxAnisotropy; + uint32_t texture_row_align; + + struct radeon_dma dma; + struct radeon_hw_state hw; + /* Rasterization and vertex state: + */ + GLuint TclFallback; + GLuint Fallback; + GLuint NewGLState; + DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ + + /* Drawable, cliprect and scissor information */ + GLuint numClipRects; /* Cliprects for the draw buffer */ + drm_clip_rect_t *pClipRects; + unsigned int lastStamp; + GLboolean lost_context; + drm_radeon_sarea_t *sarea; /* Private SAREA data */ + + /* Mirrors of some DRI state */ + struct radeon_dri_mirror dri; + + /* Busy waiting */ + GLuint do_usleeps; + GLuint do_irqs; + GLuint irqsEmitted; + drm_radeon_irq_wait_t iw; + + /* Derived state - for r300 only */ + struct radeon_state state; + + struct radeon_swtcl_info swtcl; + struct radeon_tcl_info tcl; + /* Configuration cache + */ + driOptionCache optionCache; + + struct radeon_cmdbuf cmdbuf; + + drm_clip_rect_t fboRect; + GLboolean constant_cliprect; /* use for FBO or DRI2 rendering */ + GLboolean front_cliprects; + + /** + * Set if rendering has occured to the drawable's front buffer. + * + * This is used in the DRI2 case to detect that glFlush should also copy + * the contents of the fake front buffer to the real front buffer. + */ + GLboolean front_buffer_dirty; + + /** + * Track whether front-buffer rendering is currently enabled + * + * A separate flag is used to track this in order to support MRT more + * easily. + */ + GLboolean is_front_buffer_rendering; + + struct { + struct gl_fragment_program *bitmap_fp; + struct gl_vertex_program *passthrough_vp; + + struct gl_fragment_program *saved_fp; + GLboolean saved_fp_enable; + struct gl_vertex_program *saved_vp; + GLboolean saved_vp_enable; + + GLint saved_vp_x, saved_vp_y; + GLsizei saved_vp_width, saved_vp_height; + GLenum saved_matrix_mode; + } meta; + + struct { + void (*get_lock)(radeonContextPtr radeon); + void (*update_viewport_offset)(GLcontext *ctx); + void (*emit_cs_header)(struct radeon_cs *cs, radeonContextPtr rmesa); + void (*swtcl_flush)(GLcontext *ctx, uint32_t offset); + void (*pre_emit_atoms)(radeonContextPtr rmesa); + void (*pre_emit_state)(radeonContextPtr rmesa); + void (*fallback)(GLcontext *ctx, GLuint bit, GLboolean mode); + void (*free_context)(GLcontext *ctx); + } vtbl; +}; + +#define RADEON_CONTEXT(glctx) ((radeonContextPtr)(ctx->DriverCtx)) + +static inline __DRIdrawablePrivate* radeon_get_drawable(radeonContextPtr radeon) +{ + return radeon->dri.context->driDrawablePriv; +} + +static inline __DRIdrawablePrivate* radeon_get_readable(radeonContextPtr radeon) +{ + return radeon->dri.context->driReadablePriv; +} + + +/** + * This function takes a float and packs it into a uint32_t + */ +static INLINE uint32_t radeonPackFloat32(float fl) +{ + union { + float fl; + uint32_t u; + } u; + + u.fl = fl; + return u.u; +} + +/* This is probably wrong for some values, I need to test this + * some more. Range checking would be a good idea also.. + * + * But it works for most things. I'll fix it later if someone + * else with a better clue doesn't + */ +static INLINE uint32_t radeonPackFloat24(float f) +{ + float mantissa; + int exponent; + uint32_t float24 = 0; + + if (f == 0.0) + return 0; + + mantissa = frexpf(f, &exponent); + + /* Handle -ve */ + if (mantissa < 0) { + float24 |= (1 << 23); + mantissa = mantissa * -1.0; + } + /* Handle exponent, bias of 63 */ + exponent += 62; + float24 |= (exponent << 16); + /* Kill 7 LSB of mantissa */ + float24 |= (radeonPackFloat32(mantissa) & 0x7FFFFF) >> 7; + + return float24; +} + +GLboolean radeonInitContext(radeonContextPtr radeon, + struct dd_function_table* functions, + const __GLcontextModes * glVisual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); + +void radeonCleanupContext(radeonContextPtr radeon); +GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv); +void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable); +GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv); +extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv); + +/* ================================================================ + * Debugging: + */ +#define DO_DEBUG 1 + +#if DO_DEBUG +extern int RADEON_DEBUG; +#else +#define RADEON_DEBUG 0 +#endif + +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_compat.c b/src/mesa/drivers/dri/radeon/radeon_compat.c deleted file mode 100644 index 46b490d61f..0000000000 --- a/src/mesa/drivers/dri/radeon/radeon_compat.c +++ /dev/null @@ -1,301 +0,0 @@ -/************************************************************************** - -Copyright 2002 ATI Technologies Inc., Ontario, Canada, and - Tungsten Graphics Inc., Austin, Texas. - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -on the rights to use, copy, modify, merge, publish, distribute, sub -license, and/or sell copies of the Software, and to permit persons to whom -the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next -paragraph) shall be included in all copies or substantial portions of the -Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -ATI, TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Keith Whitwell <keith@tungstengraphics.com> - * - */ - -#include "main/glheader.h" -#include "main/imports.h" - -#include "radeon_context.h" -#include "radeon_state.h" -#include "radeon_ioctl.h" - - -static struct { - int start; - int len; - const char *name; -} packet[RADEON_MAX_STATE_PACKETS] = { - { RADEON_PP_MISC,7,"RADEON_PP_MISC" }, - { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" }, - { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" }, - { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" }, - { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" }, - { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" }, - { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" }, - { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" }, - { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" }, - { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" }, - { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" }, - { RADEON_RE_MISC,1,"RADEON_RE_MISC" }, - { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" }, - { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" }, - { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" }, - { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" }, - { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" }, - { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" }, - { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" }, - { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" }, - { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" }, -}; - - -static void radeonCompatEmitPacket( radeonContextPtr rmesa, - struct radeon_state_atom *state ) -{ - drm_radeon_sarea_t *sarea = rmesa->sarea; - drm_radeon_context_regs_t *ctx = &sarea->context_state; - drm_radeon_texture_regs_t *tex0 = &sarea->tex_state[0]; - drm_radeon_texture_regs_t *tex1 = &sarea->tex_state[1]; - int i; - int *buf = state->cmd; - - for ( i = 0 ; i < state->cmd_size ; ) { - drm_radeon_cmd_header_t *header = (drm_radeon_cmd_header_t *)&buf[i++]; - - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "%s %d: %s\n", __FUNCTION__, header->packet.packet_id, - packet[(int)header->packet.packet_id].name); - - switch (header->packet.packet_id) { - case RADEON_EMIT_PP_MISC: - ctx->pp_misc = buf[i++]; - ctx->pp_fog_color = buf[i++]; - ctx->re_solid_color = buf[i++]; - ctx->rb3d_blendcntl = buf[i++]; - ctx->rb3d_depthoffset = buf[i++]; - ctx->rb3d_depthpitch = buf[i++]; - ctx->rb3d_zstencilcntl = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_CONTEXT; - break; - case RADEON_EMIT_PP_CNTL: - ctx->pp_cntl = buf[i++]; - ctx->rb3d_cntl = buf[i++]; - ctx->rb3d_coloroffset = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_CONTEXT; - break; - case RADEON_EMIT_RB3D_COLORPITCH: - ctx->rb3d_colorpitch = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_CONTEXT; - break; - case RADEON_EMIT_RE_LINE_PATTERN: - ctx->re_line_pattern = buf[i++]; - ctx->re_line_state = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_LINE; - break; - case RADEON_EMIT_SE_LINE_WIDTH: - ctx->se_line_width = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_LINE; - break; - case RADEON_EMIT_PP_LUM_MATRIX: - ctx->pp_lum_matrix = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_BUMPMAP; - break; - case RADEON_EMIT_PP_ROT_MATRIX_0: - ctx->pp_rot_matrix_0 = buf[i++]; - ctx->pp_rot_matrix_1 = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_BUMPMAP; - break; - case RADEON_EMIT_RB3D_STENCILREFMASK: - ctx->rb3d_stencilrefmask = buf[i++]; - ctx->rb3d_ropcntl = buf[i++]; - ctx->rb3d_planemask = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_MASKS; - break; - case RADEON_EMIT_SE_VPORT_XSCALE: - ctx->se_vport_xscale = buf[i++]; - ctx->se_vport_xoffset = buf[i++]; - ctx->se_vport_yscale = buf[i++]; - ctx->se_vport_yoffset = buf[i++]; - ctx->se_vport_zscale = buf[i++]; - ctx->se_vport_zoffset = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_VIEWPORT; - break; - case RADEON_EMIT_SE_CNTL: - ctx->se_cntl = buf[i++]; - ctx->se_coord_fmt = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_CONTEXT | RADEON_UPLOAD_VERTFMT; - break; - case RADEON_EMIT_SE_CNTL_STATUS: - ctx->se_cntl_status = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_SETUP; - break; - case RADEON_EMIT_RE_MISC: - ctx->re_misc = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_MISC; - break; - case RADEON_EMIT_PP_TXFILTER_0: - tex0->pp_txfilter = buf[i++]; - tex0->pp_txformat = buf[i++]; - tex0->pp_txoffset = buf[i++]; - tex0->pp_txcblend = buf[i++]; - tex0->pp_txablend = buf[i++]; - tex0->pp_tfactor = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_TEX0; - break; - case RADEON_EMIT_PP_BORDER_COLOR_0: - tex0->pp_border_color = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_TEX0; - break; - case RADEON_EMIT_PP_TXFILTER_1: - tex1->pp_txfilter = buf[i++]; - tex1->pp_txformat = buf[i++]; - tex1->pp_txoffset = buf[i++]; - tex1->pp_txcblend = buf[i++]; - tex1->pp_txablend = buf[i++]; - tex1->pp_tfactor = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_TEX1; - break; - case RADEON_EMIT_PP_BORDER_COLOR_1: - tex1->pp_border_color = buf[i++]; - sarea->dirty |= RADEON_UPLOAD_TEX1; - break; - - case RADEON_EMIT_SE_ZBIAS_FACTOR: - i++; - i++; - break; - - case RADEON_EMIT_PP_TXFILTER_2: - case RADEON_EMIT_PP_BORDER_COLOR_2: - case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT: - case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED: - default: - /* These states aren't understood by radeon drm 1.1 */ - fprintf(stderr, "Tried to emit unsupported state\n"); - return; - } - } -} - - - -static void radeonCompatEmitStateLocked( radeonContextPtr rmesa ) -{ - struct radeon_state_atom *atom; - - if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty) - return; - - foreach(atom, &rmesa->hw.atomlist) { - if (rmesa->hw.all_dirty) - atom->dirty = GL_TRUE; - if (atom->is_tcl) - atom->dirty = GL_FALSE; - if (atom->dirty) - radeonCompatEmitPacket(rmesa, atom); - } - - rmesa->hw.is_dirty = GL_FALSE; - rmesa->hw.all_dirty = GL_FALSE; -} - - -static void radeonCompatEmitPrimitiveLocked( radeonContextPtr rmesa, - GLuint hw_primitive, - GLuint nverts, - drm_clip_rect_t *pbox, - GLuint nbox ) -{ - int i; - - for ( i = 0 ; i < nbox ; ) { - int nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, nbox ); - drm_clip_rect_t *b = rmesa->sarea->boxes; - drm_radeon_vertex_t vtx; - - rmesa->sarea->dirty |= RADEON_UPLOAD_CLIPRECTS; - rmesa->sarea->nbox = nr - i; - - for ( ; i < nr ; i++) - *b++ = pbox[i]; - - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, - "RadeonFlushVertexBuffer: prim %x buf %d verts %d " - "disc %d nbox %d\n", - hw_primitive, - rmesa->dma.current.buf->buf->idx, - nverts, - nr == nbox, - rmesa->sarea->nbox ); - - vtx.prim = hw_primitive; - vtx.idx = rmesa->dma.current.buf->buf->idx; - vtx.count = nverts; - vtx.discard = (nr == nbox); - - drmCommandWrite( rmesa->dri.fd, - DRM_RADEON_VERTEX, - &vtx, sizeof(vtx)); - } -} - - - -/* No 'start' for 1.1 vertices ioctl: only one vertex prim/buffer! - */ -void radeonCompatEmitPrimitive( radeonContextPtr rmesa, - GLuint vertex_format, - GLuint hw_primitive, - GLuint nrverts ) -{ - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); - - LOCK_HARDWARE( rmesa ); - - radeonCompatEmitStateLocked( rmesa ); - rmesa->sarea->vc_format = vertex_format; - - if (rmesa->state.scissor.enabled) { - radeonCompatEmitPrimitiveLocked( rmesa, - hw_primitive, - nrverts, - rmesa->state.scissor.pClipRects, - rmesa->state.scissor.numClipRects ); - } - else { - radeonCompatEmitPrimitiveLocked( rmesa, - hw_primitive, - nrverts, - rmesa->pClipRects, - rmesa->numClipRects ); - } - - - UNLOCK_HARDWARE( rmesa ); -} - diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index b67bda7d06..46cba73e29 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -53,6 +53,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drivers/common/driverfuncs.h" +#include "radeon_common.h" #include "radeon_context.h" #include "radeon_ioctl.h" #include "radeon_state.h" @@ -65,6 +66,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define need_GL_EXT_blend_minmax #define need_GL_EXT_fog_coord #define need_GL_EXT_secondary_color +#define need_GL_EXT_framebuffer_object #include "extension_helper.h" #define DRIVER_DATE "20061018" @@ -72,40 +74,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "vblank.h" #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ -#ifndef RADEON_DEBUG -int RADEON_DEBUG = (0); -#endif - - -/* Return various strings for glGetString(). - */ -static const GLubyte *radeonGetString( GLcontext *ctx, GLenum name ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - static char buffer[128]; - unsigned offset; - GLuint agp_mode = (rmesa->radeonScreen->card_type==RADEON_CARD_PCI) ? 0 : - rmesa->radeonScreen->AGPMode; - - switch ( name ) { - case GL_VENDOR: - return (GLubyte *)"Tungsten Graphics, Inc."; - - case GL_RENDERER: - offset = driGetRendererString( buffer, "Radeon", DRIVER_DATE, - agp_mode ); - - sprintf( & buffer[ offset ], " %sTCL", - !(rmesa->TclFallback & RADEON_TCL_FALLBACK_TCL_DISABLE) - ? "" : "NO-" ); - - return (GLubyte *)buffer; - - default: - return NULL; - } -} - /* Extension strings exported by the R100 driver. */ @@ -121,6 +89,7 @@ const struct dri_extension card_extensions[] = { "GL_EXT_blend_logic_op", NULL }, { "GL_EXT_blend_subtract", GL_EXT_blend_minmax_functions }, { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_packed_depth_stencil", NULL}, { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, { "GL_EXT_stencil_wrap", NULL }, { "GL_EXT_texture_edge_clamp", NULL }, @@ -137,6 +106,11 @@ const struct dri_extension card_extensions[] = { NULL, NULL } }; +const struct dri_extension mm_extensions[] = { + { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, + { NULL, NULL } +}; + extern const struct tnl_pipeline_stage _radeon_render_stage; extern const struct tnl_pipeline_stage _radeon_tcl_stage; @@ -160,15 +134,6 @@ static const struct tnl_pipeline_stage *radeon_pipeline[] = { NULL, }; - - -/* Initialize the driver's misc functions. - */ -static void radeonInitDriverFuncs( struct dd_function_table *functions ) -{ - functions->GetString = radeonGetString; -} - static const struct dri_debug_control debug_control[] = { { "fall", DEBUG_FALLBACKS }, @@ -188,19 +153,69 @@ static const struct dri_debug_control debug_control[] = { NULL, 0 } }; +static void r100_get_lock(radeonContextPtr radeon) +{ + r100ContextPtr rmesa = (r100ContextPtr)radeon; + drm_radeon_sarea_t *sarea = radeon->sarea; + + RADEON_STATECHANGE(rmesa, ctx); + if (rmesa->radeon.sarea->tiling_enabled) { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= + RADEON_COLOR_TILE_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= + ~RADEON_COLOR_TILE_ENABLE; + } + + if (sarea->ctx_owner != rmesa->radeon.dri.hwContext) { + sarea->ctx_owner = rmesa->radeon.dri.hwContext; + + if (!radeon->radeonScreen->kernel_mm) + radeon_bo_legacy_texture_age(radeon->radeonScreen->bom); + } +} + +static void r100_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) +{ +} + +static void r100_vtbl_pre_emit_state(radeonContextPtr radeon) +{ + r100ContextPtr rmesa = (r100ContextPtr)radeon; + + /* r100 always needs to emit ZBS to avoid TCL lockups */ + rmesa->hw.zbs.dirty = 1; + radeon->hw.is_dirty = 1; +} + +static void r100_vtbl_free_context(GLcontext *ctx) +{ + r100ContextPtr rmesa = R100_CONTEXT(ctx); + _mesa_vector4f_free( &rmesa->tcl.ObjClean ); +} + +static void r100_init_vtbl(radeonContextPtr radeon) +{ + radeon->vtbl.get_lock = r100_get_lock; + radeon->vtbl.update_viewport_offset = radeonUpdateViewportOffset; + radeon->vtbl.emit_cs_header = r100_vtbl_emit_cs_header; + radeon->vtbl.swtcl_flush = r100_swtcl_flush; + radeon->vtbl.pre_emit_state = r100_vtbl_pre_emit_state; + radeon->vtbl.fallback = radeonFallback; +} /* Create the device specific context. */ GLboolean -radeonCreateContext( const __GLcontextModes *glVisual, +r100CreateContext( const __GLcontextModes *glVisual, __DRIcontextPrivate *driContextPriv, void *sharedContextPrivate) { __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr)(sPriv->private); struct dd_function_table functions; - radeonContextPtr rmesa; - GLcontext *ctx, *shareCtx; + r100ContextPtr rmesa; + GLcontext *ctx; int i; int tcl_mode, fthrottle_mode; @@ -209,10 +224,12 @@ radeonCreateContext( const __GLcontextModes *glVisual, assert(screen); /* Allocate the Radeon context */ - rmesa = (radeonContextPtr) CALLOC( sizeof(*rmesa) ); + rmesa = (r100ContextPtr) CALLOC( sizeof(*rmesa) ); if ( !rmesa ) return GL_FALSE; + r100_init_vtbl(&rmesa->radeon); + /* init exp fog table data */ radeonInitStaticFogData(); @@ -220,12 +237,12 @@ radeonCreateContext( const __GLcontextModes *glVisual, * Do this here so that initialMaxAnisotropy is set before we create * the default textures. */ - driParseConfigFiles (&rmesa->optionCache, &screen->optionCache, + driParseConfigFiles (&rmesa->radeon.optionCache, &screen->optionCache, screen->driScreen->myNum, "radeon"); - rmesa->initialMaxAnisotropy = driQueryOptionf(&rmesa->optionCache, + rmesa->radeon.initialMaxAnisotropy = driQueryOptionf(&rmesa->radeon.optionCache, "def_max_anisotropy"); - if ( driQueryOptionb( &rmesa->optionCache, "hyperz" ) ) { + if ( driQueryOptionb( &rmesa->radeon.optionCache, "hyperz" ) ) { if ( sPriv->drm_version.minor < 13 ) fprintf( stderr, "DRM version 1.%d too old to support HyperZ, " "disabling.\n", sPriv->drm_version.minor ); @@ -240,65 +257,17 @@ radeonCreateContext( const __GLcontextModes *glVisual, * (the texture functions are especially important) */ _mesa_init_driver_functions( &functions ); - radeonInitDriverFuncs( &functions ); radeonInitTextureFuncs( &functions ); - /* Allocate the Mesa context */ - if (sharedContextPrivate) - shareCtx = ((radeonContextPtr) sharedContextPrivate)->glCtx; - else - shareCtx = NULL; - rmesa->glCtx = _mesa_create_context(glVisual, shareCtx, - &functions, (void *) rmesa); - if (!rmesa->glCtx) { - FREE(rmesa); - return GL_FALSE; - } - driContextPriv->driverPrivate = rmesa; - - /* Init radeon context data */ - rmesa->dri.context = driContextPriv; - rmesa->dri.screen = sPriv; - rmesa->dri.drawable = NULL; - rmesa->dri.readable = NULL; - rmesa->dri.hwContext = driContextPriv->hHWContext; - rmesa->dri.hwLock = &sPriv->pSAREA->lock; - rmesa->dri.fd = sPriv->fd; - rmesa->dri.drmMinor = sPriv->drm_version.minor; - - rmesa->radeonScreen = screen; - rmesa->sarea = (drm_radeon_sarea_t *)((GLubyte *)sPriv->pSAREA + - screen->sarea_priv_offset); - - - rmesa->dma.buf0_address = rmesa->radeonScreen->buffers->list[0].address; - - (void) memset( rmesa->texture_heaps, 0, sizeof( rmesa->texture_heaps ) ); - make_empty_list( & rmesa->swapped ); - - rmesa->nr_heaps = screen->numTexHeaps; - for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { - rmesa->texture_heaps[i] = driCreateTextureHeap( i, rmesa, - screen->texSize[i], - 12, - RADEON_NR_TEX_REGIONS, - (drmTextureRegionPtr)rmesa->sarea->tex_list[i], - & rmesa->sarea->tex_age[i], - & rmesa->swapped, - sizeof( radeonTexObj ), - (destroy_texture_object_t *) radeonDestroyTexObj ); - - driSetTextureSwapCounterLocation( rmesa->texture_heaps[i], - & rmesa->c_textureSwaps ); + if (!radeonInitContext(&rmesa->radeon, &functions, + glVisual, driContextPriv, + sharedContextPrivate)) { + FREE(rmesa); + return GL_FALSE; } - rmesa->texture_depth = driQueryOptioni (&rmesa->optionCache, - "texture_depth"); - if (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FB) - rmesa->texture_depth = ( screen->cpp == 4 ) ? - DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16; - rmesa->swtcl.RenderIndex = ~0; - rmesa->hw.all_dirty = GL_TRUE; + rmesa->radeon.swtcl.RenderIndex = ~0; + rmesa->radeon.hw.all_dirty = GL_TRUE; /* Set the maximum texture size small enough that we can guarentee that * all texture units can bind a maximal texture and have all of them in @@ -306,26 +275,20 @@ radeonCreateContext( const __GLcontextModes *glVisual, * setting allow larger textures. */ - ctx = rmesa->glCtx; - ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->optionCache, + ctx = rmesa->radeon.glCtx; + ctx->Const.MaxTextureUnits = driQueryOptioni (&rmesa->radeon.optionCache, "texture_units"); ctx->Const.MaxTextureImageUnits = ctx->Const.MaxTextureUnits; ctx->Const.MaxTextureCoordUnits = ctx->Const.MaxTextureUnits; - i = driQueryOptioni( &rmesa->optionCache, "allow_large_textures"); - - driCalculateMaxTextureLevels( rmesa->texture_heaps, - rmesa->nr_heaps, - & ctx->Const, - 4, - 11, /* max 2D texture size is 2048x2048 */ - 8, /* 256^3 */ - 9, /* \todo: max cube texture size seems to be 512x512(x6) */ - 11, /* max rect texture size is 2048x2048. */ - 12, - GL_FALSE, - i ); + i = driQueryOptioni( &rmesa->radeon.optionCache, "allow_large_textures"); + /* FIXME: When no memory manager is available we should set this + * to some reasonable value based on texture memory pool size */ + ctx->Const.MaxTextureLevels = 12; + ctx->Const.Max3DTextureLevels = 9; + ctx->Const.MaxCubeTextureLevels = 12; + ctx->Const.MaxTextureRectSize = 2048; ctx->Const.MaxTextureMaxAnisotropy = 16.0; @@ -390,38 +353,39 @@ radeonCreateContext( const __GLcontextModes *glVisual, } driInitExtensions( ctx, card_extensions, GL_TRUE ); - if (rmesa->radeonScreen->drmSupportsCubeMapsR100) + if (rmesa->radeon.radeonScreen->kernel_mm) + driInitExtensions(ctx, mm_extensions, GL_FALSE); + if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100) _mesa_enable_extension( ctx, "GL_ARB_texture_cube_map" ); - if (rmesa->glCtx->Mesa_DXTn) { + if (rmesa->radeon.glCtx->Mesa_DXTn) { _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); _mesa_enable_extension( ctx, "GL_S3_s3tc" ); } - else if (driQueryOptionb (&rmesa->optionCache, "force_s3tc_enable")) { + else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) { _mesa_enable_extension( ctx, "GL_EXT_texture_compression_s3tc" ); } - if (rmesa->dri.drmMinor >= 9) + if (rmesa->radeon.dri.drmMinor >= 9) _mesa_enable_extension( ctx, "GL_NV_texture_rectangle"); /* XXX these should really go right after _mesa_init_driver_functions() */ + radeon_fbo_init(&rmesa->radeon); + radeonInitSpanFuncs( ctx ); radeonInitIoctlFuncs( ctx ); radeonInitStateFuncs( ctx ); - radeonInitSpanFuncs( ctx ); radeonInitState( rmesa ); radeonInitSwtcl( ctx ); _mesa_vector4f_alloc( &rmesa->tcl.ObjClean, 0, ctx->Const.MaxArrayLockSize, 32 ); - fthrottle_mode = driQueryOptioni(&rmesa->optionCache, "fthrottle_mode"); - rmesa->iw.irq_seq = -1; - rmesa->irqsEmitted = 0; - rmesa->do_irqs = (rmesa->radeonScreen->irq != 0 && - fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS); - - rmesa->do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); + fthrottle_mode = driQueryOptioni(&rmesa->radeon.optionCache, "fthrottle_mode"); + rmesa->radeon.iw.irq_seq = -1; + rmesa->radeon.irqsEmitted = 0; + rmesa->radeon.do_irqs = (rmesa->radeon.radeonScreen->irq != 0 && + fthrottle_mode == DRI_CONF_FTHROTTLE_IRQS); - (*sPriv->systemTime->getUST)( & rmesa->swap_ust ); + rmesa->radeon.do_usleeps = (fthrottle_mode == DRI_CONF_FTHROTTLE_USLEEPS); #if DO_DEBUG @@ -429,206 +393,21 @@ radeonCreateContext( const __GLcontextModes *glVisual, debug_control ); #endif - tcl_mode = driQueryOptioni(&rmesa->optionCache, "tcl_mode"); - if (driQueryOptionb(&rmesa->optionCache, "no_rast")) { + tcl_mode = driQueryOptioni(&rmesa->radeon.optionCache, "tcl_mode"); + if (driQueryOptionb(&rmesa->radeon.optionCache, "no_rast")) { fprintf(stderr, "disabling 3D acceleration\n"); FALLBACK(rmesa, RADEON_FALLBACK_DISABLE, 1); } else if (tcl_mode == DRI_CONF_TCL_SW || - !(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { - if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { - rmesa->radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL; + !(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { + if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + rmesa->radeon.radeonScreen->chip_flags &= ~RADEON_CHIPSET_TCL; fprintf(stderr, "Disabling HW TCL support\n"); } - TCL_FALLBACK(rmesa->glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1); + TCL_FALLBACK(rmesa->radeon.glCtx, RADEON_TCL_FALLBACK_TCL_DISABLE, 1); } - if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { /* _tnl_need_dlist_norm_lengths( ctx, GL_FALSE ); */ } return GL_TRUE; } - - -/* Destroy the device specific context. - */ -/* Destroy the Mesa and driver specific context data. - */ -void radeonDestroyContext( __DRIcontextPrivate *driContextPriv ) -{ - GET_CURRENT_CONTEXT(ctx); - radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate; - radeonContextPtr current = ctx ? RADEON_CONTEXT(ctx) : NULL; - - /* check if we're deleting the currently bound context */ - if (rmesa == current) { - RADEON_FIREVERTICES( rmesa ); - _mesa_make_current(NULL, NULL, NULL); - } - - /* Free radeon context resources */ - assert(rmesa); /* should never be null */ - if ( rmesa ) { - GLboolean release_texture_heaps; - - - release_texture_heaps = (rmesa->glCtx->Shared->RefCount == 1); - _swsetup_DestroyContext( rmesa->glCtx ); - _tnl_DestroyContext( rmesa->glCtx ); - _vbo_DestroyContext( rmesa->glCtx ); - _swrast_DestroyContext( rmesa->glCtx ); - - radeonDestroySwtcl( rmesa->glCtx ); - radeonReleaseArrays( rmesa->glCtx, ~0 ); - if (rmesa->dma.current.buf) { - radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); - radeonFlushCmdBuf( rmesa, __FUNCTION__ ); - } - - _mesa_vector4f_free( &rmesa->tcl.ObjClean ); - - if (rmesa->state.scissor.pClipRects) { - FREE(rmesa->state.scissor.pClipRects); - rmesa->state.scissor.pClipRects = NULL; - } - - if ( release_texture_heaps ) { - /* This share group is about to go away, free our private - * texture object data. - */ - int i; - - for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) { - driDestroyTextureHeap( rmesa->texture_heaps[ i ] ); - rmesa->texture_heaps[ i ] = NULL; - } - - assert( is_empty_list( & rmesa->swapped ) ); - } - - /* free the Mesa context */ - rmesa->glCtx->DriverCtx = NULL; - _mesa_destroy_context( rmesa->glCtx ); - - /* free the option cache */ - driDestroyOptionCache (&rmesa->optionCache); - - FREE( rmesa ); - } -} - - - - -void -radeonSwapBuffers( __DRIdrawablePrivate *dPriv ) -{ - - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - radeonContextPtr rmesa; - GLcontext *ctx; - rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; - ctx = rmesa->glCtx; - if (ctx->Visual.doubleBufferMode) { - _mesa_notifySwapBuffers( ctx ); /* flush pending rendering comands */ - - if ( rmesa->doPageFlip ) { - radeonPageFlip( dPriv ); - } - else { - radeonCopyBuffer( dPriv, NULL ); - } - } - } - else { - /* XXX this shouldn't be an error but we can't handle it for now */ - _mesa_problem(NULL, "%s: drawable has no context!", __FUNCTION__); - } -} - -void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, - int x, int y, int w, int h ) -{ - if (dPriv->driContextPriv && dPriv->driContextPriv->driverPrivate) { - radeonContextPtr radeon; - GLcontext *ctx; - - radeon = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; - ctx = radeon->glCtx; - - if (ctx->Visual.doubleBufferMode) { - drm_clip_rect_t rect; - rect.x1 = x + dPriv->x; - rect.y1 = (dPriv->h - y - h) + dPriv->y; - rect.x2 = rect.x1 + w; - rect.y2 = rect.y1 + h; - _mesa_notifySwapBuffers(ctx); /* flush pending rendering comands */ - radeonCopyBuffer(dPriv, &rect); - } - } else { - /* XXX this shouldn't be an error but we can't handle it for now */ - _mesa_problem(NULL, "%s: drawable has no context!", - __FUNCTION__); - } -} - -/* Make context `c' the current context and bind it to the given - * drawing and reading surfaces. - */ -GLboolean -radeonMakeCurrent( __DRIcontextPrivate *driContextPriv, - __DRIdrawablePrivate *driDrawPriv, - __DRIdrawablePrivate *driReadPriv ) -{ - if ( driContextPriv ) { - radeonContextPtr newCtx = - (radeonContextPtr) driContextPriv->driverPrivate; - - if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) newCtx->glCtx); - - newCtx->dri.readable = driReadPriv; - - if ( (newCtx->dri.drawable != driDrawPriv) || - newCtx->lastStamp != driDrawPriv->lastStamp ) { - if (driDrawPriv->swap_interval == (unsigned)-1) { - driDrawPriv->vblFlags = (newCtx->radeonScreen->irq != 0) - ? driGetDefaultVBlankFlags(&newCtx->optionCache) - : VBLANK_FLAG_NO_IRQ; - - driDrawableInitVBlank( driDrawPriv ); - } - - newCtx->dri.drawable = driDrawPriv; - - radeonSetCliprects(newCtx); - radeonUpdateViewportOffset( newCtx->glCtx ); - } - - _mesa_make_current( newCtx->glCtx, - (GLframebuffer *) driDrawPriv->driverPrivate, - (GLframebuffer *) driReadPriv->driverPrivate ); - - _mesa_update_state( newCtx->glCtx ); - } else { - if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s ctx is null\n", __FUNCTION__); - _mesa_make_current( NULL, NULL, NULL ); - } - - if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "End %s\n", __FUNCTION__); - return GL_TRUE; -} - -/* Force the context `c' to be unbound from its buffer. - */ -GLboolean -radeonUnbindContext( __DRIcontextPrivate *driContextPriv ) -{ - radeonContextPtr rmesa = (radeonContextPtr) driContextPriv->driverPrivate; - - if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s ctx %p\n", __FUNCTION__, (void *) rmesa->glCtx); - - return GL_TRUE; -} diff --git a/src/mesa/drivers/dri/radeon/radeon_context.h b/src/mesa/drivers/dri/radeon/radeon_context.h index 53df766f8c..1795d8bdb6 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_context.h @@ -48,91 +48,23 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drm.h" #include "radeon_drm.h" #include "texmem.h" - #include "main/macros.h" #include "main/mtypes.h" #include "main/colormac.h" - -struct radeon_context; -typedef struct radeon_context radeonContextRec; -typedef struct radeon_context *radeonContextPtr; - -/* This union is used to avoid warnings/miscompilation - with float to uint32_t casts due to strict-aliasing */ -typedef union { - GLfloat f; - uint32_t ui32; -} float_ui32_type; - -#include "radeon_lock.h" #include "radeon_screen.h" -#include "main/mm.h" - -#include "math/m_vector.h" - -#define TEX_0 0x1 -#define TEX_1 0x2 -#define TEX_2 0x4 -#define TEX_ALL 0x7 - -/* Rasterizing fallbacks */ -/* See correponding strings in r200_swtcl.c */ -#define RADEON_FALLBACK_TEXTURE 0x0001 -#define RADEON_FALLBACK_DRAW_BUFFER 0x0002 -#define RADEON_FALLBACK_STENCIL 0x0004 -#define RADEON_FALLBACK_RENDER_MODE 0x0008 -#define RADEON_FALLBACK_BLEND_EQ 0x0010 -#define RADEON_FALLBACK_BLEND_FUNC 0x0020 -#define RADEON_FALLBACK_DISABLE 0x0040 -#define RADEON_FALLBACK_BORDER_MODE 0x0080 - -/* The blit width for texture uploads - */ -#define BLIT_WIDTH_BYTES 1024 -/* Use the templated vertex format: - */ -#define COLOR_IS_RGBA -#define TAG(x) radeon##x -#include "tnl_dd/t_dd_vertex.h" -#undef TAG - -typedef void (*radeon_tri_func) (radeonContextPtr, - radeonVertex *, - radeonVertex *, radeonVertex *); - -typedef void (*radeon_line_func) (radeonContextPtr, - radeonVertex *, radeonVertex *); +#include "radeon_common.h" -typedef void (*radeon_point_func) (radeonContextPtr, radeonVertex *); - -struct radeon_colorbuffer_state { - GLuint clear; - int roundEnable; -}; -struct radeon_depthbuffer_state { - GLuint clear; - GLfloat scale; -}; +struct r100_context; +typedef struct r100_context r100ContextRec; +typedef struct r100_context *r100ContextPtr; -struct radeon_scissor_state { - drm_clip_rect_t rect; - GLboolean enabled; +#include "radeon_lock.h" - GLuint numClipRects; /* Cliprects active */ - GLuint numAllocedClipRects; /* Cliprects available */ - drm_clip_rect_t *pClipRects; -}; -struct radeon_stencilbuffer_state { - GLboolean hwBuffer; - GLuint clear; /* rb3d_stencilrefmask value */ -}; -struct radeon_stipple_state { - GLuint mask[32]; -}; +#define R100_TEX_ALL 0x7 /* used for both tcl_vtx and vc_frmt tex bits (they are identical) */ #define RADEON_ST_BIT(unit) \ @@ -141,42 +73,6 @@ struct radeon_stipple_state { #define RADEON_Q_BIT(unit) \ (unit == 0 ? RADEON_CP_VC_FRMT_Q0 : (RADEON_CP_VC_FRMT_Q1 >> 2) << (2 * unit)) -typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr; - -/* Texture object in locally shared texture space. - */ -struct radeon_tex_obj { - driTextureObject base; - - GLuint bufAddr; /* Offset to start of locally - shared texture block */ - - GLuint dirty_state; /* Flags (1 per texunit) for - whether or not this texobj - has dirty hardware state - (pp_*) that needs to be - brought into the - texunit. */ - - drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; - /* Six, for the cube faces */ - - GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ - - GLuint pp_txfilter; /* hardware register values */ - GLuint pp_txformat; - GLuint pp_txoffset; /* Image location in texmem. - All cube faces follow. */ - GLuint pp_txsize; /* npot only */ - GLuint pp_txpitch; /* npot only */ - GLuint pp_border_color; - GLuint pp_cubic_faces; /* cube face 1,2,3,4 log2 sizes */ - - GLboolean border_fallback; - - GLuint tile_bits; /* hw texture tile bits used on this texture */ -}; - struct radeon_texture_env_state { radeonTexObjPtr texobj; GLenum format; @@ -187,17 +83,6 @@ struct radeon_texture_state { struct radeon_texture_env_state unit[RADEON_MAX_TEXTURE_UNITS]; }; -struct radeon_state_atom { - struct radeon_state_atom *next, *prev; - const char *name; /* for debug */ - int cmd_size; /* size in bytes */ - GLuint is_tcl; - int *cmd; /* one or more cmd's */ - int *lastcmd; /* one or more cmd's */ - GLboolean dirty; /* dirty-mark in emit_state_list */ - GLboolean(*check) (GLcontext *); /* is this state active? */ -}; - /* Trying to keep these relatively short as the variables are becoming * extravagently long. Drop the driver name prefix off the front of * everything - I think we know which driver we're in by now, and keep the @@ -410,10 +295,7 @@ struct radeon_state_atom { #define SHN_SHININESS 1 #define SHN_STATE_SIZE 2 -struct radeon_hw_state { - /* Head of the linked list of state atoms. */ - struct radeon_state_atom atomlist; - +struct r100_hw_state { /* Hardware state, stored as cmdbuf commands: * -- Need to doublebuffer for * - eliding noop statechange loops? (except line stipple count) @@ -438,89 +320,19 @@ struct radeon_hw_state { struct radeon_state_atom glt; struct radeon_state_atom txr[3]; /* for NPOT */ - int max_state_size; /* Number of bytes necessary for a full state emit. */ - GLboolean is_dirty, all_dirty; }; -struct radeon_state { - /* Derived state for internal purposes: - */ - struct radeon_colorbuffer_state color; - struct radeon_depthbuffer_state depth; - struct radeon_scissor_state scissor; - struct radeon_stencilbuffer_state stencil; + +struct r100_state { struct radeon_stipple_state stipple; struct radeon_texture_state texture; }; -/* Need refcounting on dma buffers: - */ -struct radeon_dma_buffer { - int refcount; /* the number of retained regions in buf */ - drmBufPtr buf; -}; - -#define GET_START(rvb) (rmesa->radeonScreen->gart_buffer_offset + \ - (rvb)->address - rmesa->dma.buf0_address + \ - (rvb)->start) - -/* A retained region, eg vertices for indexed vertices. - */ -struct radeon_dma_region { - struct radeon_dma_buffer *buf; - char *address; /* == buf->address */ - int start, end, ptr; /* offsets from start of buf */ - int aos_start; - int aos_stride; - int aos_size; -}; - -struct radeon_dma { - /* Active dma region. Allocations for vertices and retained - * regions come from here. Also used for emitting random vertices, - * these may be flushed by calling flush_current(); - */ - struct radeon_dma_region current; - - void (*flush) (radeonContextPtr); - - char *buf0_address; /* start of buf[0], for index calcs */ - GLuint nr_released_bufs; /* flush after so many buffers released */ -}; - -struct radeon_dri_mirror { - __DRIcontextPrivate *context; /* DRI context */ - __DRIscreenPrivate *screen; /* DRI screen */ - - /** - * DRI drawable bound to this context for drawing. - */ - __DRIdrawablePrivate *drawable; - - /** - * DRI drawable bound to this context for reading. - */ - __DRIdrawablePrivate *readable; - - drm_context_t hwContext; - drm_hw_lock_t *hwLock; - int fd; - int drmMinor; -}; - #define RADEON_CMD_BUF_SZ (8*1024) - -struct radeon_store { - GLuint statenr; - GLuint primnr; - char cmd_buf[RADEON_CMD_BUF_SZ]; - int cmd_used; - int elts_start; -}; - +#define R200_ELT_BUF_SZ (8*1024) /* radeon_tcl.c */ -struct radeon_tcl_info { +struct r100_tcl_info { GLuint vertex_format; GLuint hw_primitive; @@ -529,30 +341,18 @@ struct radeon_tcl_info { */ GLvector4f ObjClean; - struct radeon_dma_region *aos_components[8]; - GLuint nr_aos_components; - GLuint *Elts; - struct radeon_dma_region indexed_verts; - struct radeon_dma_region obj; - struct radeon_dma_region rgba; - struct radeon_dma_region spec; - struct radeon_dma_region fog; - struct radeon_dma_region tex[RADEON_MAX_TEXTURE_UNITS]; - struct radeon_dma_region norm; + int elt_cmd_offset; + int elt_cmd_start; + int elt_used; }; /* radeon_swtcl.c */ -struct radeon_swtcl_info { - GLuint RenderIndex; - GLuint vertex_size; +struct r100_swtcl_info { GLuint vertex_format; - struct tnl_attr_map vertex_attrs[VERT_ATTRIB_MAX]; - GLuint vertex_attr_count; - GLubyte *verts; /* Fallback rasterization functions @@ -561,10 +361,6 @@ struct radeon_swtcl_info { radeon_line_func draw_line; radeon_tri_func draw_tri; - GLuint hw_primitive; - GLenum render_primitive; - GLuint numverts; - /** * Offset of the 4UB color data within a hardware (swtcl) vertex. */ @@ -576,22 +372,9 @@ struct radeon_swtcl_info { GLuint specoffset; GLboolean needproj; - - struct radeon_dma_region indexed_verts; }; -struct radeon_ioctl { - GLuint vertex_offset; - GLuint vertex_size; -}; - -#define RADEON_MAX_PRIMS 64 -struct radeon_prim { - GLuint start; - GLuint end; - GLuint prim; -}; /* A maximum total of 20 elements per vertex: 3 floats for position, 3 * floats for normal, 4 floats for color, 4 bytes for secondary color, @@ -602,59 +385,18 @@ struct radeon_prim { */ #define RADEON_MAX_VERTEX_SIZE 20 -struct radeon_context { - GLcontext *glCtx; /* Mesa context */ +struct r100_context { + struct radeon_context radeon; /* Driver and hardware state management */ - struct radeon_hw_state hw; - struct radeon_state state; - - /* Texture object bookkeeping - */ - unsigned nr_heaps; - driTexHeap *texture_heaps[RADEON_NR_TEX_HEAPS]; - driTextureObject swapped; - int texture_depth; - float initialMaxAnisotropy; - - /* Rasterization and vertex state: - */ - GLuint TclFallback; - GLuint Fallback; - GLuint NewGLState; - DECLARE_RENDERINPUTS(tnl_index_bitset); /* index of bits for last tnl_install_attrs */ + struct r100_hw_state hw; + struct r100_state state; /* Vertex buffers */ struct radeon_ioctl ioctl; - struct radeon_dma dma; struct radeon_store store; - /* A full state emit as of the first state emit in the main store, in case - * the context is lost. - */ - struct radeon_store backup_store; - - /* Page flipping - */ - GLuint doPageFlip; - - /* Busy waiting - */ - GLuint do_usleeps; - GLuint do_irqs; - GLuint irqsEmitted; - drm_radeon_irq_wait_t iw; - - /* Drawable, cliprect and scissor information - */ - GLuint numClipRects; /* Cliprects for the draw buffer */ - drm_clip_rect_t *pClipRects; - unsigned int lastStamp; - GLboolean lost_context; - GLboolean save_on_next_emit; - radeonScreenPtr radeonScreen; /* Screen private DRI data */ - drm_radeon_sarea_t *sarea; /* Private SAREA data */ /* TCL stuff */ @@ -667,29 +409,13 @@ struct radeon_context { GLmatrix tmpmat[RADEON_MAX_TEXTURE_UNITS]; GLuint last_ReallyEnabled; - /* VBI - */ - int64_t swap_ust; - int64_t swap_missed_ust; - - GLuint swap_count; - GLuint swap_missed_count; - /* radeon_tcl.c */ - struct radeon_tcl_info tcl; + struct r100_tcl_info tcl; /* radeon_swtcl.c */ - struct radeon_swtcl_info swtcl; - - /* Mirrors of some DRI state - */ - struct radeon_dri_mirror dri; - - /* Configuration cache - */ - driOptionCache optionCache; + struct r100_swtcl_info swtcl; GLboolean using_hyperz; GLboolean texmicrotile; @@ -703,61 +429,19 @@ struct radeon_context { GLuint c_textureSwaps; GLuint c_textureBytes; GLuint c_vertexBuffers; + }; -#define RADEON_CONTEXT(ctx) ((radeonContextPtr)(ctx->DriverCtx)) - -static INLINE GLuint radeonPackColor(GLuint cpp, - GLubyte r, GLubyte g, - GLubyte b, GLubyte a) -{ - switch (cpp) { - case 2: - return PACK_COLOR_565(r, g, b); - case 4: - return PACK_COLOR_8888(a, r, g, b); - default: - return 0; - } -} + +#define R100_CONTEXT(ctx) ((r100ContextPtr)(ctx->DriverCtx)) + #define RADEON_OLD_PACKETS 1 -extern void radeonDestroyContext(__DRIcontextPrivate * driContextPriv); -extern GLboolean radeonCreateContext(const __GLcontextModes * glVisual, - __DRIcontextPrivate * driContextPriv, - void *sharedContextPrivate); -extern void radeonSwapBuffers(__DRIdrawablePrivate * dPriv); -extern void radeonCopySubBuffer(__DRIdrawablePrivate * dPriv, - int x, int y, int w, int h); -extern GLboolean radeonMakeCurrent(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv); -extern GLboolean radeonUnbindContext(__DRIcontextPrivate * driContextPriv); - -/* ================================================================ - * Debugging: - */ -#define DO_DEBUG 1 - -#if DO_DEBUG -extern int RADEON_DEBUG; -#else -#define RADEON_DEBUG 0 -#endif - -#define DEBUG_TEXTURE 0x0001 -#define DEBUG_STATE 0x0002 -#define DEBUG_IOCTL 0x0004 -#define DEBUG_PRIMS 0x0008 -#define DEBUG_VERTS 0x0010 -#define DEBUG_FALLBACKS 0x0020 -#define DEBUG_VFMT 0x0040 -#define DEBUG_CODEGEN 0x0080 -#define DEBUG_VERBOSE 0x0100 -#define DEBUG_DRI 0x0200 -#define DEBUG_DMA 0x0400 -#define DEBUG_SANITY 0x0800 -#define DEBUG_SYNC 0x1000 +extern GLboolean r100CreateContext( const __GLcontextModes *glVisual, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate); + + #endif /* __RADEON_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_drm.h b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h new file mode 100644 index 0000000000..984725a6c9 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_cs_drm.h @@ -0,0 +1,207 @@ +/* + * Copyright © 2008 Nicolai Haehnle + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: + * Aapo Tahkola <aet@rasterburn.org> + * Nicolai Haehnle <prefect_@gmx.net> + * Jérôme Glisse <glisse@freedesktop.org> + */ +#ifndef RADEON_CS_H +#define RADEON_CS_H + +#include <stdint.h> +#include <string.h> +#include "drm.h" +#include "radeon_drm.h" + +struct radeon_cs_reloc { + struct radeon_bo *bo; + uint32_t read_domain; + uint32_t write_domain; + uint32_t flags; +}; + + +#define RADEON_CS_SPACE_OK 0 +#define RADEON_CS_SPACE_OP_TO_BIG 1 +#define RADEON_CS_SPACE_FLUSH 2 + +struct radeon_cs_space_check { + struct radeon_bo *bo; + uint32_t read_domains; + uint32_t write_domain; + uint32_t new_accounted; +}; + +struct radeon_cs_manager; + +struct radeon_cs { + struct radeon_cs_manager *csm; + void *relocs; + uint32_t *packets; + unsigned crelocs; + unsigned relocs_total_size; + unsigned cdw; + unsigned ndw; + int section; + unsigned section_ndw; + unsigned section_cdw; + const char *section_file; + const char *section_func; + int section_line; + +}; + +/* cs functions */ +struct radeon_cs_funcs { + struct radeon_cs *(*cs_create)(struct radeon_cs_manager *csm, + uint32_t ndw); + int (*cs_write_reloc)(struct radeon_cs *cs, + struct radeon_bo *bo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t flags); + int (*cs_begin)(struct radeon_cs *cs, + uint32_t ndw, + const char *file, + const char *func, + int line); + int (*cs_end)(struct radeon_cs *cs, + const char *file, + const char *func, + int line); + int (*cs_emit)(struct radeon_cs *cs); + int (*cs_destroy)(struct radeon_cs *cs); + int (*cs_erase)(struct radeon_cs *cs); + int (*cs_need_flush)(struct radeon_cs *cs); + void (*cs_print)(struct radeon_cs *cs, FILE *file); + int (*cs_space_check)(struct radeon_cs *cs, struct radeon_cs_space_check *bos, + int num_bo); +}; + +struct radeon_cs_manager { + struct radeon_cs_funcs *funcs; + int fd; + uint32_t vram_limit, gart_limit; + uint32_t vram_write_used, gart_write_used; + uint32_t read_used; +}; + +static inline struct radeon_cs *radeon_cs_create(struct radeon_cs_manager *csm, + uint32_t ndw) +{ + return csm->funcs->cs_create(csm, ndw); +} + +static inline int radeon_cs_write_reloc(struct radeon_cs *cs, + struct radeon_bo *bo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t flags) +{ + return cs->csm->funcs->cs_write_reloc(cs, + bo, + read_domain, + write_domain, + flags); +} + +static inline int radeon_cs_begin(struct radeon_cs *cs, + uint32_t ndw, + const char *file, + const char *func, + int line) +{ + return cs->csm->funcs->cs_begin(cs, ndw, file, func, line); +} + +static inline int radeon_cs_end(struct radeon_cs *cs, + const char *file, + const char *func, + int line) +{ + return cs->csm->funcs->cs_end(cs, file, func, line); +} + +static inline int radeon_cs_emit(struct radeon_cs *cs) +{ + return cs->csm->funcs->cs_emit(cs); +} + +static inline int radeon_cs_destroy(struct radeon_cs *cs) +{ + return cs->csm->funcs->cs_destroy(cs); +} + +static inline int radeon_cs_erase(struct radeon_cs *cs) +{ + return cs->csm->funcs->cs_erase(cs); +} + +static inline int radeon_cs_need_flush(struct radeon_cs *cs) +{ + return cs->csm->funcs->cs_need_flush(cs); +} + +static inline void radeon_cs_print(struct radeon_cs *cs, FILE *file) +{ + cs->csm->funcs->cs_print(cs, file); +} + +static inline int radeon_cs_space_check(struct radeon_cs *cs, + struct radeon_cs_space_check *bos, + int num_bo) +{ + return cs->csm->funcs->cs_space_check(cs, bos, num_bo); +} + +static inline void radeon_cs_set_limit(struct radeon_cs *cs, uint32_t domain, uint32_t limit) +{ + + if (domain == RADEON_GEM_DOMAIN_VRAM) + cs->csm->vram_limit = limit; + else + cs->csm->gart_limit = limit; +} + +static inline void radeon_cs_write_dword(struct radeon_cs *cs, uint32_t dword) +{ + cs->packets[cs->cdw++] = dword; + if (cs->section) { + cs->section_cdw++; + } +} + +static inline void radeon_cs_write_qword(struct radeon_cs *cs, uint64_t qword) +{ + + memcpy(cs->packets + cs->cdw, &qword, sizeof(qword)); + cs->cdw+=2; + if (cs->section) { + cs->section_cdw+=2; + } +} +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c new file mode 100644 index 0000000000..ac94789417 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.c @@ -0,0 +1,500 @@ +/* + * Copyright © 2008 Nicolai Haehnle + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: + * Aapo Tahkola <aet@rasterburn.org> + * Nicolai Haehnle <prefect_@gmx.net> + * Jérôme Glisse <glisse@freedesktop.org> + */ +#include <errno.h> + +#include "radeon_bocs_wrapper.h" + +struct cs_manager_legacy { + struct radeon_cs_manager base; + struct radeon_context *ctx; + /* hack for scratch stuff */ + uint32_t pending_age; + uint32_t pending_count; + + +}; + +struct cs_reloc_legacy { + struct radeon_cs_reloc base; + uint32_t cindices; + uint32_t *indices; +}; + + +static struct radeon_cs *cs_create(struct radeon_cs_manager *csm, + uint32_t ndw) +{ + struct radeon_cs *cs; + + cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs)); + if (cs == NULL) { + return NULL; + } + cs->csm = csm; + cs->ndw = (ndw + 0x3FF) & (~0x3FF); + cs->packets = (uint32_t*)malloc(4*cs->ndw); + if (cs->packets == NULL) { + free(cs); + return NULL; + } + cs->relocs_total_size = 0; + return cs; +} + +static int cs_write_reloc(struct radeon_cs *cs, + struct radeon_bo *bo, + uint32_t read_domain, + uint32_t write_domain, + uint32_t flags) +{ + struct cs_reloc_legacy *relocs; + int i; + + relocs = (struct cs_reloc_legacy *)cs->relocs; + /* check domains */ + if ((read_domain && write_domain) || (!read_domain && !write_domain)) { + /* in one CS a bo can only be in read or write domain but not + * in read & write domain at the same sime + */ + return -EINVAL; + } + if (read_domain == RADEON_GEM_DOMAIN_CPU) { + return -EINVAL; + } + if (write_domain == RADEON_GEM_DOMAIN_CPU) { + return -EINVAL; + } + /* check if bo is already referenced */ + for(i = 0; i < cs->crelocs; i++) { + uint32_t *indices; + + if (relocs[i].base.bo->handle == bo->handle) { + /* Check domains must be in read or write. As we check already + * checked that in argument one of the read or write domain was + * set we only need to check that if previous reloc as the read + * domain set then the read_domain should also be set for this + * new relocation. + */ + if (relocs[i].base.read_domain && !read_domain) { + return -EINVAL; + } + if (relocs[i].base.write_domain && !write_domain) { + return -EINVAL; + } + relocs[i].base.read_domain |= read_domain; + relocs[i].base.write_domain |= write_domain; + /* save indice */ + relocs[i].cindices++; + indices = (uint32_t*)realloc(relocs[i].indices, + relocs[i].cindices * 4); + if (indices == NULL) { + relocs[i].cindices -= 1; + return -ENOMEM; + } + relocs[i].indices = indices; + relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1; + return 0; + } + } + /* add bo to reloc */ + relocs = (struct cs_reloc_legacy*) + realloc(cs->relocs, + sizeof(struct cs_reloc_legacy) * (cs->crelocs + 1)); + if (relocs == NULL) { + return -ENOMEM; + } + cs->relocs = relocs; + relocs[cs->crelocs].base.bo = bo; + relocs[cs->crelocs].base.read_domain = read_domain; + relocs[cs->crelocs].base.write_domain = write_domain; + relocs[cs->crelocs].base.flags = flags; + relocs[cs->crelocs].indices = (uint32_t*)malloc(4); + if (relocs[cs->crelocs].indices == NULL) { + return -ENOMEM; + } + relocs[cs->crelocs].indices[0] = cs->cdw - 1; + relocs[cs->crelocs].cindices = 1; + cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo); + cs->crelocs++; + radeon_bo_ref(bo); + return 0; +} + +static int cs_begin(struct radeon_cs *cs, + uint32_t ndw, + const char *file, + const char *func, + int line) +{ + if (cs->section) { + fprintf(stderr, "CS already in a section(%s,%s,%d)\n", + cs->section_file, cs->section_func, cs->section_line); + fprintf(stderr, "CS can't start section(%s,%s,%d)\n", + file, func, line); + return -EPIPE; + } + cs->section = 1; + cs->section_ndw = ndw; + cs->section_cdw = 0; + cs->section_file = file; + cs->section_func = func; + cs->section_line = line; + + + if (cs->cdw + ndw > cs->ndw) { + uint32_t tmp, *ptr; + int num = (ndw > 0x3FF) ? ndw : 0x3FF; + + tmp = (cs->cdw + 1 + num) & (~num); + ptr = (uint32_t*)realloc(cs->packets, 4 * tmp); + if (ptr == NULL) { + return -ENOMEM; + } + cs->packets = ptr; + cs->ndw = tmp; + } + + return 0; +} + +static int cs_end(struct radeon_cs *cs, + const char *file, + const char *func, + int line) + +{ + if (!cs->section) { + fprintf(stderr, "CS no section to end at (%s,%s,%d)\n", + file, func, line); + return -EPIPE; + } + cs->section = 0; + if (cs->section_ndw != cs->section_cdw) { + fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n", + cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw); + fprintf(stderr, "CS section end at (%s,%s,%d)\n", + file, func, line); + return -EPIPE; + } + return 0; +} + +static int cs_process_relocs(struct radeon_cs *cs) +{ + struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; + struct cs_reloc_legacy *relocs; + int i, j, r; + + csm = (struct cs_manager_legacy*)cs->csm; + relocs = (struct cs_reloc_legacy *)cs->relocs; + restart: + for (i = 0; i < cs->crelocs; i++) { + for (j = 0; j < relocs[i].cindices; j++) { + uint32_t soffset, eoffset; + + r = radeon_bo_legacy_validate(relocs[i].base.bo, + &soffset, &eoffset); + if (r == -EAGAIN) + goto restart; + if (r) { + fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", + relocs[i].base.bo, soffset, eoffset); + return r; + } + cs->packets[relocs[i].indices[j]] += soffset; + if (cs->packets[relocs[i].indices[j]] >= eoffset) { + /* radeon_bo_debug(relocs[i].base.bo, 12); */ + fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", + relocs[i].base.bo, soffset, eoffset); + fprintf(stderr, "above end: %p 0x%08X 0x%08X\n", + relocs[i].base.bo, + cs->packets[relocs[i].indices[j]], + eoffset); + exit(0); + return -EINVAL; + } + } + } + return 0; +} + +static int cs_set_age(struct radeon_cs *cs) +{ + struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; + struct cs_reloc_legacy *relocs; + int i; + + relocs = (struct cs_reloc_legacy *)cs->relocs; + for (i = 0; i < cs->crelocs; i++) { + radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age); + radeon_bo_unref(relocs[i].base.bo); + } + return 0; +} + +static int cs_emit(struct radeon_cs *cs) +{ + struct cs_manager_legacy *csm = (struct cs_manager_legacy*)cs->csm; + drm_radeon_cmd_buffer_t cmd; + drm_r300_cmd_header_t age; + uint64_t ull; + int r; + + csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); + + /* append buffer age */ + if (IS_R300_CLASS(csm->ctx->radeonScreen)) { + age.scratch.cmd_type = R300_CMD_SCRATCH; + /* Scratch register 2 corresponds to what radeonGetAge polls */ + csm->pending_age = 0; + csm->pending_count = 1; + ull = (uint64_t) (intptr_t) &csm->pending_age; + age.scratch.reg = 2; + age.scratch.n_bufs = 1; + age.scratch.flags = 0; + radeon_cs_write_dword(cs, age.u); + radeon_cs_write_qword(cs, ull); + radeon_cs_write_dword(cs, 0); + } + + r = cs_process_relocs(cs); + if (r) { + return 0; + } + + cmd.buf = (char *)cs->packets; + cmd.bufsz = cs->cdw * 4; + if (csm->ctx->state.scissor.enabled) { + cmd.nbox = csm->ctx->state.scissor.numClipRects; + cmd.boxes = (drm_clip_rect_t *) csm->ctx->state.scissor.pClipRects; + } else { + cmd.nbox = csm->ctx->numClipRects; + cmd.boxes = (drm_clip_rect_t *) csm->ctx->pClipRects; + } + + //dump_cmdbuf(cs); + + r = drmCommandWrite(cs->csm->fd, DRM_RADEON_CMDBUF, &cmd, sizeof(cmd)); + if (r) { + return r; + } + if (!IS_R300_CLASS(csm->ctx->radeonScreen)) { + drm_radeon_irq_emit_t emit_cmd; + emit_cmd.irq_seq = &csm->pending_age; + r = drmCommandWrite(cs->csm->fd, DRM_RADEON_IRQ_EMIT, &emit_cmd, sizeof(emit_cmd)); + if (r) { + return r; + } + } + cs_set_age(cs); + + cs->csm->read_used = 0; + cs->csm->vram_write_used = 0; + cs->csm->gart_write_used = 0; + return 0; +} + +static void inline cs_free_reloc(void *relocs_p, int crelocs) +{ + struct cs_reloc_legacy *relocs = relocs_p; + int i; + if (!relocs_p) + return; + for (i = 0; i < crelocs; i++) + free(relocs[i].indices); +} + +static int cs_destroy(struct radeon_cs *cs) +{ + cs_free_reloc(cs->relocs, cs->crelocs); + free(cs->relocs); + free(cs->packets); + free(cs); + return 0; +} + +static int cs_erase(struct radeon_cs *cs) +{ + cs_free_reloc(cs->relocs, cs->crelocs); + free(cs->relocs); + cs->relocs_total_size = 0; + cs->relocs = NULL; + cs->crelocs = 0; + cs->cdw = 0; + cs->section = 0; + return 0; +} + +static int cs_need_flush(struct radeon_cs *cs) +{ + /* this function used to flush when the BO usage got to + * a certain size, now the higher levels handle this better */ + return 0; +} + +static void cs_print(struct radeon_cs *cs, FILE *file) +{ +} + +static int cs_check_space(struct radeon_cs *cs, struct radeon_cs_space_check *bos, int num_bo) +{ + struct radeon_cs_manager *csm = cs->csm; + int this_op_read = 0, this_op_gart_write = 0, this_op_vram_write = 0; + uint32_t read_domains, write_domain; + int i; + struct radeon_bo *bo; + + /* check the totals for this operation */ + + if (num_bo == 0) + return 0; + + /* prepare */ + for (i = 0; i < num_bo; i++) { + bo = bos[i].bo; + + bos[i].new_accounted = 0; + read_domains = bos[i].read_domains; + write_domain = bos[i].write_domain; + + /* pinned bos don't count */ + if (radeon_legacy_bo_is_static(bo)) + continue; + + /* already accounted this bo */ + if (write_domain && (write_domain == bo->space_accounted)) { + bos[i].new_accounted = bo->space_accounted; + continue; + } + + if (read_domains && ((read_domains << 16) == bo->space_accounted)) { + bos[i].new_accounted = bo->space_accounted; + continue; + } + + if (bo->space_accounted == 0) { + if (write_domain == RADEON_GEM_DOMAIN_VRAM) + this_op_vram_write += bo->size; + else if (write_domain == RADEON_GEM_DOMAIN_GTT) + this_op_gart_write += bo->size; + else + this_op_read += bo->size; + bos[i].new_accounted = (read_domains << 16) | write_domain; + } else { + uint16_t old_read, old_write; + + old_read = bo->space_accounted >> 16; + old_write = bo->space_accounted & 0xffff; + + if (write_domain && (old_read & write_domain)) { + bos[i].new_accounted = write_domain; + /* moving from read to a write domain */ + if (write_domain == RADEON_GEM_DOMAIN_VRAM) { + this_op_read -= bo->size; + this_op_vram_write += bo->size; + } else if (write_domain == RADEON_GEM_DOMAIN_VRAM) { + this_op_read -= bo->size; + this_op_gart_write += bo->size; + } + } else if (read_domains & old_write) { + bos[i].new_accounted = bo->space_accounted & 0xffff; + } else { + /* rewrite the domains */ + if (write_domain != old_write) + fprintf(stderr,"WRITE DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, write_domain, old_write); + if (read_domains != old_read) + fprintf(stderr,"READ DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, read_domains, old_read); + return RADEON_CS_SPACE_FLUSH; + } + } + } + + if (this_op_read < 0) + this_op_read = 0; + + /* check sizes - operation first */ + if ((this_op_read + this_op_gart_write > csm->gart_limit) || + (this_op_vram_write > csm->vram_limit)) { + return RADEON_CS_SPACE_OP_TO_BIG; + } + + if (((csm->vram_write_used + this_op_vram_write) > csm->vram_limit) || + ((csm->read_used + csm->gart_write_used + this_op_gart_write + this_op_read) > csm->gart_limit)) { + return RADEON_CS_SPACE_FLUSH; + } + + csm->gart_write_used += this_op_gart_write; + csm->vram_write_used += this_op_vram_write; + csm->read_used += this_op_read; + /* commit */ + for (i = 0; i < num_bo; i++) { + bo = bos[i].bo; + bo->space_accounted = bos[i].new_accounted; + } + + return RADEON_CS_SPACE_OK; +} + +static struct radeon_cs_funcs radeon_cs_legacy_funcs = { + cs_create, + cs_write_reloc, + cs_begin, + cs_end, + cs_emit, + cs_destroy, + cs_erase, + cs_need_flush, + cs_print, + cs_check_space +}; + +struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx) +{ + struct cs_manager_legacy *csm; + + csm = (struct cs_manager_legacy*) + calloc(1, sizeof(struct cs_manager_legacy)); + if (csm == NULL) { + return NULL; + } + csm->base.funcs = &radeon_cs_legacy_funcs; + csm->base.fd = ctx->dri.fd; + csm->ctx = ctx; + csm->pending_age = 1; + return (struct radeon_cs_manager*)csm; +} + +void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm) +{ + free(csm); +} + diff --git a/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h new file mode 100644 index 0000000000..e177b4bafe --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_cs_legacy.h @@ -0,0 +1,40 @@ +/* + * Copyright © 2008 Nicolai Haehnle + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: + * Aapo Tahkola <aet@rasterburn.org> + * Nicolai Haehnle <prefect_@gmx.net> + * Jérôme Glisse <glisse@freedesktop.org> + */ +#ifndef RADEON_CS_LEGACY_H +#define RADEON_CS_LEGACY_H + +#include "radeon_common.h" + +struct radeon_cs_manager *radeon_cs_manager_legacy_ctor(struct radeon_context *ctx); +void radeon_cs_manager_legacy_dtor(struct radeon_cs_manager *csm); + +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c new file mode 100644 index 0000000000..48b0d63818 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -0,0 +1,341 @@ +/************************************************************************** + +Copyright (C) 2004 Nicolai Haehnle. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +#include "radeon_common.h" + +#if defined(USE_X86_ASM) +#define COPY_DWORDS( dst, src, nr ) \ +do { \ + int __tmp; \ + __asm__ __volatile__( "rep ; movsl" \ + : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ + : "0" (nr), \ + "D" ((long)dst), \ + "S" ((long)src) ); \ +} while (0) +#else +#define COPY_DWORDS( dst, src, nr ) \ +do { \ + int j; \ + for ( j = 0 ; j < nr ; j++ ) \ + dst[j] = ((int *)src)[j]; \ + dst += nr; \ +} while (0) +#endif + +static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count) +{ + int i; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); + + if (stride == 4) + COPY_DWORDS(out, data, count); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out++; + data += stride; + } +} + +void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count) +{ + int i; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); + + if (stride == 8) + COPY_DWORDS(out, data, count * 2); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out += 2; + data += stride; + } +} + +void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count) +{ + int i; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); + + if (stride == 12) { + COPY_DWORDS(out, data, count * 3); + } + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out[2] = *(int *)(data + 8); + out += 3; + data += stride; + } +} + +static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count) +{ + int i; + + if (RADEON_DEBUG & DEBUG_VERTS) + fprintf(stderr, "%s count %d stride %d out %p data %p\n", + __FUNCTION__, count, stride, (void *)out, (void *)data); + + if (stride == 16) + COPY_DWORDS(out, data, count * 4); + else + for (i = 0; i < count; i++) { + out[0] = *(int *)data; + out[1] = *(int *)(data + 4); + out[2] = *(int *)(data + 8); + out[3] = *(int *)(data + 12); + out += 4; + data += stride; + } +} + +void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, + GLvoid * data, int size, int stride, int count) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + uint32_t *out; + + if (stride == 0) { + radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); + count = 1; + aos->stride = 0; + } else { + radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32); + aos->stride = size; + } + + aos->components = size; + aos->count = count; + + out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); + switch (size) { + case 1: radeonEmitVec4(out, data, stride, count); break; + case 2: radeonEmitVec8(out, data, stride, count); break; + case 3: radeonEmitVec12(out, data, stride, count); break; + case 4: radeonEmitVec16(out, data, stride, count); break; + default: + assert(0); + break; + } +} + +void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size) +{ + + size = MAX2(size, MAX_DMA_BUF_SZ); + + if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA)) + fprintf(stderr, "%s\n", __FUNCTION__); + + if (rmesa->dma.flush) { + rmesa->dma.flush(rmesa->glCtx); + } + + if (rmesa->dma.nr_released_bufs > 4) { + rcommonFlushCmdBuf(rmesa, __FUNCTION__); + rmesa->dma.nr_released_bufs = 0; + } + + if (rmesa->dma.current) { + radeon_bo_unmap(rmesa->dma.current); + radeon_bo_unref(rmesa->dma.current); + rmesa->dma.current = 0; + } + +again_alloc: + rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom, + 0, size, 4, RADEON_GEM_DOMAIN_GTT, + 0); + + if (!rmesa->dma.current) { + rcommonFlushCmdBuf(rmesa, __FUNCTION__); + rmesa->dma.nr_released_bufs = 0; + goto again_alloc; + } + + rmesa->dma.current_used = 0; + rmesa->dma.current_vertexptr = 0; + + radeon_validate_bo(rmesa, rmesa->dma.current, RADEON_GEM_DOMAIN_GTT, 0); + + if (radeon_revalidate_bos(rmesa->glCtx) == GL_FALSE) + fprintf(stderr,"failure to revalidate BOs - badness\n"); + + if (!rmesa->dma.current) { + /* Cmd buff have been flushed in radeon_revalidate_bos */ + rmesa->dma.nr_released_bufs = 0; + goto again_alloc; + } + + radeon_bo_map(rmesa->dma.current, 1); +} + +/* Allocates a region from rmesa->dma.current. If there isn't enough + * space in current, grab a new buffer (and discard what was left of current) + */ +void radeonAllocDmaRegion(radeonContextPtr rmesa, + struct radeon_bo **pbo, int *poffset, + int bytes, int alignment) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); + + if (rmesa->dma.flush) + rmesa->dma.flush(rmesa->glCtx); + + assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr); + + alignment--; + rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment; + + if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size) + radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15); + + *poffset = rmesa->dma.current_used; + *pbo = rmesa->dma.current; + radeon_bo_ref(*pbo); + + /* Always align to at least 16 bytes */ + rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15; + rmesa->dma.current_vertexptr = rmesa->dma.current_used; + + assert(rmesa->dma.current_used <= rmesa->dma.current->size); +} + +void radeonReleaseDmaRegion(radeonContextPtr rmesa) +{ + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current); + if (rmesa->dma.current) { + rmesa->dma.nr_released_bufs++; + radeon_bo_unmap(rmesa->dma.current); + radeon_bo_unref(rmesa->dma.current); + } + rmesa->dma.current = NULL; +} + + +/* Flush vertices in the current dma region. + */ +void rcommon_flush_last_swtcl_prim( GLcontext *ctx ) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + struct radeon_dma *dma = &rmesa->dma; + + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current); + dma->flush = NULL; + + if (dma->current) { + GLuint current_offset = dma->current_used; + + assert (dma->current_used + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + dma->current_vertexptr); + + if (dma->current_used != dma->current_vertexptr) { + dma->current_used = dma->current_vertexptr; + + rmesa->vtbl.swtcl_flush(ctx, current_offset); + } + rmesa->swtcl.numverts = 0; + } +} +/* Alloc space in the current dma region. + */ +void * +rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) +{ + GLuint bytes = vsize * nverts; + void *head; +restart: + if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) { + radeonRefillCurrentDmaRegion(rmesa, bytes); + } + + if (!rmesa->dma.flush) { + /* make sure we have enough space to use this in cmdbuf */ + rcommonEnsureCmdBufSpace(rmesa, + rmesa->hw.max_state_size + (12*sizeof(int)), + __FUNCTION__); + /* if cmdbuf flushed DMA restart */ + if (!rmesa->dma.current) + goto restart; + rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + rmesa->dma.flush = rcommon_flush_last_swtcl_prim; + } + + ASSERT( vsize == rmesa->swtcl.vertex_size * 4 ); + ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim ); + ASSERT( rmesa->dma.current_used + + rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == + rmesa->dma.current_vertexptr ); + + head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr); + rmesa->dma.current_vertexptr += bytes; + rmesa->swtcl.numverts += nverts; + return head; +} + +void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) +{ + radeonContextPtr radeon = RADEON_CONTEXT( ctx ); + int i; + + if (radeon->dma.flush) { + radeon->dma.flush(radeon->glCtx); + } + if (radeon->tcl.elt_dma_bo) { + radeon_bo_unref(radeon->tcl.elt_dma_bo); + radeon->tcl.elt_dma_bo = NULL; + } + for (i = 0; i < radeon->tcl.aos_count; i++) { + if (radeon->tcl.aos[i].bo) { + radeon_bo_unref(radeon->tcl.aos[i].bo); + radeon->tcl.aos[i].bo = NULL; + } + } +} diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.h b/src/mesa/drivers/dri/radeon/radeon_dma.h new file mode 100644 index 0000000000..06e388fc1d --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_dma.h @@ -0,0 +1,52 @@ +/************************************************************************** + +Copyright (C) 2004 Nicolai Haehnle. +Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + +The Weather Channel (TM) funded Tungsten Graphics to develop the +initial release of the Radeon 8500 driver under the XFree86 license. +This notice must be preserved. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +#ifndef RADEON_DMA_H +#define RADEON_DMA_H + +void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count); +void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count); + +void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos, + GLvoid * data, int size, int stride, int count); + +void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size); +void radeonAllocDmaRegion(radeonContextPtr rmesa, + struct radeon_bo **pbo, int *poffset, + int bytes, int alignment); +void radeonReleaseDmaRegion(radeonContextPtr rmesa); + +void rcommon_flush_last_swtcl_prim(GLcontext *ctx); + +void *rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize); +void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ); +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c new file mode 100644 index 0000000000..f62ca7f9eb --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -0,0 +1,588 @@ +/************************************************************************** + * + * Copyright 2008 Red Hat Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/imports.h" +#include "main/macros.h" +#include "main/mtypes.h" +#include "main/fbobject.h" +#include "main/framebuffer.h" +#include "main/renderbuffer.h" +#include "main/context.h" +#include "main/texformat.h" +#include "main/texrender.h" + +#include "radeon_common.h" +#include "radeon_mipmap_tree.h" + +#define FILE_DEBUG_FLAG DEBUG_TEXTURE +#define DBG(...) do { \ + if (RADEON_DEBUG & FILE_DEBUG_FLAG) \ + _mesa_printf(__VA_ARGS__); \ +} while(0) + +static struct gl_framebuffer * +radeon_new_framebuffer(GLcontext *ctx, GLuint name) +{ + return _mesa_new_framebuffer(ctx, name); +} + +static void +radeon_delete_renderbuffer(struct gl_renderbuffer *rb) +{ + struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb); + + ASSERT(rrb); + + if (rrb && rrb->bo) { + radeon_bo_unref(rrb->bo); + } + _mesa_free(rrb); +} + +static void * +radeon_get_pointer(GLcontext *ctx, struct gl_renderbuffer *rb, + GLint x, GLint y) +{ + return NULL; +} + +/** + * Called via glRenderbufferStorageEXT() to set the format and allocate + * storage for a user-created renderbuffer. + */ +static GLboolean +radeon_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, + GLuint width, GLuint height) +{ + struct radeon_context *radeon = RADEON_CONTEXT(ctx); + struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb); + GLboolean software_buffer = GL_FALSE; + int cpp; + + ASSERT(rb->Name != 0); + switch (internalFormat) { + case GL_R3_G3_B2: + case GL_RGB4: + case GL_RGB5: + rb->_ActualFormat = GL_RGB5; + rb->DataType = GL_UNSIGNED_BYTE; + rb->RedBits = 5; + rb->GreenBits = 6; + rb->BlueBits = 5; + cpp = 2; + break; + case GL_RGB: + case GL_RGB8: + case GL_RGB10: + case GL_RGB12: + case GL_RGB16: + rb->_ActualFormat = GL_RGB8; + rb->DataType = GL_UNSIGNED_BYTE; + rb->RedBits = 8; + rb->GreenBits = 8; + rb->BlueBits = 8; + rb->AlphaBits = 0; + cpp = 4; + break; + case GL_RGBA: + case GL_RGBA2: + case GL_RGBA4: + case GL_RGB5_A1: + case GL_RGBA8: + case GL_RGB10_A2: + case GL_RGBA12: + case GL_RGBA16: + rb->_ActualFormat = GL_RGBA8; + rb->DataType = GL_UNSIGNED_BYTE; + rb->RedBits = 8; + rb->GreenBits = 8; + rb->BlueBits = 8; + rb->AlphaBits = 8; + cpp = 4; + break; + case GL_STENCIL_INDEX: + case GL_STENCIL_INDEX1_EXT: + case GL_STENCIL_INDEX4_EXT: + case GL_STENCIL_INDEX8_EXT: + case GL_STENCIL_INDEX16_EXT: + /* alloc a depth+stencil buffer */ + rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rb->DataType = GL_UNSIGNED_INT_24_8_EXT; + rb->StencilBits = 8; + cpp = 4; + break; + case GL_DEPTH_COMPONENT16: + rb->_ActualFormat = GL_DEPTH_COMPONENT16; + rb->DataType = GL_UNSIGNED_SHORT; + rb->DepthBits = 16; + cpp = 2; + break; + case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH_COMPONENT32: + rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rb->DataType = GL_UNSIGNED_INT_24_8_EXT; + rb->DepthBits = 24; + cpp = 4; + break; + case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH24_STENCIL8_EXT: + rb->_ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rb->DataType = GL_UNSIGNED_INT_24_8_EXT; + rb->DepthBits = 24; + rb->StencilBits = 8; + cpp = 4; + break; + default: + _mesa_problem(ctx, + "Unexpected format in intel_alloc_renderbuffer_storage"); + return GL_FALSE; + } + + radeonFlush(ctx); + + if (rrb->bo) + radeon_bo_unref(rrb->bo); + + + if (software_buffer) { + return _mesa_soft_renderbuffer_storage(ctx, rb, internalFormat, + width, height); + } + else { + uint32_t size = width * height * cpp; + uint32_t pitch = ((cpp * width + 63) & ~63) / cpp; + + fprintf(stderr,"Allocating %d x %d radeon RBO (pitch %d)\n", width, + height, pitch); + + rrb->pitch = pitch * cpp; + rrb->cpp = cpp; + rrb->bo = radeon_bo_open(radeon->radeonScreen->bom, + 0, + size, + 0, + RADEON_GEM_DOMAIN_VRAM, + 0); + rb->Width = width; + rb->Height = height; + return GL_TRUE; + } + +} + + +/** + * Called for each hardware renderbuffer when a _window_ is resized. + * Just update fields. + * Not used for user-created renderbuffers! + */ +static GLboolean +radeon_alloc_window_storage(GLcontext * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLuint width, GLuint height) +{ + ASSERT(rb->Name == 0); + rb->Width = width; + rb->Height = height; + rb->_ActualFormat = internalFormat; + + return GL_TRUE; +} + + +static void +radeon_resize_buffers(GLcontext *ctx, struct gl_framebuffer *fb, + GLuint width, GLuint height) +{ + struct radeon_framebuffer *radeon_fb = (struct radeon_framebuffer*)fb; + int i; + + _mesa_resize_framebuffer(ctx, fb, width, height); + + fb->Initialized = GL_TRUE; /* XXX remove someday */ + + if (fb->Name != 0) { + return; + } + + /* Make sure all window system renderbuffers are up to date */ + for (i = 0; i < 2; i++) { + struct gl_renderbuffer *rb = &radeon_fb->color_rb[i]->base; + + /* only resize if size is changing */ + if (rb && (rb->Width != width || rb->Height != height)) { + rb->AllocStorage(ctx, rb, rb->InternalFormat, width, height); + } + } +} + + +/** Dummy function for gl_renderbuffer::AllocStorage() */ +static GLboolean +radeon_nop_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, + GLenum internalFormat, GLuint width, GLuint height) +{ + _mesa_problem(ctx, "radeon_op_alloc_storage should never be called."); + return GL_FALSE; +} + +struct radeon_renderbuffer * +radeon_create_renderbuffer(GLenum format, __DRIdrawablePrivate *driDrawPriv) +{ + struct radeon_renderbuffer *rrb; + + rrb = CALLOC_STRUCT(radeon_renderbuffer); + if (!rrb) + return NULL; + + _mesa_init_renderbuffer(&rrb->base, 0); + rrb->base.ClassID = RADEON_RB_CLASS; + + /* XXX format junk */ + switch (format) { + case GL_RGB5: + rrb->base._ActualFormat = GL_RGB5; + rrb->base._BaseFormat = GL_RGBA; + rrb->base.RedBits = 5; + rrb->base.GreenBits = 6; + rrb->base.BlueBits = 5; + rrb->base.DataType = GL_UNSIGNED_BYTE; + break; + case GL_RGB8: + rrb->base._ActualFormat = GL_RGB8; + rrb->base._BaseFormat = GL_RGB; + rrb->base.RedBits = 8; + rrb->base.GreenBits = 8; + rrb->base.BlueBits = 8; + rrb->base.AlphaBits = 8; + rrb->base.DataType = GL_UNSIGNED_BYTE; + break; + case GL_RGBA8: + rrb->base._ActualFormat = GL_RGBA8; + rrb->base._BaseFormat = GL_RGBA; + rrb->base.RedBits = 8; + rrb->base.GreenBits = 8; + rrb->base.BlueBits = 8; + rrb->base.AlphaBits = 8; + rrb->base.DataType = GL_UNSIGNED_BYTE; + break; + case GL_STENCIL_INDEX8_EXT: + rrb->base._ActualFormat = GL_STENCIL_INDEX8_EXT; + rrb->base._BaseFormat = GL_STENCIL_INDEX; + rrb->base.StencilBits = 8; + rrb->base.DataType = GL_UNSIGNED_BYTE; + break; + case GL_DEPTH_COMPONENT16: + rrb->base._ActualFormat = GL_DEPTH_COMPONENT16; + rrb->base._BaseFormat = GL_DEPTH_COMPONENT; + rrb->base.DepthBits = 16; + rrb->base.DataType = GL_UNSIGNED_SHORT; + break; + case GL_DEPTH_COMPONENT24: + rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rrb->base._BaseFormat = GL_DEPTH_COMPONENT; + rrb->base.DepthBits = 24; + rrb->base.DataType = GL_UNSIGNED_INT; + break; + case GL_DEPTH24_STENCIL8_EXT: + rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT; + rrb->base.DepthBits = 24; + rrb->base.StencilBits = 8; + rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT; + break; + default: + fprintf(stderr, "%s: Unknown format 0x%04x\n", __FUNCTION__, format); + _mesa_delete_renderbuffer(&rrb->base); + return NULL; + } + + rrb->dPriv = driDrawPriv; + rrb->base.InternalFormat = format; + + rrb->base.Delete = radeon_delete_renderbuffer; + rrb->base.AllocStorage = radeon_alloc_window_storage; + rrb->base.GetPointer = radeon_get_pointer; + + rrb->bo = NULL; + return rrb; +} + +static struct gl_renderbuffer * +radeon_new_renderbuffer(GLcontext * ctx, GLuint name) +{ + struct radeon_renderbuffer *rrb; + + rrb = CALLOC_STRUCT(radeon_renderbuffer); + if (!rrb) + return NULL; + + _mesa_init_renderbuffer(&rrb->base, name); + rrb->base.ClassID = RADEON_RB_CLASS; + + rrb->base.Delete = radeon_delete_renderbuffer; + rrb->base.AllocStorage = radeon_alloc_renderbuffer_storage; + rrb->base.GetPointer = radeon_get_pointer; + + return &rrb->base; +} + +static void +radeon_bind_framebuffer(GLcontext * ctx, GLenum target, + struct gl_framebuffer *fb, struct gl_framebuffer *fbread) +{ + if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) { + radeon_draw_buffer(ctx, fb); + } + else { + /* don't need to do anything if target == GL_READ_FRAMEBUFFER_EXT */ + } +} + +static void +radeon_framebuffer_renderbuffer(GLcontext * ctx, + struct gl_framebuffer *fb, + GLenum attachment, struct gl_renderbuffer *rb) +{ + + radeonFlush(ctx); + + _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb); + radeon_draw_buffer(ctx, fb); +} + + +static GLboolean +radeon_update_wrapper(GLcontext *ctx, struct radeon_renderbuffer *rrb, + struct gl_texture_image *texImage) +{ + int retry = 0; +restart: + if (texImage->TexFormat == &_mesa_texformat_argb8888) { + rrb->cpp = 4; + rrb->base._ActualFormat = GL_RGBA8; + rrb->base._BaseFormat = GL_RGBA; + rrb->base.DataType = GL_UNSIGNED_BYTE; + DBG("Render to RGBA8 texture OK\n"); + } + else if (texImage->TexFormat == &_mesa_texformat_rgb565) { + rrb->cpp = 2; + rrb->base._ActualFormat = GL_RGB5; + rrb->base._BaseFormat = GL_RGB; + rrb->base.DataType = GL_UNSIGNED_SHORT; + DBG("Render to RGB5 texture OK\n"); + } + else if (texImage->TexFormat == &_mesa_texformat_z16) { + rrb->cpp = 2; + rrb->base._ActualFormat = GL_DEPTH_COMPONENT16; + rrb->base._BaseFormat = GL_DEPTH_COMPONENT; + rrb->base.DataType = GL_UNSIGNED_SHORT; + DBG("Render to DEPTH16 texture OK\n"); + } + else if (texImage->TexFormat == &_mesa_texformat_s8_z24) { + rrb->cpp = 4; + rrb->base._ActualFormat = GL_DEPTH24_STENCIL8_EXT; + rrb->base._BaseFormat = GL_DEPTH_STENCIL_EXT; + rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT; + DBG("Render to DEPTH_STENCIL texture OK\n"); + } + else { + /* try redoing the FBO */ + if (retry == 1) { + DBG("Render to texture BAD FORMAT %d\n", + texImage->TexFormat->MesaFormat); + return GL_FALSE; + } + texImage->TexFormat = radeonChooseTextureFormat(ctx, texImage->InternalFormat, 0, + texImage->TexFormat->DataType, + 1); + + retry++; + goto restart; + } + + rrb->pitch = texImage->Width * rrb->cpp; + rrb->base.InternalFormat = rrb->base._ActualFormat; + rrb->base.Width = texImage->Width; + rrb->base.Height = texImage->Height; + rrb->base.RedBits = texImage->TexFormat->RedBits; + rrb->base.GreenBits = texImage->TexFormat->GreenBits; + rrb->base.BlueBits = texImage->TexFormat->BlueBits; + rrb->base.AlphaBits = texImage->TexFormat->AlphaBits; + rrb->base.DepthBits = texImage->TexFormat->DepthBits; + + rrb->base.Delete = radeon_delete_renderbuffer; + rrb->base.AllocStorage = radeon_nop_alloc_storage; + + return GL_TRUE; +} + + +static struct radeon_renderbuffer * +radeon_wrap_texture(GLcontext * ctx, struct gl_texture_image *texImage) +{ + const GLuint name = ~0; /* not significant, but distinct for debugging */ + struct radeon_renderbuffer *rrb; + + /* make an radeon_renderbuffer to wrap the texture image */ + rrb = CALLOC_STRUCT(radeon_renderbuffer); + if (!rrb) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture"); + return NULL; + } + + _mesa_init_renderbuffer(&rrb->base, name); + rrb->base.ClassID = RADEON_RB_CLASS; + + if (!radeon_update_wrapper(ctx, rrb, texImage)) { + _mesa_free(rrb); + return NULL; + } + + return rrb; + +} +static void +radeon_render_texture(GLcontext * ctx, + struct gl_framebuffer *fb, + struct gl_renderbuffer_attachment *att) +{ + struct gl_texture_image *newImage + = att->Texture->Image[att->CubeMapFace][att->TextureLevel]; + struct radeon_renderbuffer *rrb = radeon_renderbuffer(att->Renderbuffer); + radeon_texture_image *radeon_image; + GLuint imageOffset; + + (void) fb; + + ASSERT(newImage); + + if (newImage->Border != 0) { + /* Fallback on drawing to a texture with a border, which won't have a + * miptree. + */ + _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); + _mesa_render_texture(ctx, fb, att); + return; + } + else if (!rrb) { + rrb = radeon_wrap_texture(ctx, newImage); + if (rrb) { + /* bind the wrapper to the attachment point */ + _mesa_reference_renderbuffer(&att->Renderbuffer, &rrb->base); + } + else { + /* fallback to software rendering */ + _mesa_render_texture(ctx, fb, att); + return; + } + } + + if (!radeon_update_wrapper(ctx, rrb, newImage)) { + _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); + _mesa_render_texture(ctx, fb, att); + return; + } + + DBG("Begin render texture tid %x tex=%u w=%d h=%d refcount=%d\n", + _glthread_GetID(), + att->Texture->Name, newImage->Width, newImage->Height, + rrb->base.RefCount); + + /* point the renderbufer's region to the texture image region */ + radeon_image = (radeon_texture_image *)newImage; + if (rrb->bo != radeon_image->mt->bo) { + if (rrb->bo) + radeon_bo_unref(rrb->bo); + rrb->bo = radeon_image->mt->bo; + radeon_bo_ref(rrb->bo); + } + + /* compute offset of the particular 2D image within the texture region */ + imageOffset = radeon_miptree_image_offset(radeon_image->mt, + att->CubeMapFace, + att->TextureLevel); + + if (att->Texture->Target == GL_TEXTURE_3D) { + GLuint offsets[6]; + radeon_miptree_depth_offsets(radeon_image->mt, att->TextureLevel, + offsets); + imageOffset += offsets[att->Zoffset]; + } + + /* store that offset in the region */ + rrb->draw_offset = imageOffset; + + /* update drawing region, etc */ + radeon_draw_buffer(ctx, fb); +} + +static void +radeon_finish_render_texture(GLcontext * ctx, + struct gl_renderbuffer_attachment *att) +{ + +} +static void +radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) +{ +} + +static void +radeon_blit_framebuffer(GLcontext *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ +} + +void radeon_fbo_init(struct radeon_context *radeon) +{ + radeon->glCtx->Driver.NewFramebuffer = radeon_new_framebuffer; + radeon->glCtx->Driver.NewRenderbuffer = radeon_new_renderbuffer; + radeon->glCtx->Driver.BindFramebuffer = radeon_bind_framebuffer; + radeon->glCtx->Driver.FramebufferRenderbuffer = radeon_framebuffer_renderbuffer; + radeon->glCtx->Driver.RenderTexture = radeon_render_texture; + radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture; + radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers; + radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer; + radeon->glCtx->Driver.BlitFramebuffer = radeon_blit_framebuffer; +} + + +void radeon_renderbuffer_set_bo(struct radeon_renderbuffer *rb, + struct radeon_bo *bo) +{ + struct radeon_bo *old; + old = rb->bo; + rb->bo = bo; + radeon_bo_ref(bo); + if (old) + radeon_bo_unref(old); +} diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.c b/src/mesa/drivers/dri/radeon/radeon_ioctl.c index 09acf6b4f8..01c45df2df 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.c +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.c @@ -35,7 +35,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include <sched.h> -#include <errno.h> +#include <errno.h> + +#include "main/attrib.h" +#include "main/enable.h" +#include "main/blend.h" +#include "main/bufferobj.h" +#include "main/buffers.h" +#include "main/depth.h" +#include "main/shaders.h" +#include "main/texstate.h" +#include "main/varray.h" +#include "glapi/dispatch.h" +#include "swrast/swrast.h" +#include "main/stencil.h" +#include "main/matrix.h" #include "main/glheader.h" #include "main/imports.h" @@ -43,6 +57,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast/swrast.h" #include "radeon_context.h" +#include "radeon_common.h" #include "radeon_state.h" #include "radeon_ioctl.h" #include "radeon_tcl.h" @@ -58,75 +73,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define RADEON_IDLE_RETRY 16 -static void radeonWaitForIdle( radeonContextPtr rmesa ); -static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, - const char * caller ); - -static void print_state_atom( struct radeon_state_atom *state ) -{ - int i; - - fprintf(stderr, "emit %s/%d\n", state->name, state->cmd_size); - - if (RADEON_DEBUG & DEBUG_VERBOSE) - for (i = 0 ; i < state->cmd_size ; i++) - fprintf(stderr, "\t%s[%d]: %x\n", state->name, i, state->cmd[i]); - -} - -static void radeonSaveHwState( radeonContextPtr rmesa ) -{ - struct radeon_state_atom *atom; - char * dest = rmesa->backup_store.cmd_buf; - - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "%s\n", __FUNCTION__); - - rmesa->backup_store.cmd_used = 0; - - foreach( atom, &rmesa->hw.atomlist ) { - if ( atom->check( rmesa->glCtx ) ) { - int size = atom->cmd_size * 4; - memcpy( dest, atom->cmd, size); - dest += size; - rmesa->backup_store.cmd_used += size; - if (RADEON_DEBUG & DEBUG_STATE) - print_state_atom( atom ); - } - } - - assert( rmesa->backup_store.cmd_used <= RADEON_CMD_BUF_SZ ); - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "Returning to radeonEmitState\n"); -} - -/* At this point we were in FlushCmdBufLocked but we had lost our context, so - * we need to unwire our current cmdbuf, hook the one with the saved state in - * it, flush it, and then put the current one back. This is so commands at the - * start of a cmdbuf can rely on the state being kept from the previous one. - */ -static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa ) -{ - GLuint nr_released_bufs; - struct radeon_store saved_store; - - if (rmesa->backup_store.cmd_used == 0) - return; - - if (RADEON_DEBUG & DEBUG_STATE) - fprintf(stderr, "Emitting backup state on lost context\n"); - - rmesa->lost_context = GL_FALSE; - - nr_released_bufs = rmesa->dma.nr_released_bufs; - saved_store = rmesa->store; - rmesa->dma.nr_released_bufs = 0; - rmesa->store = rmesa->backup_store; - radeonFlushCmdBufLocked( rmesa, __FUNCTION__ ); - rmesa->dma.nr_released_bufs = nr_released_bufs; - rmesa->store = saved_store; -} - /* ============================================================= * Kernel command buffer handling */ @@ -134,965 +80,386 @@ static void radeonBackUpAndEmitLostStateLocked( radeonContextPtr rmesa ) /* The state atoms will be emitted in the order they appear in the atom list, * so this step is important. */ -void radeonSetUpAtomList( radeonContextPtr rmesa ) +void radeonSetUpAtomList( r100ContextPtr rmesa ) { - int i, mtu = rmesa->glCtx->Const.MaxTextureUnits; - - make_empty_list(&rmesa->hw.atomlist); - rmesa->hw.atomlist.name = "atom-list"; - - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ctx); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.set); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lin); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msk); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.vpt); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tcl); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.msc); + int i, mtu = rmesa->radeon.glCtx->Const.MaxTextureUnits; + + make_empty_list(&rmesa->radeon.hw.atomlist); + rmesa->radeon.hw.atomlist.name = "atom-list"; + + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ctx); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.set); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lin); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msk); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.vpt); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tcl); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.msc); for (i = 0; i < mtu; ++i) { - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.tex[i]); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.txr[i]); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.cube[i]); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.tex[i]); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.txr[i]); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.cube[i]); } - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.zbs); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mtl); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.zbs); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mtl); for (i = 0; i < 3 + mtu; ++i) - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.mat[i]); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.mat[i]); for (i = 0; i < 8; ++i) - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.lit[i]); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.lit[i]); for (i = 0; i < 6; ++i) - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.ucp[i]); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.eye); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.grd); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.fog); - insert_at_tail(&rmesa->hw.atomlist, &rmesa->hw.glt); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.ucp[i]); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.eye); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.grd); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.fog); + insert_at_tail(&rmesa->radeon.hw.atomlist, &rmesa->hw.glt); } -void radeonEmitState( radeonContextPtr rmesa ) +void radeonEmitScissor(r100ContextPtr rmesa) { - struct radeon_state_atom *atom; - char *dest; - - if (RADEON_DEBUG & (DEBUG_STATE|DEBUG_PRIMS)) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (rmesa->save_on_next_emit) { - radeonSaveHwState(rmesa); - rmesa->save_on_next_emit = GL_FALSE; - } - - /* this code used to return here but now it emits zbs */ - - /* To avoid going across the entire set of states multiple times, just check - * for enough space for the case of emitting all state, and inline the - * radeonAllocCmdBuf code here without all the checks. - */ - radeonEnsureCmdBufSpace(rmesa, rmesa->hw.max_state_size); - dest = rmesa->store.cmd_buf + rmesa->store.cmd_used; - - /* We always always emit zbs, this is due to a bug found by keithw in - the hardware and rediscovered after Erics changes by me. - if you ever touch this code make sure you emit zbs otherwise - you get tcl lockups on at least M7/7500 class of chips - airlied */ - rmesa->hw.zbs.dirty=1; - - if (RADEON_DEBUG & DEBUG_STATE) { - foreach(atom, &rmesa->hw.atomlist) { - if (atom->dirty || rmesa->hw.all_dirty) { - if (atom->check(rmesa->glCtx)) - print_state_atom(atom); - else - fprintf(stderr, "skip state %s\n", atom->name); - } - } - } - - foreach(atom, &rmesa->hw.atomlist) { - if (rmesa->hw.all_dirty) - atom->dirty = GL_TRUE; - if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) && - atom->is_tcl) - atom->dirty = GL_FALSE; - if (atom->dirty) { - if (atom->check(rmesa->glCtx)) { - int size = atom->cmd_size * 4; - memcpy(dest, atom->cmd, size); - dest += size; - rmesa->store.cmd_used += size; - atom->dirty = GL_FALSE; - } - } - } - - assert(rmesa->store.cmd_used <= RADEON_CMD_BUF_SZ); - - rmesa->hw.is_dirty = GL_FALSE; - rmesa->hw.all_dirty = GL_FALSE; + BATCH_LOCALS(&rmesa->radeon); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + return; + } + if (rmesa->radeon.state.scissor.enabled) { + BEGIN_BATCH(6); + OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0)); + OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] | RADEON_SCISSOR_ENABLE); + OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0)); + OUT_BATCH((rmesa->radeon.state.scissor.rect.y1 << 16) | + rmesa->radeon.state.scissor.rect.x1); + OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0)); + OUT_BATCH(((rmesa->radeon.state.scissor.rect.y2 - 1) << 16) | + (rmesa->radeon.state.scissor.rect.x2 - 1)); + END_BATCH(); + } else { + BEGIN_BATCH(2); + OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 0)); + OUT_BATCH(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & ~RADEON_SCISSOR_ENABLE); + END_BATCH(); + } } /* Fire a section of the retained (indexed_verts) buffer as a regular - * primtive. + * primtive. */ -extern void radeonEmitVbufPrim( radeonContextPtr rmesa, +extern void radeonEmitVbufPrim( r100ContextPtr rmesa, GLuint vertex_format, GLuint primitive, GLuint vertex_nr ) { - drm_radeon_cmd_header_t *cmd; - + BATCH_LOCALS(&rmesa->radeon); assert(!(primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); - - radeonEmitState( rmesa ); - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s cmd_used/4: %d\n", __FUNCTION__, - rmesa->store.cmd_used/4); - - cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VBUF_BUFSZ, - __FUNCTION__ ); + radeonEmitState(&rmesa->radeon); + radeonEmitScissor(rmesa); + #if RADEON_OLD_PACKETS - cmd[0].i = 0; - cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; - cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM | (3 << 16); - cmd[2].i = rmesa->ioctl.vertex_offset; - cmd[3].i = vertex_nr; - cmd[4].i = vertex_format; - cmd[5].i = (primitive | - RADEON_CP_VC_CNTL_PRIM_WALK_LIST | - RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | - RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | - (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); + BEGIN_BATCH(8); + OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 3); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + } else { + OUT_BATCH(rmesa->ioctl.vertex_offset); + } - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s: header 0x%x offt 0x%x vfmt 0x%x vfcntl %x \n", - __FUNCTION__, - cmd[1].i, cmd[2].i, cmd[4].i, cmd[5].i); -#else - cmd[0].i = 0; - cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; - cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_VBUF | (1 << 16); - cmd[2].i = vertex_format; - cmd[3].i = (primitive | - RADEON_CP_VC_CNTL_PRIM_WALK_LIST | - RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | - RADEON_CP_VC_CNTL_MAOS_ENABLE | - RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | - (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); + OUT_BATCH(vertex_nr); + OUT_BATCH(vertex_format); + OUT_BATCH(primitive | RADEON_CP_VC_CNTL_PRIM_WALK_LIST | + RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | + (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); + + if (rmesa->radeon.radeonScreen->kernel_mm) { + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->ioctl.bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } + END_BATCH(); - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s: header 0x%x vfmt 0x%x vfcntl %x \n", - __FUNCTION__, - cmd[1].i, cmd[2].i, cmd[3].i); +#else + BEGIN_BATCH(4); + OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_DRAW_VBUF, 1); + OUT_BATCH(vertex_format); + OUT_BATCH(primitive | + RADEON_CP_VC_CNTL_PRIM_WALK_LIST | + RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | + RADEON_CP_VC_CNTL_MAOS_ENABLE | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | + (vertex_nr << RADEON_CP_VC_CNTL_NUM_SHIFT)); + END_BATCH(); #endif } - -void radeonFlushElts( radeonContextPtr rmesa ) +void radeonFlushElts( GLcontext *ctx ) { - int *cmd = (int *)(rmesa->store.cmd_buf + rmesa->store.elts_start); - int dwords; -#if RADEON_OLD_PACKETS - int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 24)) / 2; -#else - int nr = (rmesa->store.cmd_used - (rmesa->store.elts_start + 16)) / 2; -#endif + r100ContextPtr rmesa = R100_CONTEXT(ctx); + BATCH_LOCALS(&rmesa->radeon); + int nr; + uint32_t *cmd = (uint32_t *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_start); + int dwords = (rmesa->radeon.cmdbuf.cs->section_ndw - rmesa->radeon.cmdbuf.cs->section_cdw); if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); - assert( rmesa->dma.flush == radeonFlushElts ); - rmesa->dma.flush = NULL; + assert( rmesa->radeon.dma.flush == radeonFlushElts ); + rmesa->radeon.dma.flush = NULL; - /* Cope with odd number of elts: - */ - rmesa->store.cmd_used = (rmesa->store.cmd_used + 2) & ~2; - dwords = (rmesa->store.cmd_used - rmesa->store.elts_start) / 4; + nr = rmesa->tcl.elt_used; + +#if RADEON_OLD_PACKETS + if (rmesa->radeon.radeonScreen->kernel_mm) { + dwords -= 2; + } +#endif #if RADEON_OLD_PACKETS - cmd[1] |= (dwords - 3) << 16; + cmd[1] |= (dwords + 3) << 16; cmd[5] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT; #else - cmd[1] |= (dwords - 3) << 16; + cmd[1] |= (dwords + 2) << 16; cmd[3] |= nr << RADEON_CP_VC_CNTL_NUM_SHIFT; #endif + rmesa->radeon.cmdbuf.cs->cdw += dwords; + rmesa->radeon.cmdbuf.cs->section_cdw += dwords; + +#if RADEON_OLD_PACKETS + if (rmesa->radeon.radeonScreen->kernel_mm) { + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->ioctl.bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + } +#endif + + END_BATCH(); + if (RADEON_DEBUG & DEBUG_SYNC) { fprintf(stderr, "%s: Syncing\n", __FUNCTION__); - radeonFinish( rmesa->glCtx ); + radeonFinish( rmesa->radeon.glCtx ); } -} +} -GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa, +GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, GLuint vertex_format, GLuint primitive, GLuint min_nr ) { - drm_radeon_cmd_header_t *cmd; GLushort *retval; + int align_min_nr; + BATCH_LOCALS(&rmesa->radeon); if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, min_nr); + fprintf(stderr, "%s %d prim %x\n", __FUNCTION__, min_nr, primitive); assert((primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); - - radeonEmitState( rmesa ); - - cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, - ELTS_BUFSZ(min_nr), - __FUNCTION__ ); + + radeonEmitState(&rmesa->radeon); + radeonEmitScissor(rmesa); + + rmesa->tcl.elt_cmd_start = rmesa->radeon.cmdbuf.cs->cdw; + + /* round up min_nr to align the state */ + align_min_nr = (min_nr + 1) & ~1; + #if RADEON_OLD_PACKETS - cmd[0].i = 0; - cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; - cmd[1].i = RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM; - cmd[2].i = rmesa->ioctl.vertex_offset; - cmd[3].i = 0xffff; - cmd[4].i = vertex_format; - cmd[5].i = (primitive | - RADEON_CP_VC_CNTL_PRIM_WALK_IND | - RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | - RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); - - retval = (GLushort *)(cmd+6); -#else - cmd[0].i = 0; - cmd[0].header.cmd_type = RADEON_CMD_PACKET3_CLIP; - cmd[1].i = RADEON_CP_PACKET3_3D_DRAW_INDX; - cmd[2].i = vertex_format; - cmd[3].i = (primitive | - RADEON_CP_VC_CNTL_PRIM_WALK_IND | - RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | - RADEON_CP_VC_CNTL_MAOS_ENABLE | - RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); - - retval = (GLushort *)(cmd+4); + BEGIN_BATCH_NO_AUTOSTATE(2+ELTS_BUFSZ(align_min_nr)/4); + OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_3D_RNDR_GEN_INDX_PRIM, 0); + if (!rmesa->radeon.radeonScreen->kernel_mm) { + OUT_BATCH_RELOC(rmesa->ioctl.vertex_offset, rmesa->ioctl.bo, rmesa->ioctl.vertex_offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + } else { + OUT_BATCH(rmesa->ioctl.vertex_offset); + } + OUT_BATCH(0xffff); + OUT_BATCH(vertex_format); + OUT_BATCH(primitive | + RADEON_CP_VC_CNTL_PRIM_WALK_IND | + RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); +#else + BEGIN_BATCH_NO_AUTOSTATE(ELTS_BUFSZ(align_min_nr)/4); + OUT_BATCH_PACKET3_CLIP(RADEON_CP_PACKET3_DRAW_INDX, 0); + OUT_BATCH(vertex_format); + OUT_BATCH(primitive | + RADEON_CP_VC_CNTL_PRIM_WALK_IND | + RADEON_CP_VC_CNTL_COLOR_ORDER_RGBA | + RADEON_CP_VC_CNTL_MAOS_ENABLE | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE); #endif - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s: header 0x%x vfmt 0x%x prim %x \n", - __FUNCTION__, - cmd[1].i, vertex_format, primitive); - assert(!rmesa->dma.flush); - rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; - rmesa->dma.flush = radeonFlushElts; + rmesa->tcl.elt_cmd_offset = rmesa->radeon.cmdbuf.cs->cdw; + rmesa->tcl.elt_used = min_nr; - rmesa->store.elts_start = ((char *)cmd) - rmesa->store.cmd_buf; + retval = (GLushort *)(rmesa->radeon.cmdbuf.cs->packets + rmesa->tcl.elt_cmd_offset); - return retval; -} + if (RADEON_DEBUG & DEBUG_PRIMS) + fprintf(stderr, "%s: header prim %x \n", + __FUNCTION__, primitive); + assert(!rmesa->radeon.dma.flush); + rmesa->radeon.glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; + rmesa->radeon.dma.flush = radeonFlushElts; + return retval; +} -void radeonEmitVertexAOS( radeonContextPtr rmesa, +void radeonEmitVertexAOS( r100ContextPtr rmesa, GLuint vertex_size, + struct radeon_bo *bo, GLuint offset ) { #if RADEON_OLD_PACKETS - rmesa->ioctl.vertex_size = vertex_size; rmesa->ioctl.vertex_offset = offset; + rmesa->ioctl.bo = bo; #else - drm_radeon_cmd_header_t *cmd; + BATCH_LOCALS(&rmesa->radeon); if (RADEON_DEBUG & (DEBUG_PRIMS|DEBUG_IOCTL)) fprintf(stderr, "%s: vertex_size 0x%x offset 0x%x \n", __FUNCTION__, vertex_size, offset); - cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, VERT_AOS_BUFSZ, - __FUNCTION__ ); + BEGIN_BATCH(7); + OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, 2); + OUT_BATCH(1); + OUT_BATCH(vertex_size | (vertex_size << 8)); + OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); - cmd[0].i = 0; - cmd[0].header.cmd_type = RADEON_CMD_PACKET3; - cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (2 << 16); - cmd[2].i = 1; - cmd[3].i = vertex_size | (vertex_size << 8); - cmd[4].i = offset; #endif } - -void radeonEmitAOS( radeonContextPtr rmesa, - struct radeon_dma_region **component, + +void radeonEmitAOS( r100ContextPtr rmesa, GLuint nr, GLuint offset ) { #if RADEON_OLD_PACKETS assert( nr == 1 ); - assert( component[0]->aos_size == component[0]->aos_stride ); - rmesa->ioctl.vertex_size = component[0]->aos_size; - rmesa->ioctl.vertex_offset = - (component[0]->aos_start + offset * component[0]->aos_stride * 4); + rmesa->ioctl.bo = rmesa->radeon.tcl.aos[0].bo; + rmesa->ioctl.vertex_offset = + (rmesa->radeon.tcl.aos[0].offset + offset * rmesa->radeon.tcl.aos[0].stride * 4); #else - drm_radeon_cmd_header_t *cmd; - int sz = AOS_BUFSZ(nr); + BATCH_LOCALS(&rmesa->radeon); + uint32_t voffset; + // int sz = AOS_BUFSZ(nr); + int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2; int i; - int *tmp; if (RADEON_DEBUG & DEBUG_IOCTL) fprintf(stderr, "%s\n", __FUNCTION__); - - cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sz, - __FUNCTION__ ); - cmd[0].i = 0; - cmd[0].header.cmd_type = RADEON_CMD_PACKET3; - cmd[1].i = RADEON_CP_PACKET3_3D_LOAD_VBPNTR | (((sz / sizeof(int))-3) << 16); - cmd[2].i = nr; - tmp = &cmd[0].i; - cmd += 3; - - for (i = 0 ; i < nr ; i++) { - if (i & 1) { - cmd[0].i |= ((component[i]->aos_stride << 24) | - (component[i]->aos_size << 16)); - cmd[2].i = (component[i]->aos_start + - offset * component[i]->aos_stride * 4); - cmd += 3; + BEGIN_BATCH(sz+2+(nr * 2)); + OUT_BATCH_PACKET3(RADEON_CP_PACKET3_3D_LOAD_VBPNTR, sz - 1); + OUT_BATCH(nr); + + if (!rmesa->radeon.radeonScreen->kernel_mm) { + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) | + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[i].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[i+1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } - else { - cmd[0].i = ((component[i]->aos_stride << 8) | - (component[i]->aos_size << 0)); - cmd[1].i = (component[i]->aos_start + - offset * component[i]->aos_stride * 4); - } - } - - if (RADEON_DEBUG & DEBUG_VERTS) { - fprintf(stderr, "%s:\n", __FUNCTION__); - for (i = 0 ; i < sz ; i++) - fprintf(stderr, " %d: %x\n", i, tmp[i]); - } -#endif -} - -/* using already shifted color_fmt! */ -void radeonEmitBlit( radeonContextPtr rmesa, /* FIXME: which drmMinor is required? */ - GLuint color_fmt, - GLuint src_pitch, - GLuint src_offset, - GLuint dst_pitch, - GLuint dst_offset, - GLint srcx, GLint srcy, - GLint dstx, GLint dsty, - GLuint w, GLuint h ) -{ - drm_radeon_cmd_header_t *cmd; - - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s src %x/%x %d,%d dst: %x/%x %d,%d sz: %dx%d\n", - __FUNCTION__, - src_pitch, src_offset, srcx, srcy, - dst_pitch, dst_offset, dstx, dsty, - w, h); - - assert( (src_pitch & 63) == 0 ); - assert( (dst_pitch & 63) == 0 ); - assert( (src_offset & 1023) == 0 ); - assert( (dst_offset & 1023) == 0 ); - assert( w < (1<<16) ); - assert( h < (1<<16) ); - - cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 8 * sizeof(int), - __FUNCTION__ ); - - - cmd[0].i = 0; - cmd[0].header.cmd_type = RADEON_CMD_PACKET3; - cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16); - cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL | - RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_NONE | - color_fmt | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP3_S | - RADEON_DP_SRC_SOURCE_MEMORY | - RADEON_GMC_CLR_CMP_CNTL_DIS | - RADEON_GMC_WR_MSK_DIS ); - - cmd[3].i = ((src_pitch/64)<<22) | (src_offset >> 10); - cmd[4].i = ((dst_pitch/64)<<22) | (dst_offset >> 10); - cmd[5].i = (srcx << 16) | srcy; - cmd[6].i = (dstx << 16) | dsty; /* dst */ - cmd[7].i = (w << 16) | h; -} - - -void radeonEmitWait( radeonContextPtr rmesa, GLuint flags ) -{ - drm_radeon_cmd_header_t *cmd; - - assert( !(flags & ~(RADEON_WAIT_2D|RADEON_WAIT_3D)) ); - - cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, 1 * sizeof(int), - __FUNCTION__ ); - cmd[0].i = 0; - cmd[0].wait.cmd_type = RADEON_CMD_WAIT; - cmd[0].wait.flags = flags; -} - -static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, - const char * caller ) -{ - int ret, i; - drm_radeon_cmd_buffer_t cmd; - - if (rmesa->lost_context) - radeonBackUpAndEmitLostStateLocked(rmesa); - - if (RADEON_DEBUG & DEBUG_IOCTL) { - fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); - - if (RADEON_DEBUG & DEBUG_VERBOSE) - for (i = 0 ; i < rmesa->store.cmd_used ; i += 4 ) - fprintf(stderr, "%d: %x\n", i/4, - *(int *)(&rmesa->store.cmd_buf[i])); - } - - if (RADEON_DEBUG & DEBUG_DMA) - fprintf(stderr, "%s: Releasing %d buffers\n", __FUNCTION__, - rmesa->dma.nr_released_bufs); - - - if (RADEON_DEBUG & DEBUG_SANITY) { - if (rmesa->state.scissor.enabled) - ret = radeonSanityCmdBuffer( rmesa, - rmesa->state.scissor.numClipRects, - rmesa->state.scissor.pClipRects); - else - ret = radeonSanityCmdBuffer( rmesa, - rmesa->numClipRects, - rmesa->pClipRects); - if (ret) { - fprintf(stderr, "drmSanityCommandWrite: %d\n", ret); - goto out; + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + OUT_BATCH_RELOC(voffset, + rmesa->radeon.tcl.aos[nr - 1].bo, + voffset, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } - } - - - cmd.bufsz = rmesa->store.cmd_used; - cmd.buf = rmesa->store.cmd_buf; - - if (rmesa->state.scissor.enabled) { - cmd.nbox = rmesa->state.scissor.numClipRects; - cmd.boxes = rmesa->state.scissor.pClipRects; } else { - cmd.nbox = rmesa->numClipRects; - cmd.boxes = rmesa->pClipRects; - } - - ret = drmCommandWrite( rmesa->dri.fd, - DRM_RADEON_CMDBUF, - &cmd, sizeof(cmd) ); - - if (ret) - fprintf(stderr, "drmCommandWrite: %d\n", ret); - - if (RADEON_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "\nSyncing in %s\n\n", __FUNCTION__); - radeonWaitForIdleLocked( rmesa ); - } - - out: - rmesa->store.primnr = 0; - rmesa->store.statenr = 0; - rmesa->store.cmd_used = 0; - rmesa->dma.nr_released_bufs = 0; - rmesa->save_on_next_emit = 1; - - return ret; -} - - -/* Note: does not emit any commands to avoid recursion on - * radeonAllocCmdBuf. - */ -void radeonFlushCmdBuf( radeonContextPtr rmesa, const char *caller ) -{ - int ret; - - - LOCK_HARDWARE( rmesa ); - - ret = radeonFlushCmdBufLocked( rmesa, caller ); - - UNLOCK_HARDWARE( rmesa ); - - if (ret) { - fprintf(stderr, "drm_radeon_cmd_buffer_t: %d (exiting)\n", ret); - exit(ret); - } -} - -/* ============================================================= - * Hardware vertex buffer handling - */ - - -void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa ) -{ - struct radeon_dma_buffer *dmabuf; - int fd = rmesa->dri.fd; - int index = 0; - int size = 0; - drmDMAReq dma; - int ret; - - if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) - fprintf(stderr, "%s\n", __FUNCTION__); - - if (rmesa->dma.flush) { - rmesa->dma.flush( rmesa ); - } - - if (rmesa->dma.current.buf) - radeonReleaseDmaRegion( rmesa, &rmesa->dma.current, __FUNCTION__ ); - - if (rmesa->dma.nr_released_bufs > 4) - radeonFlushCmdBuf( rmesa, __FUNCTION__ ); - - dma.context = rmesa->dri.hwContext; - dma.send_count = 0; - dma.send_list = NULL; - dma.send_sizes = NULL; - dma.flags = 0; - dma.request_count = 1; - dma.request_size = RADEON_BUFFER_SIZE; - dma.request_list = &index; - dma.request_sizes = &size; - dma.granted_count = 0; - - LOCK_HARDWARE(rmesa); /* no need to validate */ - - ret = drmDMA( fd, &dma ); - - if (ret != 0) { - /* Free some up this way? - */ - if (rmesa->dma.nr_released_bufs) { - radeonFlushCmdBufLocked( rmesa, __FUNCTION__ ); - } - - if (RADEON_DEBUG & DEBUG_DMA) - fprintf(stderr, "Waiting for buffers\n"); - - radeonWaitForIdleLocked( rmesa ); - ret = drmDMA( fd, &dma ); - - if ( ret != 0 ) { - UNLOCK_HARDWARE( rmesa ); - fprintf( stderr, "Error: Could not get dma buffer... exiting\n" ); - exit( -1 ); - } - } - - UNLOCK_HARDWARE(rmesa); - - if (RADEON_DEBUG & DEBUG_DMA) - fprintf(stderr, "Allocated buffer %d\n", index); - - dmabuf = CALLOC_STRUCT( radeon_dma_buffer ); - dmabuf->buf = &rmesa->radeonScreen->buffers->list[index]; - dmabuf->refcount = 1; - - rmesa->dma.current.buf = dmabuf; - rmesa->dma.current.address = dmabuf->buf->address; - rmesa->dma.current.end = dmabuf->buf->total; - rmesa->dma.current.start = 0; - rmesa->dma.current.ptr = 0; - - rmesa->c_vertexBuffers++; -} - -void radeonReleaseDmaRegion( radeonContextPtr rmesa, - struct radeon_dma_region *region, - const char *caller ) -{ - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); - - if (!region->buf) - return; - - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); - - if (--region->buf->refcount == 0) { - drm_radeon_cmd_header_t *cmd; - - if (RADEON_DEBUG & (DEBUG_IOCTL|DEBUG_DMA)) - fprintf(stderr, "%s -- DISCARD BUF %d\n", __FUNCTION__, - region->buf->buf->idx); - - cmd = (drm_radeon_cmd_header_t *)radeonAllocCmdBuf( rmesa, sizeof(*cmd), - __FUNCTION__ ); - cmd->dma.cmd_type = RADEON_CMD_DMA_DISCARD; - cmd->dma.buf_idx = region->buf->buf->idx; - FREE(region->buf); - rmesa->dma.nr_released_bufs++; - } - - region->buf = NULL; - region->start = 0; -} - -/* Allocates a region from rmesa->dma.current. If there isn't enough - * space in current, grab a new buffer (and discard what was left of current) - */ -void radeonAllocDmaRegion( radeonContextPtr rmesa, - struct radeon_dma_region *region, - int bytes, - int alignment ) -{ - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s %d\n", __FUNCTION__, bytes); - - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); - - if (region->buf) - radeonReleaseDmaRegion( rmesa, region, __FUNCTION__ ); - - alignment--; - rmesa->dma.current.start = rmesa->dma.current.ptr = - (rmesa->dma.current.ptr + alignment) & ~alignment; - - if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) - radeonRefillCurrentDmaRegion( rmesa ); - - region->start = rmesa->dma.current.start; - region->ptr = rmesa->dma.current.start; - region->end = rmesa->dma.current.start + bytes; - region->address = rmesa->dma.current.address; - region->buf = rmesa->dma.current.buf; - region->buf->refcount++; - - rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */ - rmesa->dma.current.start = - rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7; -} - -/* ================================================================ - * SwapBuffers with client-side throttling - */ - -static uint32_t radeonGetLastFrame (radeonContextPtr rmesa) -{ - drm_radeon_getparam_t gp; - int ret; - uint32_t frame; - - gp.param = RADEON_PARAM_LAST_FRAME; - gp.value = (int *)&frame; - ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_GETPARAM, - &gp, sizeof(gp) ); - - if ( ret ) { - fprintf( stderr, "%s: drm_radeon_getparam_t: %d\n", __FUNCTION__, ret ); - exit(1); - } - - return frame; -} - -static void radeonEmitIrqLocked( radeonContextPtr rmesa ) -{ - drm_radeon_irq_emit_t ie; - int ret; - - ie.irq_seq = &rmesa->iw.irq_seq; - ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_IRQ_EMIT, - &ie, sizeof(ie) ); - if ( ret ) { - fprintf( stderr, "%s: drm_radeon_irq_emit_t: %d\n", __FUNCTION__, ret ); - exit(1); - } -} - - -static void radeonWaitIrq( radeonContextPtr rmesa ) -{ - int ret; - - do { - ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_IRQ_WAIT, - &rmesa->iw, sizeof(rmesa->iw) ); - } while (ret && (errno == EINTR || errno == EBUSY)); - - if ( ret ) { - fprintf( stderr, "%s: drmRadeonIrqWait: %d\n", __FUNCTION__, ret ); - exit(1); - } -} - - -static void radeonWaitForFrameCompletion( radeonContextPtr rmesa ) -{ - drm_radeon_sarea_t *sarea = rmesa->sarea; - - if (rmesa->do_irqs) { - if (radeonGetLastFrame(rmesa) < sarea->last_frame) { - if (!rmesa->irqsEmitted) { - while (radeonGetLastFrame (rmesa) < sarea->last_frame) - ; - } - else { - UNLOCK_HARDWARE( rmesa ); - radeonWaitIrq( rmesa ); - LOCK_HARDWARE( rmesa ); - } - rmesa->irqsEmitted = 10; + for (i = 0; i + 1 < nr; i += 2) { + OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) | + (rmesa->radeon.tcl.aos[i].stride << 8) | + (rmesa->radeon.tcl.aos[i + 1].components << 16) | + (rmesa->radeon.tcl.aos[i + 1].stride << 24)); + + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + OUT_BATCH(voffset); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + OUT_BATCH(voffset); } - if (rmesa->irqsEmitted) { - radeonEmitIrqLocked( rmesa ); - rmesa->irqsEmitted--; + if (nr & 1) { + OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) | + (rmesa->radeon.tcl.aos[nr - 1].stride << 8)); + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + OUT_BATCH(voffset); } - } - else { - while (radeonGetLastFrame (rmesa) < sarea->last_frame) { - UNLOCK_HARDWARE( rmesa ); - if (rmesa->do_usleeps) - DO_USLEEP( 1 ); - LOCK_HARDWARE( rmesa ); + for (i = 0; i + 1 < nr; i += 2) { + voffset = rmesa->radeon.tcl.aos[i + 0].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[i+0].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); + voffset = rmesa->radeon.tcl.aos[i + 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[i+1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } - } -} - -/* Copy the back color buffer to the front color buffer. - */ -void radeonCopyBuffer( __DRIdrawablePrivate *dPriv, - const drm_clip_rect_t *rect) -{ - radeonContextPtr rmesa; - GLint nbox, i, ret; - GLboolean missed_target; - int64_t ust; - __DRIscreenPrivate *psp; - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; - - if ( RADEON_DEBUG & DEBUG_IOCTL ) { - fprintf( stderr, "\n%s( %p )\n\n", __FUNCTION__, (void *) rmesa->glCtx ); - } - - RADEON_FIREVERTICES( rmesa ); - LOCK_HARDWARE( rmesa ); - - /* Throttle the frame rate -- only allow one pending swap buffers - * request at a time. - */ - radeonWaitForFrameCompletion( rmesa ); - if (!rect) - { - UNLOCK_HARDWARE( rmesa ); - driWaitForVBlank( dPriv, & missed_target ); - LOCK_HARDWARE( rmesa ); - } - - nbox = dPriv->numClipRects; /* must be in locked region */ - - for ( i = 0 ; i < nbox ; ) { - GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS , nbox ); - drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t *b = rmesa->sarea->boxes; - GLint n = 0; - - for ( ; i < nr ; i++ ) { - - *b = box[i]; - - if (rect) - { - if (rect->x1 > b->x1) - b->x1 = rect->x1; - if (rect->y1 > b->y1) - b->y1 = rect->y1; - if (rect->x2 < b->x2) - b->x2 = rect->x2; - if (rect->y2 < b->y2) - b->y2 = rect->y2; - - if (b->x1 >= b->x2 || b->y1 >= b->y2) - continue; - } - - b++; - n++; - } - rmesa->sarea->nbox = n; - - if (!n) - continue; - - ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_SWAP ); - - if ( ret ) { - fprintf( stderr, "DRM_RADEON_SWAP_BUFFERS: return = %d\n", ret ); - UNLOCK_HARDWARE( rmesa ); - exit( 1 ); + if (nr & 1) { + voffset = rmesa->radeon.tcl.aos[nr - 1].offset + + offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride; + radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs, + rmesa->radeon.tcl.aos[nr-1].bo, + RADEON_GEM_DOMAIN_GTT, + 0, 0); } } + END_BATCH(); - UNLOCK_HARDWARE( rmesa ); - if (!rect) - { - psp = dPriv->driScreenPriv; - rmesa->swap_count++; - (*psp->systemTime->getUST)( & ust ); - if ( missed_target ) { - rmesa->swap_missed_count++; - rmesa->swap_missed_ust = ust - rmesa->swap_ust; - } - - rmesa->swap_ust = ust; - rmesa->hw.all_dirty = GL_TRUE; - } -} - -void radeonPageFlip( __DRIdrawablePrivate *dPriv ) -{ - radeonContextPtr rmesa; - GLint ret; - GLboolean missed_target; - __DRIscreenPrivate *psp; - - assert(dPriv); - assert(dPriv->driContextPriv); - assert(dPriv->driContextPriv->driverPrivate); - - rmesa = (radeonContextPtr) dPriv->driContextPriv->driverPrivate; - psp = dPriv->driScreenPriv; - - if ( RADEON_DEBUG & DEBUG_IOCTL ) { - fprintf(stderr, "%s: pfCurrentPage: %d\n", __FUNCTION__, - rmesa->sarea->pfCurrentPage); - } - - RADEON_FIREVERTICES( rmesa ); - LOCK_HARDWARE( rmesa ); - - /* Need to do this for the perf box placement: - */ - if (dPriv->numClipRects) - { - drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t *b = rmesa->sarea->boxes; - b[0] = box[0]; - rmesa->sarea->nbox = 1; - } - - /* Throttle the frame rate -- only allow a few pending swap buffers - * request at a time. - */ - radeonWaitForFrameCompletion( rmesa ); - UNLOCK_HARDWARE( rmesa ); - driWaitForVBlank( dPriv, & missed_target ); - if ( missed_target ) { - rmesa->swap_missed_count++; - (void) (*psp->systemTime->getUST)( & rmesa->swap_missed_ust ); - } - LOCK_HARDWARE( rmesa ); - - ret = drmCommandNone( rmesa->dri.fd, DRM_RADEON_FLIP ); - - UNLOCK_HARDWARE( rmesa ); - - if ( ret ) { - fprintf( stderr, "DRM_RADEON_FLIP: return = %d\n", ret ); - exit( 1 ); - } - - rmesa->swap_count++; - (void) (*psp->systemTime->getUST)( & rmesa->swap_ust ); - - /* Get ready for drawing next frame. Update the renderbuffers' - * flippedOffset/Pitch fields so we draw into the right place. - */ - driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, - rmesa->sarea->pfCurrentPage); - - radeonUpdateDrawBuffer(rmesa->glCtx); +#endif } - /* ================================================================ * Buffer clear */ #define RADEON_MAX_CLEARS 256 -static void radeonClear( GLcontext *ctx, GLbitfield mask ) +static void radeonUserClear(GLcontext *ctx, GLuint mask) +{ + radeon_clear_tris(ctx, mask); +} + +static void radeonKernelClear(GLcontext *ctx, GLuint flags) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; - drm_radeon_sarea_t *sarea = rmesa->sarea; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + drm_radeon_sarea_t *sarea = rmesa->radeon.sarea; uint32_t clear; - GLuint flags = 0; - GLuint color_mask = 0; GLint ret, i; GLint cx, cy, cw, ch; - if ( RADEON_DEBUG & DEBUG_IOCTL ) { - fprintf( stderr, "radeonClear\n"); - } - - { - LOCK_HARDWARE( rmesa ); - UNLOCK_HARDWARE( rmesa ); - if ( dPriv->numClipRects == 0 ) - return; - } - - radeonFlush( ctx ); - - if ( mask & BUFFER_BIT_FRONT_LEFT ) { - flags |= RADEON_FRONT; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; - mask &= ~BUFFER_BIT_FRONT_LEFT; - } - - if ( mask & BUFFER_BIT_BACK_LEFT ) { - flags |= RADEON_BACK; - color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; - mask &= ~BUFFER_BIT_BACK_LEFT; - } - - if ( mask & BUFFER_BIT_DEPTH ) { - flags |= RADEON_DEPTH; - mask &= ~BUFFER_BIT_DEPTH; - } - - if ( (mask & BUFFER_BIT_STENCIL) && rmesa->state.stencil.hwBuffer ) { - flags |= RADEON_STENCIL; - mask &= ~BUFFER_BIT_STENCIL; - } - - if ( mask ) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask); - _swrast_Clear( ctx, mask ); - } - - if ( !flags ) - return; - - if (rmesa->using_hyperz) { - flags |= RADEON_USE_COMP_ZBUF; -/* if (rmesa->radeonScreen->chipset & RADEON_CHIPSET_TCL) - flags |= RADEON_USE_HIERZ; */ - if (!(rmesa->state.stencil.hwBuffer) || - ((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && - ((rmesa->state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) { - flags |= RADEON_CLEAR_FASTZ; - } - } - - LOCK_HARDWARE( rmesa ); + LOCK_HARDWARE( &rmesa->radeon ); /* compute region after locking: */ cx = ctx->DrawBuffer->_Xmin; @@ -1112,7 +479,7 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) gp.param = RADEON_PARAM_LAST_CLEAR; gp.value = (int *)&clear; - ret = drmCommandWriteRead( rmesa->dri.fd, + ret = drmCommandWriteRead( rmesa->radeon.dri.fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp) ); if ( ret ) { @@ -1124,20 +491,20 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) break; } - if ( rmesa->do_usleeps ) { - UNLOCK_HARDWARE( rmesa ); + if ( rmesa->radeon.do_usleeps ) { + UNLOCK_HARDWARE( &rmesa->radeon ); DO_USLEEP( 1 ); - LOCK_HARDWARE( rmesa ); + LOCK_HARDWARE( &rmesa->radeon ); } } /* Send current state to the hardware */ - radeonFlushCmdBufLocked( rmesa, __FUNCTION__ ); + rcommonFlushCmdBufLocked( &rmesa->radeon, __FUNCTION__ ); for ( i = 0 ; i < dPriv->numClipRects ; ) { GLint nr = MIN2( i + RADEON_NR_SAREA_CLIPRECTS, dPriv->numClipRects ); drm_clip_rect_t *box = dPriv->pClipRects; - drm_clip_rect_t *b = rmesa->sarea->boxes; + drm_clip_rect_t *b = rmesa->radeon.sarea->boxes; drm_radeon_clear_t clear; drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS]; GLint n = 0; @@ -1172,105 +539,107 @@ static void radeonClear( GLcontext *ctx, GLbitfield mask ) } } - rmesa->sarea->nbox = n; + rmesa->radeon.sarea->nbox = n; clear.flags = flags; - clear.clear_color = rmesa->state.color.clear; - clear.clear_depth = rmesa->state.depth.clear; + clear.clear_color = rmesa->radeon.state.color.clear; + clear.clear_depth = rmesa->radeon.state.depth.clear; clear.color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; - clear.depth_mask = rmesa->state.stencil.clear; + clear.depth_mask = rmesa->radeon.state.stencil.clear; clear.depth_boxes = depth_boxes; n--; - b = rmesa->sarea->boxes; + b = rmesa->radeon.sarea->boxes; for ( ; n >= 0 ; n-- ) { depth_boxes[n].f[CLEAR_X1] = (float)b[n].x1; depth_boxes[n].f[CLEAR_Y1] = (float)b[n].y1; depth_boxes[n].f[CLEAR_X2] = (float)b[n].x2; depth_boxes[n].f[CLEAR_Y2] = (float)b[n].y2; - depth_boxes[n].f[CLEAR_DEPTH] = - (float)rmesa->state.depth.clear; + depth_boxes[n].f[CLEAR_DEPTH] = + (float)rmesa->radeon.state.depth.clear; } - ret = drmCommandWrite( rmesa->dri.fd, DRM_RADEON_CLEAR, + ret = drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_CLEAR, &clear, sizeof(drm_radeon_clear_t)); if ( ret ) { - UNLOCK_HARDWARE( rmesa ); + UNLOCK_HARDWARE( &rmesa->radeon ); fprintf( stderr, "DRM_RADEON_CLEAR: return = %d\n", ret ); exit( 1 ); } } - - UNLOCK_HARDWARE( rmesa ); - rmesa->hw.all_dirty = GL_TRUE; + UNLOCK_HARDWARE( &rmesa->radeon ); } - -void radeonWaitForIdleLocked( radeonContextPtr rmesa ) +static void radeonClear( GLcontext *ctx, GLbitfield mask ) { - int fd = rmesa->dri.fd; - int to = 0; - int ret, i = 0; - - rmesa->c_drawWaits++; - - do { - do { - ret = drmCommandNone( fd, DRM_RADEON_CP_IDLE); - } while ( ret && errno == EBUSY && i++ < RADEON_IDLE_RETRY ); - } while ( ( ret == -EBUSY ) && ( to++ < RADEON_TIMEOUT ) ); - - if ( ret < 0 ) { - UNLOCK_HARDWARE( rmesa ); - fprintf( stderr, "Error: Radeon timed out... exiting\n" ); - exit( -1 ); - } -} + r100ContextPtr rmesa = R100_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + GLuint flags = 0; + GLuint color_mask = 0; + GLuint orig_mask = mask; + + if ( RADEON_DEBUG & DEBUG_IOCTL ) { + fprintf( stderr, "radeonClear\n"); + } + { + LOCK_HARDWARE( &rmesa->radeon ); + UNLOCK_HARDWARE( &rmesa->radeon ); + if ( dPriv->numClipRects == 0 ) + return; + } -static void radeonWaitForIdle( radeonContextPtr rmesa ) -{ - LOCK_HARDWARE(rmesa); - radeonWaitForIdleLocked( rmesa ); - UNLOCK_HARDWARE(rmesa); -} + radeon_firevertices(&rmesa->radeon); + if ( mask & BUFFER_BIT_FRONT_LEFT ) { + flags |= RADEON_FRONT; + color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; + mask &= ~BUFFER_BIT_FRONT_LEFT; + } -void radeonFlush( GLcontext *ctx ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + if ( mask & BUFFER_BIT_BACK_LEFT ) { + flags |= RADEON_BACK; + color_mask = rmesa->hw.msk.cmd[MSK_RB3D_PLANEMASK]; + mask &= ~BUFFER_BIT_BACK_LEFT; + } - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); + if ( mask & BUFFER_BIT_DEPTH ) { + flags |= RADEON_DEPTH; + mask &= ~BUFFER_BIT_DEPTH; + } - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); + if ( (mask & BUFFER_BIT_STENCIL) ) { + flags |= RADEON_STENCIL; + mask &= ~BUFFER_BIT_STENCIL; + } - radeonEmitState( rmesa ); - - if (rmesa->store.cmd_used) - radeonFlushCmdBuf( rmesa, __FUNCTION__ ); -} + if ( mask ) { + if (RADEON_DEBUG & DEBUG_FALLBACKS) + fprintf(stderr, "%s: swrast clear, mask: %x\n", __FUNCTION__, mask); + _swrast_Clear( ctx, mask ); + } -/* Make sure all commands have been sent to the hardware and have - * completed processing. - */ -void radeonFinish( GLcontext *ctx ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - radeonFlush( ctx ); - - if (rmesa->do_irqs) { - LOCK_HARDWARE( rmesa ); - radeonEmitIrqLocked( rmesa ); - UNLOCK_HARDWARE( rmesa ); - radeonWaitIrq( rmesa ); + if ( !flags ) + return; + + if (rmesa->using_hyperz) { + flags |= RADEON_USE_COMP_ZBUF; +/* if (rmesa->radeon.radeonScreen->chipset & RADEON_CHIPSET_TCL) + flags |= RADEON_USE_HIERZ; */ + if (((flags & RADEON_DEPTH) && (flags & RADEON_STENCIL) && + ((rmesa->radeon.state.stencil.clear & RADEON_STENCIL_WRITE_MASK) == RADEON_STENCIL_WRITE_MASK))) { + flags |= RADEON_CLEAR_FASTZ; + } } - else - radeonWaitForIdle( rmesa ); -} + if (rmesa->radeon.radeonScreen->kernel_mm) + radeonUserClear(ctx, orig_mask); + else { + radeonKernelClear(ctx, flags); + rmesa->radeon.hw.all_dirty = GL_TRUE; + } +} void radeonInitIoctlFuncs( GLcontext *ctx ) { diff --git a/src/mesa/drivers/dri/radeon/radeon_ioctl.h b/src/mesa/drivers/dri/radeon/radeon_ioctl.h index 4e3a44df07..18805d4c57 100644 --- a/src/mesa/drivers/dri/radeon/radeon_ioctl.h +++ b/src/mesa/drivers/dri/radeon/radeon_ioctl.h @@ -38,31 +38,32 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/simple_list.h" #include "radeon_lock.h" +#include "radeon_bocs_wrapper.h" - -extern void radeonEmitState( radeonContextPtr rmesa ); -extern void radeonEmitVertexAOS( radeonContextPtr rmesa, +extern void radeonEmitVertexAOS( r100ContextPtr rmesa, GLuint vertex_size, + struct radeon_bo *bo, GLuint offset ); -extern void radeonEmitVbufPrim( radeonContextPtr rmesa, +extern void radeonEmitVbufPrim( r100ContextPtr rmesa, GLuint vertex_format, GLuint primitive, GLuint vertex_nr ); -extern void radeonFlushElts( radeonContextPtr rmesa ); +extern void radeonFlushElts( GLcontext *ctx ); + -extern GLushort *radeonAllocEltsOpenEnded( radeonContextPtr rmesa, +extern GLushort *radeonAllocEltsOpenEnded( r100ContextPtr rmesa, GLuint vertex_format, GLuint primitive, GLuint min_nr ); -extern void radeonEmitAOS( radeonContextPtr rmesa, - struct radeon_dma_region **regions, + +extern void radeonEmitAOS( r100ContextPtr rmesa, GLuint n, GLuint offset ); -extern void radeonEmitBlit( radeonContextPtr rmesa, +extern void radeonEmitBlit( r100ContextPtr rmesa, GLuint color_fmt, GLuint src_pitch, GLuint src_offset, @@ -72,30 +73,15 @@ extern void radeonEmitBlit( radeonContextPtr rmesa, GLint dstx, GLint dsty, GLuint w, GLuint h ); -extern void radeonEmitWait( radeonContextPtr rmesa, GLuint flags ); - -extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * ); -extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa ); +extern void radeonEmitWait( r100ContextPtr rmesa, GLuint flags ); -extern void radeonAllocDmaRegion( radeonContextPtr rmesa, - struct radeon_dma_region *region, - int bytes, - int alignment ); +extern void radeonFlushCmdBuf( r100ContextPtr rmesa, const char * ); -extern void radeonReleaseDmaRegion( radeonContextPtr rmesa, - struct radeon_dma_region *region, - const char *caller ); - -extern void radeonCopyBuffer( __DRIdrawablePrivate *drawable, - const drm_clip_rect_t *rect); -extern void radeonPageFlip( __DRIdrawablePrivate *drawable ); extern void radeonFlush( GLcontext *ctx ); extern void radeonFinish( GLcontext *ctx ); -extern void radeonWaitForIdleLocked( radeonContextPtr rmesa ); -extern void radeonWaitForVBlank( radeonContextPtr rmesa ); extern void radeonInitIoctlFuncs( GLcontext *ctx ); -extern void radeonGetAllParams( radeonContextPtr rmesa ); -extern void radeonSetUpAtomList( radeonContextPtr rmesa ); +extern void radeonGetAllParams( r100ContextPtr rmesa ); +extern void radeonSetUpAtomList( r100ContextPtr rmesa ); /* ================================================================ * Helper macros: @@ -105,33 +91,33 @@ extern void radeonSetUpAtomList( radeonContextPtr rmesa ); */ #define RADEON_NEWPRIM( rmesa ) \ do { \ - if ( rmesa->dma.flush ) \ - rmesa->dma.flush( rmesa ); \ + if ( rmesa->radeon.dma.flush ) \ + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \ } while (0) /* Can accomodate several state changes and primitive changes without * actually firing the buffer. */ + #define RADEON_STATECHANGE( rmesa, ATOM ) \ do { \ RADEON_NEWPRIM( rmesa ); \ rmesa->hw.ATOM.dirty = GL_TRUE; \ - rmesa->hw.is_dirty = GL_TRUE; \ + rmesa->radeon.hw.is_dirty = GL_TRUE; \ } while (0) -#define RADEON_DB_STATE( ATOM ) \ +#define RADEON_DB_STATE( ATOM ) \ memcpy( rmesa->hw.ATOM.lastcmd, rmesa->hw.ATOM.cmd, \ rmesa->hw.ATOM.cmd_size * 4) -static INLINE int RADEON_DB_STATECHANGE( - radeonContextPtr rmesa, - struct radeon_state_atom *atom ) +static INLINE int RADEON_DB_STATECHANGE(r100ContextPtr rmesa, + struct radeon_state_atom *atom ) { if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) { - int *tmp; + GLuint *tmp; RADEON_NEWPRIM( rmesa ); atom->dirty = GL_TRUE; - rmesa->hw.is_dirty = GL_TRUE; + rmesa->radeon.hw.is_dirty = GL_TRUE; tmp = atom->cmd; atom->cmd = atom->lastcmd; atom->lastcmd = tmp; @@ -141,16 +127,6 @@ static INLINE int RADEON_DB_STATECHANGE( return 0; } - -/* Fire the buffered vertices no matter what. - */ -#define RADEON_FIREVERTICES( rmesa ) \ -do { \ - if ( rmesa->store.cmd_used || rmesa->dma.flush ) { \ - radeonFlush( rmesa->glCtx ); \ - } \ -} while (0) - /* Command lengths. Note that any time you ensure ELTS_BUFSZ or VBUF_BUFSZ * are available, you will also be adding an rmesa->state.max_state_size because * r200EmitState is called from within r200EmitVbufPrim and r200FlushElts. @@ -167,36 +143,37 @@ do { \ #define VBUF_BUFSZ (4 * sizeof(int)) #endif -/* Ensure that a minimum amount of space is available in the command buffer. - * This is used to ensure atomicity of state updates with the rendering requests - * that rely on them. - * - * An alternative would be to implement a "soft lock" such that when the buffer - * wraps at an inopportune time, we grab the lock, flush the current buffer, - * and hang on to the lock until the critical section is finished and we flush - * the buffer again and unlock. - */ -static INLINE void radeonEnsureCmdBufSpace( radeonContextPtr rmesa, - int bytes ) -{ - if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ) - radeonFlushCmdBuf( rmesa, __FUNCTION__ ); - assert( bytes <= RADEON_CMD_BUF_SZ ); -} -/* Alloc space in the command buffer - */ -static INLINE char *radeonAllocCmdBuf( radeonContextPtr rmesa, - int bytes, const char *where ) +static inline uint32_t cmdpacket3(int cmd_type) { - if (rmesa->store.cmd_used + bytes > RADEON_CMD_BUF_SZ) - radeonFlushCmdBuf( rmesa, __FUNCTION__ ); + drm_radeon_cmd_header_t cmd; + + cmd.i = 0; + cmd.header.cmd_type = cmd_type; + + return (uint32_t)cmd.i; - { - char *head = rmesa->store.cmd_buf + rmesa->store.cmd_used; - rmesa->store.cmd_used += bytes; - return head; - } } +#define OUT_BATCH_PACKET3(packet, num_extra) do { \ + if (!b_l_rmesa->radeonScreen->kernel_mm) { \ + OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3)); \ + OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ + } else { \ + OUT_BATCH(CP_PACKET2); \ + OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ + } \ + } while(0) + +#define OUT_BATCH_PACKET3_CLIP(packet, num_extra) do { \ + if (!b_l_rmesa->radeonScreen->kernel_mm) { \ + OUT_BATCH(cmdpacket3(RADEON_CMD_PACKET3_CLIP)); \ + OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ + } else { \ + OUT_BATCH(CP_PACKET2); \ + OUT_BATCH(CP_PACKET3((packet), (num_extra))); \ + } \ + } while(0) + + #endif /* __RADEON_IOCTL_H__ */ diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.c b/src/mesa/drivers/dri/radeon/radeon_lock.c index 64bb3ca103..5774f7ebcf 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.c +++ b/src/mesa/drivers/dri/radeon/radeon_lock.c @@ -41,30 +41,13 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/glheader.h" #include "main/mtypes.h" -#include "radeon_context.h" +#include "main/colormac.h" +#include "dri_util.h" +#include "radeon_screen.h" +#include "radeon_common.h" #include "radeon_lock.h" -#include "radeon_tex.h" -#include "radeon_state.h" -#include "radeon_ioctl.h" - #include "drirenderbuffer.h" -#if DEBUG_LOCKING -char *prevLockFile = NULL; -int prevLockLine = 0; -#endif - -/* Turn on/off page flipping according to the flags in the sarea: - */ -static void radeonUpdatePageFlipping(radeonContextPtr rmesa) -{ - rmesa->doPageFlip = rmesa->sarea->pfState; - if (rmesa->glCtx->WinSysDrawBuffer) { - driFlipRenderbuffers(rmesa->glCtx->WinSysDrawBuffer, - rmesa->sarea->pfCurrentPage); - } -} - /* Update the hardware state. This is called if another context has * grabbed the hardware lock, which includes the X server. This * function also updates the driver's window state after the X server @@ -75,10 +58,11 @@ static void radeonUpdatePageFlipping(radeonContextPtr rmesa) */ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) { - __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; - __DRIdrawablePrivate *const readable = rmesa->dri.readable; + __DRIdrawablePrivate *const drawable = radeon_get_drawable(rmesa); + __DRIdrawablePrivate *const readable = radeon_get_readable(rmesa); __DRIscreenPrivate *sPriv = rmesa->dri.screen; - drm_radeon_sarea_t *sarea = rmesa->sarea; + + assert(drawable != NULL); drmGetLock(rmesa->dri.fd, rmesa->dri.hwContext, flags); @@ -96,29 +80,42 @@ void radeonGetLock(radeonContextPtr rmesa, GLuint flags) } if (rmesa->lastStamp != drawable->lastStamp) { - radeonUpdatePageFlipping(rmesa); - radeonSetCliprects(rmesa); - radeonUpdateViewportOffset(rmesa->glCtx); - driUpdateFramebufferSize(rmesa->glCtx, drawable); + radeon_window_moved(rmesa); + rmesa->lastStamp = drawable->lastStamp; } - RADEON_STATECHANGE(rmesa, ctx); - if (rmesa->sarea->tiling_enabled) { - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= - RADEON_COLOR_TILE_ENABLE; - } else { - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] &= - ~RADEON_COLOR_TILE_ENABLE; - } + rmesa->vtbl.get_lock(rmesa); + + rmesa->lost_context = GL_TRUE; +} - if (sarea->ctx_owner != rmesa->dri.hwContext) { - int i; - sarea->ctx_owner = rmesa->dri.hwContext; +void radeon_lock_hardware(radeonContextPtr radeon) +{ + char ret = 0; + struct radeon_framebuffer *rfb = NULL; + struct radeon_renderbuffer *rrb = NULL; - for (i = 0; i < rmesa->nr_heaps; i++) { - DRI_AGE_TEXTURES(rmesa->texture_heaps[i]); - } + if (radeon_get_drawable(radeon)) { + rfb = radeon_get_drawable(radeon)->driverPrivate; + + if (rfb) + rrb = radeon_get_renderbuffer(&rfb->base, + rfb->base._ColorDrawBufferIndexes[0]); } - rmesa->lost_context = GL_TRUE; + if (!radeon->radeonScreen->driScreen->dri2.enabled) { + DRM_CAS(radeon->dri.hwLock, radeon->dri.hwContext, + (DRM_LOCK_HELD | radeon->dri.hwContext), ret ); + if (ret) + radeonGetLock(radeon, 0); + } +} + +void radeon_unlock_hardware(radeonContextPtr radeon) +{ + if (!radeon->radeonScreen->driScreen->dri2.enabled) { + DRM_UNLOCK( radeon->dri.fd, + radeon->dri.hwLock, + radeon->dri.hwContext ); + } } diff --git a/src/mesa/drivers/dri/radeon/radeon_lock.h b/src/mesa/drivers/dri/radeon/radeon_lock.h index 86e96aa7d2..2817709eed 100644 --- a/src/mesa/drivers/dri/radeon/radeon_lock.h +++ b/src/mesa/drivers/dri/radeon/radeon_lock.h @@ -39,74 +39,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * Kevin E. Martin <martin@valinux.com> */ -#ifndef __RADEON_LOCK_H__ -#define __RADEON_LOCK_H__ +#ifndef COMMON_LOCK_H +#define COMMON_LOCK_H -extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); - -/* Turn DEBUG_LOCKING on to find locking conflicts. - */ -#define DEBUG_LOCKING 0 - -#if DEBUG_LOCKING -extern char *prevLockFile; -extern int prevLockLine; - -#define DEBUG_LOCK() \ - do { \ - prevLockFile = (__FILE__); \ - prevLockLine = (__LINE__); \ - } while (0) - -#define DEBUG_RESET() \ - do { \ - prevLockFile = 0; \ - prevLockLine = 0; \ - } while (0) - -#define DEBUG_CHECK_LOCK() \ - do { \ - if ( prevLockFile ) { \ - fprintf( stderr, \ - "LOCK SET!\n\tPrevious %s:%d\n\tCurrent: %s:%d\n", \ - prevLockFile, prevLockLine, __FILE__, __LINE__ ); \ - exit( 1 ); \ - } \ - } while (0) - -#else +#include "main/colormac.h" +#include "radeon_screen.h" +#include "radeon_common.h" -#define DEBUG_LOCK() -#define DEBUG_RESET() -#define DEBUG_CHECK_LOCK() +extern void radeonGetLock(radeonContextPtr rmesa, GLuint flags); -#endif - -/* - * !!! We may want to separate locks from locks with validation. This - * could be used to improve performance for those things commands that - * do not do any drawing !!! - */ +void radeon_lock_hardware(radeonContextPtr rmesa); +void radeon_unlock_hardware(radeonContextPtr rmesa); /* Lock the hardware and validate our state. */ -#define LOCK_HARDWARE( rmesa ) \ - do { \ - char __ret = 0; \ - DEBUG_CHECK_LOCK(); \ - DRM_CAS( (rmesa)->dri.hwLock, (rmesa)->dri.hwContext, \ - (DRM_LOCK_HELD | (rmesa)->dri.hwContext), __ret ); \ - if ( __ret ) \ - radeonGetLock( (rmesa), 0 ); \ - DEBUG_LOCK(); \ - } while (0) - -#define UNLOCK_HARDWARE( rmesa ) \ - do { \ - DRM_UNLOCK( (rmesa)->dri.fd, \ - (rmesa)->dri.hwLock, \ - (rmesa)->dri.hwContext ); \ - DEBUG_RESET(); \ - } while (0) +#define LOCK_HARDWARE( rmesa ) radeon_lock_hardware(rmesa) +#define UNLOCK_HARDWARE( rmesa ) radeon_unlock_hardware(rmesa) -#endif /* __RADEON_LOCK_H__ */ +#endif diff --git a/src/mesa/drivers/dri/radeon/radeon_maos.h b/src/mesa/drivers/dri/radeon/radeon_maos.h index b8935e84a0..b88eb198d5 100644 --- a/src/mesa/drivers/dri/radeon/radeon_maos.h +++ b/src/mesa/drivers/dri/radeon/radeon_maos.h @@ -38,6 +38,5 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_context.h" extern void radeonEmitArrays( GLcontext *ctx, GLuint inputs ); -extern void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c index 31eea13f4e..7c6ea0530e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c +++ b/src/mesa/drivers/dri/radeon/radeon_maos_arrays.c @@ -48,160 +48,35 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_maos.h" #include "radeon_tcl.h" -#if 0 -/* Usage: - * - from radeon_tcl_render - * - call radeonEmitArrays to ensure uptodate arrays in dma - * - emit primitives (new type?) which reference the data - * -- need to use elts for lineloop, quads, quadstrip/flat - * -- other primitives are all well-formed (need tristrip-1,fake-poly) - * - */ -static void emit_ubyte_rgba3( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int stride, - int count ) +static void emit_vecfog(GLcontext *ctx, struct radeon_aos *aos, + GLvoid *data, int stride, int count) { int i; - radeon_color_t *out = (radeon_color_t *)(rvb->start + rvb->address); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d out %p\n", - __FUNCTION__, count, stride, (void *)out); - - for (i = 0; i < count; i++) { - out->red = *data; - out->green = *(data+1); - out->blue = *(data+2); - out->alpha = 0xFF; - out++; - data += stride; - } -} - -static void emit_ubyte_rgba4( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); + uint32_t *out; + int size = 1; + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s count %d stride %d\n", __FUNCTION__, count, stride); - if (stride == 4) - COPY_DWORDS( out, data, count ); - else - for (i = 0; i < count; i++) { - *out++ = LE32_TO_CPU(*(int *)data); - data += stride; - } -} - - -static void emit_ubyte_rgba( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int size, - int stride, - int count ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); - - assert (!rvb->buf); - if (stride == 0) { - radeonAllocDmaRegion( rmesa, rvb, 4, 4 ); + radeonAllocDmaRegion( rmesa, &aos->bo, &aos->offset, size * 4, 32 ); count = 1; - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = 0; - rvb->aos_size = 1; + aos->stride = 0; } else { - radeonAllocDmaRegion( rmesa, rvb, 4 * count, 4 ); /* alignment? */ - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = 1; - rvb->aos_size = 1; + radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32); + aos->stride = size; } - /* Emit the data - */ - switch (size) { - case 3: - emit_ubyte_rgba3( ctx, rvb, data, stride, count ); - break; - case 4: - emit_ubyte_rgba4( ctx, rvb, data, stride, count ); - break; - default: - assert(0); - exit(1); - break; - } -} -#endif - -#if defined(USE_X86_ASM) -#define COPY_DWORDS( dst, src, nr ) \ -do { \ - int __tmp; \ - __asm__ __volatile__( "rep ; movsl" \ - : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \ - : "0" (nr), \ - "D" ((long)dst), \ - "S" ((long)src) ); \ -} while (0) -#else -#define COPY_DWORDS( dst, src, nr ) \ -do { \ - int j; \ - for ( j = 0 ; j < nr ; j++ ) \ - dst[j] = ((int *)src)[j]; \ - dst += nr; \ -} while (0) -#endif - -static void emit_vecfog( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - GLfloat *out; + aos->components = size; + aos->count = count; - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); - - assert (!rvb->buf); - - if (stride == 0) { - radeonAllocDmaRegion( rmesa, rvb, 4, 4 ); - count = 1; - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = 0; - rvb->aos_size = 1; - } - else { - radeonAllocDmaRegion( rmesa, rvb, count * 4, 4 ); /* alignment? */ - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = 1; - rvb->aos_size = 1; - } /* Emit the data */ - out = (GLfloat *)(rvb->address + rvb->start); + out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); for (i = 0; i < count; i++) { out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data ); out++; @@ -209,169 +84,9 @@ static void emit_vecfog( GLcontext *ctx, } } -static void emit_vec4( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); - - if (stride == 4) - COPY_DWORDS( out, data, count ); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out++; - data += stride; - } -} - - -static void emit_vec8( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); - - if (stride == 8) - COPY_DWORDS( out, data, count*2 ); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out[1] = *(int *)(data+4); - out += 2; - data += stride; - } -} - -static void emit_vec12( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d out %p data %p\n", - __FUNCTION__, count, stride, (void *)out, (void *)data); - - if (stride == 12) - COPY_DWORDS( out, data, count*3 ); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out[1] = *(int *)(data+4); - out[2] = *(int *)(data+8); - out += 3; - data += stride; - } -} - -static void emit_vec16( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int stride, - int count ) -{ - int i; - int *out = (int *)(rvb->address + rvb->start); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d stride %d\n", - __FUNCTION__, count, stride); - - if (stride == 16) - COPY_DWORDS( out, data, count*4 ); - else - for (i = 0; i < count; i++) { - out[0] = *(int *)data; - out[1] = *(int *)(data+4); - out[2] = *(int *)(data+8); - out[3] = *(int *)(data+12); - out += 4; - data += stride; - } -} - - -static void emit_vector( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int size, - int stride, - int count ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_VERTS) - fprintf(stderr, "%s count %d size %d stride %d\n", - __FUNCTION__, count, size, stride); - - assert (!rvb->buf); - - if (stride == 0) { - radeonAllocDmaRegion( rmesa, rvb, size * 4, 4 ); - count = 1; - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = 0; - rvb->aos_size = size; - } - else { - radeonAllocDmaRegion( rmesa, rvb, size * count * 4, 4 ); /* alignment? */ - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = size; - rvb->aos_size = size; - } - - /* Emit the data - */ - switch (size) { - case 1: - emit_vec4( ctx, rvb, data, stride, count ); - break; - case 2: - emit_vec8( ctx, rvb, data, stride, count ); - break; - case 3: - emit_vec12( ctx, rvb, data, stride, count ); - break; - case 4: - emit_vec16( ctx, rvb, data, stride, count ); - break; - default: - assert(0); - exit(1); - break; - } - -} - - - -static void emit_s0_vec( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int stride, - int count ) +static void emit_s0_vec(uint32_t *out, GLvoid *data, int stride, int count) { int i; - int *out = (int *)(rvb->address + rvb->start); - if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s count %d stride %d\n", __FUNCTION__, count, stride); @@ -384,14 +99,9 @@ static void emit_s0_vec( GLcontext *ctx, } } -static void emit_stq_vec( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int stride, - int count ) +static void emit_stq_vec(uint32_t *out, GLvoid *data, int stride, int count) { int i; - int *out = (int *)(rvb->address + rvb->start); if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s count %d stride %d\n", @@ -409,21 +119,16 @@ static void emit_stq_vec( GLcontext *ctx, -static void emit_tex_vector( GLcontext *ctx, - struct radeon_dma_region *rvb, - char *data, - int size, - int stride, - int count ) +static void emit_tex_vector(GLcontext *ctx, struct radeon_aos *aos, + GLvoid *data, int size, int stride, int count) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); int emitsize; + uint32_t *out; if (RADEON_DEBUG & DEBUG_VERTS) fprintf(stderr, "%s %d/%d\n", __FUNCTION__, count, size); - assert (!rvb->buf); - switch (size) { case 4: emitsize = 3; break; case 3: emitsize = 3; break; @@ -432,34 +137,33 @@ static void emit_tex_vector( GLcontext *ctx, if (stride == 0) { - radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize, 4 ); + radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * 4, 32); count = 1; - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = 0; - rvb->aos_size = emitsize; + aos->stride = 0; } else { - radeonAllocDmaRegion( rmesa, rvb, 4 * emitsize * count, 4 ); - rvb->aos_start = GET_START(rvb); - rvb->aos_stride = emitsize; - rvb->aos_size = emitsize; + radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, emitsize * count * 4, 32); + aos->stride = emitsize; } + aos->components = emitsize; + aos->count = count; /* Emit the data */ + out = (uint32_t*)((char*)aos->bo->ptr + aos->offset); switch (size) { case 1: - emit_s0_vec( ctx, rvb, data, stride, count ); + emit_s0_vec( out, data, stride, count ); break; case 2: - emit_vec8( ctx, rvb, data, stride, count ); + radeonEmitVec8( out, data, stride, count ); break; case 3: - emit_vec12( ctx, rvb, data, stride, count ); + radeonEmitVec12( out, data, stride, count ); break; case 4: - emit_stq_vec( ctx, rvb, data, stride, count ); + emit_stq_vec( out, data, stride, count ); break; default: assert(0); @@ -476,9 +180,8 @@ static void emit_tex_vector( GLcontext *ctx, */ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) { - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + r100ContextPtr rmesa = R100_CONTEXT( ctx ); struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb; - struct radeon_dma_region **component = rmesa->tcl.aos_components; GLuint nr = 0; GLuint vfmt = 0; GLuint count = VB->Count; @@ -491,12 +194,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (1) { if (!rmesa->tcl.obj.buf) - emit_vector( ctx, - &rmesa->tcl.obj, - (char *)VB->ObjPtr->data, - VB->ObjPtr->size, - VB->ObjPtr->stride, - count); + rcommon_emit_vector( ctx, + &(rmesa->tcl.aos[nr]), + (char *)VB->ObjPtr->data, + VB->ObjPtr->size, + VB->ObjPtr->stride, + count); switch( VB->ObjPtr->size ) { case 4: vfmt |= RADEON_CP_VC_FRMT_W0; @@ -505,21 +208,21 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) default: break; } - component[nr++] = &rmesa->tcl.obj; + nr++; } if (inputs & VERT_BIT_NORMAL) { if (!rmesa->tcl.norm.buf) - emit_vector( ctx, - &(rmesa->tcl.norm), - (char *)VB->NormalPtr->data, - 3, - VB->NormalPtr->stride, - count); + rcommon_emit_vector( ctx, + &(rmesa->tcl.aos[nr]), + (char *)VB->NormalPtr->data, + 3, + VB->NormalPtr->stride, + count); vfmt |= RADEON_CP_VC_FRMT_N0; - component[nr++] = &rmesa->tcl.norm; + nr++; } if (inputs & VERT_BIT_COLOR0) { @@ -537,31 +240,30 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) } if (!rmesa->tcl.rgba.buf) - emit_vector( ctx, - &(rmesa->tcl.rgba), - (char *)VB->ColorPtr[0]->data, - emitsize, - VB->ColorPtr[0]->stride, - count); - - - component[nr++] = &rmesa->tcl.rgba; + rcommon_emit_vector( ctx, + &(rmesa->tcl.aos[nr]), + (char *)VB->ColorPtr[0]->data, + emitsize, + VB->ColorPtr[0]->stride, + count); + + nr++; } if (inputs & VERT_BIT_COLOR1) { if (!rmesa->tcl.spec.buf) { - emit_vector( ctx, - &rmesa->tcl.spec, - (char *)VB->SecondaryColorPtr[0]->data, - 3, - VB->SecondaryColorPtr[0]->stride, - count); + rcommon_emit_vector( ctx, + &(rmesa->tcl.aos[nr]), + (char *)VB->SecondaryColorPtr[0]->data, + 3, + VB->SecondaryColorPtr[0]->stride, + count); } vfmt |= RADEON_CP_VC_FRMT_FPSPEC; - component[nr++] = &rmesa->tcl.spec; + nr++; } /* FIXME: not sure if this is correct. May need to stitch this together with @@ -570,13 +272,13 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (inputs & VERT_BIT_FOG) { if (!rmesa->tcl.fog.buf) emit_vecfog( ctx, - &(rmesa->tcl.fog), + &(rmesa->tcl.aos[nr]), (char *)VB->FogCoordPtr->data, VB->FogCoordPtr->stride, count); vfmt |= RADEON_CP_VC_FRMT_FPFOG; - component[nr++] = &rmesa->tcl.fog; + nr++; } @@ -587,11 +289,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) if (inputs & VERT_BIT_TEX(unit)) { if (!rmesa->tcl.tex[unit].buf) emit_tex_vector( ctx, - &(rmesa->tcl.tex[unit]), + &(rmesa->tcl.aos[nr]), (char *)VB->TexCoordPtr[unit]->data, VB->TexCoordPtr[unit]->size, VB->TexCoordPtr[unit]->stride, count ); + nr++; vfmt |= RADEON_ST_BIT(unit); /* assume we need the 3rd coord if texgen is active for r/q OR at least @@ -609,7 +312,6 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) (swaptexmatcol != ((rmesa->TexMatColSwap >> unit) & 1))) radeonUploadTexMatrix( rmesa, unit, swaptexmatcol ) ; } - component[nr++] = &rmesa->tcl.tex[unit]; } } @@ -622,34 +324,3 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) rmesa->tcl.vertex_format = vfmt; } - -void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); - GLuint unit; - -#if 0 - if (RADEON_DEBUG & DEBUG_VERTS) - _tnl_print_vert_flags( __FUNCTION__, newinputs ); -#endif - - if (newinputs & VERT_BIT_POS) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ ); - - if (newinputs & VERT_BIT_NORMAL) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ ); - - if (newinputs & VERT_BIT_COLOR0) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ ); - - if (newinputs & VERT_BIT_COLOR1) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ ); - - if (newinputs & VERT_BIT_FOG) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ ); - - for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) { - if (newinputs & VERT_BIT_TEX(unit)) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ ); - } -} diff --git a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c index 126d0727c6..78ec119302 100644 --- a/src/mesa/drivers/dri/radeon/radeon_maos_verts.c +++ b/src/mesa/drivers/dri/radeon/radeon_maos_verts.c @@ -310,7 +310,7 @@ static void init_tcl_verts( void ) void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb; GLuint req = 0; GLuint unit; @@ -374,14 +374,15 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) break; if (rmesa->tcl.vertex_format == setup_tab[i].vertex_format && - rmesa->tcl.indexed_verts.buf) + rmesa->radeon.tcl.aos[0].bo) return; - if (rmesa->tcl.indexed_verts.buf) + if (rmesa->radeon.tcl.aos[0].bo) radeonReleaseArrays( ctx, ~0 ); - radeonAllocDmaRegion( rmesa, - &rmesa->tcl.indexed_verts, + radeonAllocDmaRegion( &rmesa->radeon, + &rmesa->radeon.tcl.aos[0].bo, + &rmesa->radeon.tcl.aos[0].offset, VB->Count * setup_tab[i].vertex_size * 4, 4); @@ -421,29 +422,12 @@ void radeonEmitArrays( GLcontext *ctx, GLuint inputs ) setup_tab[i].emit( ctx, 0, VB->Count, - rmesa->tcl.indexed_verts.address + - rmesa->tcl.indexed_verts.start ); + rmesa->radeon.tcl.aos[0].bo->ptr + rmesa->radeon.tcl.aos[0].offset); + // rmesa->radeon.tcl.aos[0].size = setup_tab[i].vertex_size; + rmesa->radeon.tcl.aos[0].stride = setup_tab[i].vertex_size; rmesa->tcl.vertex_format = setup_tab[i].vertex_format; - rmesa->tcl.indexed_verts.aos_start = GET_START( &rmesa->tcl.indexed_verts ); - rmesa->tcl.indexed_verts.aos_size = setup_tab[i].vertex_size; - rmesa->tcl.indexed_verts.aos_stride = setup_tab[i].vertex_size; - - rmesa->tcl.aos_components[0] = &rmesa->tcl.indexed_verts; - rmesa->tcl.nr_aos_components = 1; + rmesa->radeon.tcl.aos_count = 1; } - -void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); - -#if 0 - if (RADEON_DEBUG & DEBUG_VERTS) - _tnl_print_vert_flags( __FUNCTION__, newinputs ); -#endif - - if (newinputs) - radeonReleaseDmaRegion( rmesa, &rmesa->tcl.indexed_verts, __FUNCTION__ ); -} diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c new file mode 100644 index 0000000000..55aa4502da --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -0,0 +1,415 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_mipmap_tree.h" + +#include <errno.h> +#include <unistd.h> + +#include "main/simple_list.h" +#include "main/texcompress.h" +#include "main/texformat.h" + +static GLuint radeon_compressed_texture_size(GLcontext *ctx, + GLsizei width, GLsizei height, GLsizei depth, + GLuint mesaFormat) +{ + GLuint size = _mesa_compressed_texture_size(ctx, width, height, depth, mesaFormat); + + if (mesaFormat == MESA_FORMAT_RGB_DXT1 || + mesaFormat == MESA_FORMAT_RGBA_DXT1) { + if (width + 3 < 8) /* width one block */ + size = size * 4; + else if (width + 3 < 16) + size = size * 2; + } else { + /* DXT3/5, 16 bytes per block */ + // WARN_ONCE("DXT 3/5 suffers from multitexturing problems!\n"); + if (width + 3 < 8) + size = size * 2; + } + + return size; +} + + +static int radeon_compressed_num_bytes(GLuint mesaFormat) +{ + int bytes = 0; + switch(mesaFormat) { + + case MESA_FORMAT_RGB_FXT1: + case MESA_FORMAT_RGBA_FXT1: + case MESA_FORMAT_RGB_DXT1: + case MESA_FORMAT_RGBA_DXT1: + bytes = 2; + break; + + case MESA_FORMAT_RGBA_DXT3: + case MESA_FORMAT_RGBA_DXT5: + bytes = 4; + default: + break; + } + + return bytes; +} + +/** + * Compute sizes and fill in offset and blit information for the given + * image (determined by \p face and \p level). + * + * \param curOffset points to the offset at which the image is to be stored + * and is updated by this function according to the size of the image. + */ +static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree *mt, + GLuint face, GLuint level, GLuint* curOffset) +{ + radeon_mipmap_level *lvl = &mt->levels[level]; + uint32_t row_align = rmesa->texture_row_align - 1; + + /* Find image size in bytes */ + if (mt->compressed) { + /* TODO: Is this correct? Need test cases for compressed textures! */ + lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63; + lvl->size = radeon_compressed_texture_size(mt->radeon->glCtx, + lvl->width, lvl->height, lvl->depth, mt->compressed); + } else if (mt->target == GL_TEXTURE_RECTANGLE_NV) { + lvl->rowstride = (lvl->width * mt->bpp + 63) & ~63; + lvl->size = lvl->rowstride * lvl->height; + } else if (mt->tilebits & RADEON_TXO_MICRO_TILE) { + /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, + * though the actual offset may be different (if texture is less than + * 32 bytes width) to the untiled case */ + lvl->rowstride = (lvl->width * mt->bpp * 2 + 31) & ~31; + lvl->size = lvl->rowstride * ((lvl->height + 1) / 2) * lvl->depth; + } else { + lvl->rowstride = (lvl->width * mt->bpp + row_align) & ~row_align; + lvl->size = lvl->rowstride * lvl->height * lvl->depth; + } + assert(lvl->size > 0); + + /* All images are aligned to a 32-byte offset */ + *curOffset = (*curOffset + 0x1f) & ~0x1f; + lvl->faces[face].offset = *curOffset; + *curOffset += lvl->size; + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, + "level %d, face %d: rs:%d %dx%d at %d\n", + level, face, lvl->rowstride, lvl->width, lvl->height, lvl->faces[face].offset); +} + +static GLuint minify(GLuint size, GLuint levels) +{ + size = size >> levels; + if (size < 1) + size = 1; + return size; +} + + +static void calculate_miptree_layout_r100(radeonContextPtr rmesa, radeon_mipmap_tree *mt) +{ + GLuint curOffset; + GLuint numLevels; + GLuint i; + GLuint face; + + numLevels = mt->lastLevel - mt->firstLevel + 1; + assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels); + + curOffset = 0; + for(face = 0; face < mt->faces; face++) { + + for(i = 0; i < numLevels; i++) { + mt->levels[i].width = minify(mt->width0, i); + mt->levels[i].height = minify(mt->height0, i); + mt->levels[i].depth = minify(mt->depth0, i); + compute_tex_image_offset(rmesa, mt, face, i, &curOffset); + } + } + + /* Note the required size in memory */ + mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; +} + +static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_tree *mt) +{ + GLuint curOffset; + GLuint numLevels; + GLuint i; + + numLevels = mt->lastLevel - mt->firstLevel + 1; + assert(numLevels <= rmesa->glCtx->Const.MaxTextureLevels); + + curOffset = 0; + for(i = 0; i < numLevels; i++) { + GLuint face; + + mt->levels[i].width = minify(mt->width0, i); + mt->levels[i].height = minify(mt->height0, i); + mt->levels[i].depth = minify(mt->depth0, i); + + for(face = 0; face < mt->faces; face++) + compute_tex_image_offset(rmesa, mt, face, i, &curOffset); + } + + /* Note the required size in memory */ + mt->totalsize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; +} + +/** + * Create a new mipmap tree, calculate its layout and allocate memory. + */ +radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t, + GLenum target, GLuint firstLevel, GLuint lastLevel, + GLuint width0, GLuint height0, GLuint depth0, + GLuint bpp, GLuint tilebits, GLuint compressed) +{ + radeon_mipmap_tree *mt = CALLOC_STRUCT(_radeon_mipmap_tree); + + mt->radeon = rmesa; + mt->refcount = 1; + mt->t = t; + mt->target = target; + mt->faces = (target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + mt->firstLevel = firstLevel; + mt->lastLevel = lastLevel; + mt->width0 = width0; + mt->height0 = height0; + mt->depth0 = depth0; + mt->bpp = compressed ? radeon_compressed_num_bytes(compressed) : bpp; + mt->tilebits = tilebits; + mt->compressed = compressed; + + if (rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R300) + calculate_miptree_layout_r300(rmesa, mt); + else + calculate_miptree_layout_r100(rmesa, mt); + + mt->bo = radeon_bo_open(rmesa->radeonScreen->bom, + 0, mt->totalsize, 1024, + RADEON_GEM_DOMAIN_VRAM, + 0); + + return mt; +} + +void radeon_miptree_reference(radeon_mipmap_tree *mt) +{ + mt->refcount++; + assert(mt->refcount > 0); +} + +void radeon_miptree_unreference(radeon_mipmap_tree *mt) +{ + if (!mt) + return; + + assert(mt->refcount > 0); + mt->refcount--; + if (!mt->refcount) { + radeon_bo_unref(mt->bo); + free(mt); + } +} + + +/** + * Calculate first and last mip levels for the given texture object, + * where the dimensions are taken from the given texture image at + * the given level. + * + * Note: level is the OpenGL level number, which is not necessarily the same + * as the first level that is actually present. + * + * The base level image of the given texture face must be non-null, + * or this will fail. + */ +static void calculate_first_last_level(struct gl_texture_object *tObj, + GLuint *pfirstLevel, GLuint *plastLevel, + GLuint face, GLuint level) +{ + const struct gl_texture_image * const baseImage = + tObj->Image[face][level]; + + assert(baseImage); + + /* These must be signed values. MinLod and MaxLod can be negative numbers, + * and having firstLevel and lastLevel as signed prevents the need for + * extra sign checks. + */ + int firstLevel; + int lastLevel; + + /* Yes, this looks overly complicated, but it's all needed. + */ + switch (tObj->Target) { + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + case GL_TEXTURE_CUBE_MAP: + if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { + /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. + */ + firstLevel = lastLevel = tObj->BaseLevel; + } else { + firstLevel = tObj->BaseLevel + (GLint)(tObj->MinLod + 0.5); + firstLevel = MAX2(firstLevel, tObj->BaseLevel); + firstLevel = MIN2(firstLevel, level + baseImage->MaxLog2); + lastLevel = tObj->BaseLevel + (GLint)(tObj->MaxLod + 0.5); + lastLevel = MAX2(lastLevel, tObj->BaseLevel); + lastLevel = MIN2(lastLevel, level + baseImage->MaxLog2); + lastLevel = MIN2(lastLevel, tObj->MaxLevel); + lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ + } + break; + case GL_TEXTURE_RECTANGLE_NV: + case GL_TEXTURE_4D_SGIS: + firstLevel = lastLevel = 0; + break; + default: + return; + } + + /* save these values */ + *pfirstLevel = firstLevel; + *plastLevel = lastLevel; +} + + +/** + * Checks whether the given miptree can hold the given texture image at the + * given face and level. + */ +GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, + struct gl_texture_image *texImage, GLuint face, GLuint level) +{ + radeon_mipmap_level *lvl; + + if (face >= mt->faces || level < mt->firstLevel || level > mt->lastLevel) + return GL_FALSE; + + if (texImage->IsCompressed != mt->compressed) + return GL_FALSE; + + if (!texImage->IsCompressed && + !mt->compressed && + texImage->TexFormat->TexelBytes != mt->bpp) + return GL_FALSE; + + lvl = &mt->levels[level - mt->firstLevel]; + if (lvl->width != texImage->Width || + lvl->height != texImage->Height || + lvl->depth != texImage->Depth) + return GL_FALSE; + + return GL_TRUE; +} + + +/** + * Checks whether the given miptree has the right format to store the given texture object. + */ +GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj) +{ + struct gl_texture_image *firstImage; + GLuint compressed; + GLuint numfaces = 1; + GLuint firstLevel, lastLevel; + + calculate_first_last_level(texObj, &firstLevel, &lastLevel, 0, texObj->BaseLevel); + if (texObj->Target == GL_TEXTURE_CUBE_MAP) + numfaces = 6; + + firstImage = texObj->Image[0][firstLevel]; + compressed = firstImage->IsCompressed ? firstImage->TexFormat->MesaFormat : 0; + + return (mt->firstLevel == firstLevel && + mt->lastLevel == lastLevel && + mt->width0 == firstImage->Width && + mt->height0 == firstImage->Height && + mt->depth0 == firstImage->Depth && + mt->bpp == firstImage->TexFormat->TexelBytes && + mt->compressed == compressed); +} + + +/** + * Try to allocate a mipmap tree for the given texture that will fit the + * given image in the given position. + */ +void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, + struct gl_texture_image *texImage, GLuint face, GLuint level) +{ + GLuint compressed = texImage->IsCompressed ? texImage->TexFormat->MesaFormat : 0; + GLuint numfaces = 1; + GLuint firstLevel, lastLevel; + + assert(!t->mt); + + calculate_first_last_level(&t->base, &firstLevel, &lastLevel, face, level); + if (t->base.Target == GL_TEXTURE_CUBE_MAP) + numfaces = 6; + + if (level != firstLevel || face >= numfaces) + return; + + t->mt = radeon_miptree_create(rmesa, t, t->base.Target, + firstLevel, lastLevel, + texImage->Width, texImage->Height, texImage->Depth, + texImage->TexFormat->TexelBytes, t->tile_bits, compressed); +} + +/* Although we use the image_offset[] array to store relative offsets + * to cube faces, Mesa doesn't know anything about this and expects + * each cube face to be treated as a separate image. + * + * These functions present that view to mesa: + */ +void +radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets) +{ + if (mt->target != GL_TEXTURE_3D || mt->faces == 1) + offsets[0] = 0; + else { + int i; + for (i = 0; i < 6; i++) + offsets[i] = mt->levels[level].faces[i].offset; + } +} + +GLuint +radeon_miptree_image_offset(radeon_mipmap_tree *mt, + GLuint face, GLuint level) +{ + if (mt->target == GL_TEXTURE_CUBE_MAP_ARB) + return (mt->levels[level].faces[face].offset); + else + return mt->levels[level].faces[0].offset; +} diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h new file mode 100644 index 0000000000..7ece688493 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.h @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_MIPMAP_TREE_H_ +#define __RADEON_MIPMAP_TREE_H_ + +#include "radeon_common.h" + +typedef struct _radeon_mipmap_tree radeon_mipmap_tree; +typedef struct _radeon_mipmap_level radeon_mipmap_level; +typedef struct _radeon_mipmap_image radeon_mipmap_image; + +struct _radeon_mipmap_image { + GLuint offset; /** Offset of this image from the start of mipmap tree buffer, in bytes */ +}; + +struct _radeon_mipmap_level { + GLuint width; + GLuint height; + GLuint depth; + GLuint size; /** Size of each image, in bytes */ + GLuint rowstride; /** in bytes */ + radeon_mipmap_image faces[6]; +}; + +/* store the max possible in the miptree */ +#define RADEON_MIPTREE_MAX_TEXTURE_LEVELS 13 + +/** + * A mipmap tree contains texture images in the layout that the hardware + * expects. + * + * The meta-data of mipmap trees is immutable, i.e. you cannot change the + * layout on-the-fly; however, the texture contents (i.e. texels) can be + * changed. + */ +struct _radeon_mipmap_tree { + radeonContextPtr radeon; + radeonTexObj *t; + struct radeon_bo *bo; + GLuint refcount; + + GLuint totalsize; /** total size of the miptree, in bytes */ + + GLenum target; /** GL_TEXTURE_xxx */ + GLuint faces; /** # of faces: 6 for cubemaps, 1 otherwise */ + GLuint firstLevel; /** First mip level stored in this mipmap tree */ + GLuint lastLevel; /** Last mip level stored in this mipmap tree */ + + GLuint width0; /** Width of firstLevel image */ + GLuint height0; /** Height of firstLevel image */ + GLuint depth0; /** Depth of firstLevel image */ + + GLuint bpp; /** Bytes per texel */ + GLuint tilebits; /** RADEON_TXO_xxx_TILE */ + GLuint compressed; /** MESA_FORMAT_xxx indicating a compressed format, or 0 if uncompressed */ + + radeon_mipmap_level levels[RADEON_MIPTREE_MAX_TEXTURE_LEVELS]; +}; + +radeon_mipmap_tree* radeon_miptree_create(radeonContextPtr rmesa, radeonTexObj *t, + GLenum target, GLuint firstLevel, GLuint lastLevel, + GLuint width0, GLuint height0, GLuint depth0, + GLuint bpp, GLuint tilebits, GLuint compressed); +void radeon_miptree_reference(radeon_mipmap_tree *mt); +void radeon_miptree_unreference(radeon_mipmap_tree *mt); + +GLboolean radeon_miptree_matches_image(radeon_mipmap_tree *mt, + struct gl_texture_image *texImage, GLuint face, GLuint level); +GLboolean radeon_miptree_matches_texture(radeon_mipmap_tree *mt, struct gl_texture_object *texObj); +void radeon_try_alloc_miptree(radeonContextPtr rmesa, radeonTexObj *t, + struct gl_texture_image *texImage, GLuint face, GLuint level); +GLuint radeon_miptree_image_offset(radeon_mipmap_tree *mt, + GLuint face, GLuint level); +void radeon_miptree_depth_offsets(radeon_mipmap_tree *mt, GLuint level, GLuint *offsets); +#endif /* __RADEON_MIPMAP_TREE_H_ */ diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.c b/src/mesa/drivers/dri/radeon/radeon_sanity.c index 6613757fce..bbed838b59 100644 --- a/src/mesa/drivers/dri/radeon/radeon_sanity.c +++ b/src/mesa/drivers/dri/radeon/radeon_sanity.c @@ -973,7 +973,7 @@ static int radeon_emit_packet3_cliprect( drm_radeon_cmd_buffer_t *cmdbuf ) } -int radeonSanityCmdBuffer( radeonContextPtr rmesa, +int radeonSanityCmdBuffer( r100ContextPtr rmesa, int nbox, drm_clip_rect_t *boxes ) { diff --git a/src/mesa/drivers/dri/radeon/radeon_sanity.h b/src/mesa/drivers/dri/radeon/radeon_sanity.h index 1ec06bc586..f30eb1c4f1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_sanity.h +++ b/src/mesa/drivers/dri/radeon/radeon_sanity.h @@ -1,7 +1,7 @@ #ifndef RADEON_SANITY_H #define RADEON_SANITY_H -extern int radeonSanityCmdBuffer( radeonContextPtr rmesa, +extern int radeonSanityCmdBuffer( r100ContextPtr rmesa, int nbox, drm_clip_rect_t *boxes ); diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 791f59826b..12ae4ada5d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -35,6 +35,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * \author Gareth Hughes <gareth@valinux.com> */ +#include <errno.h> #include "main/glheader.h" #include "main/imports.h" #include "main/mtypes.h" @@ -45,32 +46,39 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_chipset.h" #include "radeon_macros.h" #include "radeon_screen.h" +#include "radeon_common.h" +#include "radeon_span.h" #if !RADEON_COMMON #include "radeon_context.h" -#include "radeon_span.h" #include "radeon_tex.h" #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) #include "r200_context.h" #include "r200_ioctl.h" -#include "r200_span.h" #include "r200_tex.h" #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) #include "r300_context.h" #include "r300_fragprog.h" #include "r300_tex.h" -#include "radeon_span.h" #endif #include "utils.h" #include "vblank.h" #include "drirenderbuffer.h" +#include "radeon_bocs_wrapper.h" + #include "GL/internal/dri_interface.h" /* Radeon configuration */ #include "xmlpool.h" +#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \ +DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \ + DRI_CONF_DESC(en,"Size of command buffer (in KB)") \ + DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \ +DRI_CONF_OPT_END + #if !RADEON_COMMON /* R100 */ PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN @@ -80,6 +88,7 @@ DRI_CONF_BEGIN DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) DRI_CONF_MAX_TEXTURE_UNITS(3,2,3) DRI_CONF_HYPERZ(false) + DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32) DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB) @@ -95,7 +104,7 @@ DRI_CONF_BEGIN DRI_CONF_NO_RAST(false) DRI_CONF_SECTION_END DRI_CONF_END; -static const GLuint __driNConfigOptions = 14; +static const GLuint __driNConfigOptions = 15; #elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) @@ -107,6 +116,7 @@ DRI_CONF_BEGIN DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) DRI_CONF_MAX_TEXTURE_UNITS(6,2,6) DRI_CONF_HYPERZ(false) + DRI_CONF_COMMAND_BUFFER_SIZE(8, 8, 32) DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB) @@ -126,7 +136,7 @@ DRI_CONF_BEGIN DRI_CONF_NV_VERTEX_PROGRAM(false) DRI_CONF_SECTION_END DRI_CONF_END; -static const GLuint __driNConfigOptions = 16; +static const GLuint __driNConfigOptions = 17; extern const struct dri_extension blend_extensions[]; extern const struct dri_extension ARB_vp_extension[]; @@ -149,11 +159,7 @@ DRI_CONF_OPT_BEGIN_V(texture_coord_units,int,def, # min ":" # max ) \ DRI_CONF_DESC(de,"Anzahl der Texturkoordinateneinheiten") \ DRI_CONF_OPT_END -#define DRI_CONF_COMMAND_BUFFER_SIZE(def,min,max) \ -DRI_CONF_OPT_BEGIN_V(command_buffer_size,int,def, # min ":" # max ) \ - DRI_CONF_DESC(en,"Size of command buffer (in KB)") \ - DRI_CONF_DESC(de,"Grösse des Befehlspuffers (in KB)") \ -DRI_CONF_OPT_END + #define DRI_CONF_DISABLE_S3TC(def) \ DRI_CONF_OPT_BEGIN(disable_s3tc,bool,def) \ @@ -209,7 +215,6 @@ static const GLuint __driNConfigOptions = 17; extern const struct dri_extension gl_20_extension[]; #ifndef RADEON_DEBUG -int RADEON_DEBUG = 0; static const struct dri_debug_control debug_control[] = { {"fall", DEBUG_FALLBACKS}, @@ -236,19 +241,36 @@ static const struct dri_debug_control debug_control[] = { #endif /* RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) */ extern const struct dri_extension card_extensions[]; +extern const struct dri_extension mm_extensions[]; static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ); static int -radeonGetParam(int fd, int param, void *value) +radeonGetParam(__DRIscreenPrivate *sPriv, int param, void *value) { int ret; - drm_radeon_getparam_t gp; - - gp.param = param; - gp.value = value; + drm_radeon_getparam_t gp = { 0 }; + struct drm_radeon_info info = { 0 }; + + if (sPriv->drm_version.major >= 2) { + info.value = (uint64_t)value; + switch (param) { + case RADEON_PARAM_DEVICE_ID: + info.request = RADEON_INFO_DEVICE_ID; + break; + case RADEON_PARAM_NUM_GB_PIPES: + info.request = RADEON_INFO_NUM_GB_PIPES; + break; + default: + return -EINVAL; + } + ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_INFO, &info, sizeof(info)); + } else { + gp.param = param; + gp.value = value; - ret = drmCommandWriteRead( fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); + ret = drmCommandWriteRead(sPriv->fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)); + } return ret; } @@ -335,6 +357,12 @@ static const __DRItexOffsetExtension radeonTexOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, radeonSetTexOffset, }; + +static const __DRItexBufferExtension radeonTexBufferExtension = { + { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, + radeonSetTexBuffer, + radeonSetTexBuffer2, +}; #endif #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) @@ -349,6 +377,12 @@ static const __DRItexOffsetExtension r200texOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, r200SetTexOffset, }; + +static const __DRItexBufferExtension r200TexBufferExtension = { + { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, + r200SetTexBuffer, + r200SetTexBuffer2, +}; #endif #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) @@ -356,137 +390,19 @@ static const __DRItexOffsetExtension r300texOffsetExtension = { { __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION }, r300SetTexOffset, }; -#endif -/* Create the device specific screen private data struct. - */ -static radeonScreenPtr -radeonCreateScreen( __DRIscreenPrivate *sPriv ) -{ - radeonScreenPtr screen; - RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv; - unsigned char *RADEONMMIO; - int i; - int ret; - uint32_t temp = 0; - - if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) { - fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n"); - return GL_FALSE; - } - - /* Allocate the private area */ - screen = (radeonScreenPtr) CALLOC( sizeof(*screen) ); - if ( !screen ) { - __driUtilMessage("%s: Could not allocate memory for screen structure", - __FUNCTION__); - return NULL; - } - -#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) - RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control); +static const __DRItexBufferExtension r300TexBufferExtension = { + { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, + r300SetTexBuffer, + r300SetTexBuffer2, +}; #endif - /* parse information in __driConfigOptions */ - driParseOptionInfo (&screen->optionCache, - __driConfigOptions, __driNConfigOptions); - - /* This is first since which regions we map depends on whether or - * not we are using a PCI card. - */ - screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP); - { - int ret; - ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BUFFER_OFFSET, - &screen->gart_buffer_offset); - - if (ret) { - FREE( screen ); - fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret); - return NULL; - } - - ret = radeonGetParam( sPriv->fd, RADEON_PARAM_GART_BASE, - &screen->gart_base); - if (ret) { - FREE( screen ); - fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret); - return NULL; - } - - ret = radeonGetParam( sPriv->fd, RADEON_PARAM_IRQ_NR, - &screen->irq); - if (ret) { - FREE( screen ); - fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret); - return NULL; - } - screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7); - screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11); - screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16); - screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18); - screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13); - screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15); - screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25); - } - - screen->mmio.handle = dri_priv->registerHandle; - screen->mmio.size = dri_priv->registerSize; - if ( drmMap( sPriv->fd, - screen->mmio.handle, - screen->mmio.size, - &screen->mmio.map ) ) { - FREE( screen ); - __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); - return NULL; - } - - RADEONMMIO = screen->mmio.map; - - screen->status.handle = dri_priv->statusHandle; - screen->status.size = dri_priv->statusSize; - if ( drmMap( sPriv->fd, - screen->status.handle, - screen->status.size, - &screen->status.map ) ) { - drmUnmap( screen->mmio.map, screen->mmio.size ); - FREE( screen ); - __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); - return NULL; - } - screen->scratch = (__volatile__ uint32_t *) - ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); - - screen->buffers = drmMapBufs( sPriv->fd ); - if ( !screen->buffers ) { - drmUnmap( screen->status.map, screen->status.size ); - drmUnmap( screen->mmio.map, screen->mmio.size ); - FREE( screen ); - __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ ); - return NULL; - } - - if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { - screen->gartTextures.handle = dri_priv->gartTexHandle; - screen->gartTextures.size = dri_priv->gartTexMapSize; - if ( drmMap( sPriv->fd, - screen->gartTextures.handle, - screen->gartTextures.size, - (drmAddressPtr)&screen->gartTextures.map ) ) { - drmUnmapBufs( screen->buffers ); - drmUnmap( screen->status.map, screen->status.size ); - drmUnmap( screen->mmio.map, screen->mmio.size ); - FREE( screen ); - __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); - return NULL; - } - - screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; - } - +static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) +{ + screen->device_id = device_id; screen->chip_flags = 0; - /* XXX: add more chipsets */ - switch ( dri_priv->deviceID ) { + switch ( device_id ) { case PCI_CHIP_RADEON_LY: case PCI_CHIP_RADEON_LZ: case PCI_CHIP_RADEON_QY: @@ -561,7 +477,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->chip_family = CHIP_FAMILY_RS300; break; - case PCI_CHIP_R300_AD: case PCI_CHIP_R300_AE: case PCI_CHIP_R300_AF: @@ -821,9 +736,145 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", - dri_priv->deviceID); + device_id); + return -1; + } + + return 0; +} + + +/* Create the device specific screen private data struct. + */ +static radeonScreenPtr +radeonCreateScreen( __DRIscreenPrivate *sPriv ) +{ + radeonScreenPtr screen; + RADEONDRIPtr dri_priv = (RADEONDRIPtr)sPriv->pDevPriv; + unsigned char *RADEONMMIO = NULL; + int i; + int ret; + uint32_t temp = 0; + + if (sPriv->devPrivSize != sizeof(RADEONDRIRec)) { + fprintf(stderr,"\nERROR! sizeof(RADEONDRIRec) does not match passed size from device driver\n"); + return GL_FALSE; + } + + /* Allocate the private area */ + screen = (radeonScreenPtr) CALLOC( sizeof(*screen) ); + if ( !screen ) { + __driUtilMessage("%s: Could not allocate memory for screen structure", + __FUNCTION__); return NULL; } + +#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) + RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control); +#endif + + /* parse information in __driConfigOptions */ + driParseOptionInfo (&screen->optionCache, + __driConfigOptions, __driNConfigOptions); + + /* This is first since which regions we map depends on whether or + * not we are using a PCI card. + */ + screen->card_type = (dri_priv->IsPCI ? RADEON_CARD_PCI : RADEON_CARD_AGP); + { + int ret; + + ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BUFFER_OFFSET, + &screen->gart_buffer_offset); + + if (ret) { + FREE( screen ); + fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BUFFER_OFFSET): %d\n", ret); + return NULL; + } + + ret = radeonGetParam(sPriv, RADEON_PARAM_GART_BASE, + &screen->gart_base); + if (ret) { + FREE( screen ); + fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_GART_BASE): %d\n", ret); + return NULL; + } + + ret = radeonGetParam(sPriv, RADEON_PARAM_IRQ_NR, + &screen->irq); + if (ret) { + FREE( screen ); + fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_IRQ_NR): %d\n", ret); + return NULL; + } + screen->drmSupportsCubeMapsR200 = (sPriv->drm_version.minor >= 7); + screen->drmSupportsBlendColor = (sPriv->drm_version.minor >= 11); + screen->drmSupportsTriPerf = (sPriv->drm_version.minor >= 16); + screen->drmSupportsFragShader = (sPriv->drm_version.minor >= 18); + screen->drmSupportsPointSprites = (sPriv->drm_version.minor >= 13); + screen->drmSupportsCubeMapsR100 = (sPriv->drm_version.minor >= 15); + screen->drmSupportsVertexProgram = (sPriv->drm_version.minor >= 25); + } + + screen->mmio.handle = dri_priv->registerHandle; + screen->mmio.size = dri_priv->registerSize; + if ( drmMap( sPriv->fd, + screen->mmio.handle, + screen->mmio.size, + &screen->mmio.map ) ) { + FREE( screen ); + __driUtilMessage("%s: drmMap failed\n", __FUNCTION__ ); + return NULL; + } + + RADEONMMIO = screen->mmio.map; + + screen->status.handle = dri_priv->statusHandle; + screen->status.size = dri_priv->statusSize; + if ( drmMap( sPriv->fd, + screen->status.handle, + screen->status.size, + &screen->status.map ) ) { + drmUnmap( screen->mmio.map, screen->mmio.size ); + FREE( screen ); + __driUtilMessage("%s: drmMap (2) failed\n", __FUNCTION__ ); + return NULL; + } + screen->scratch = (__volatile__ uint32_t *) + ((GLubyte *)screen->status.map + RADEON_SCRATCH_REG_OFFSET); + + screen->buffers = drmMapBufs( sPriv->fd ); + if ( !screen->buffers ) { + drmUnmap( screen->status.map, screen->status.size ); + drmUnmap( screen->mmio.map, screen->mmio.size ); + FREE( screen ); + __driUtilMessage("%s: drmMapBufs failed\n", __FUNCTION__ ); + return NULL; + } + + if ( dri_priv->gartTexHandle && dri_priv->gartTexMapSize ) { + screen->gartTextures.handle = dri_priv->gartTexHandle; + screen->gartTextures.size = dri_priv->gartTexMapSize; + if ( drmMap( sPriv->fd, + screen->gartTextures.handle, + screen->gartTextures.size, + (drmAddressPtr)&screen->gartTextures.map ) ) { + drmUnmapBufs( screen->buffers ); + drmUnmap( screen->status.map, screen->status.size ); + drmUnmap( screen->mmio.map, screen->mmio.size ); + FREE( screen ); + __driUtilMessage("%s: drmMap failed for GART texture area\n", __FUNCTION__); + return NULL; + } + + screen->gart_texture_offset = dri_priv->gartTexOffset + screen->gart_base; + } + + ret = radeon_set_screen_flags(screen, dri_priv->deviceID); + if (ret == -1) + return NULL; + if ((screen->chip_family == CHIP_FAMILY_R350 || screen->chip_family == CHIP_FAMILY_R300) && sPriv->ddx_version.minor < 2) { fprintf(stderr, "xf86-video-ati-6.6.2 or newer needed for Radeon 9500/9700/9800 cards.\n"); @@ -848,8 +899,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->cpp = dri_priv->bpp / 8; screen->AGPMode = dri_priv->AGPMode; - ret = radeonGetParam( sPriv->fd, RADEON_PARAM_FB_LOCATION, - &temp); + ret = radeonGetParam(sPriv, RADEON_PARAM_FB_LOCATION, &temp); if (ret) { if (screen->chip_family < CHIP_FAMILY_RS600) screen->fbLocation = ( INREG( RADEON_MC_FB_LOCATION ) & 0xffff) << 16; @@ -863,8 +913,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) } if (screen->chip_family >= CHIP_FAMILY_R300) { - ret = radeonGetParam( sPriv->fd, RADEON_PARAM_NUM_GB_PIPES, - &temp); + ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp); if (ret) { fprintf(stderr, "Unable to get num_pipes, need newer drm\n"); switch (screen->chip_family) { @@ -901,7 +950,6 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) default: break; } - } if ( sPriv->drm_version.minor >= 10 ) { @@ -971,7 +1019,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) #if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) if (IS_R200_CLASS(screen)) - screen->extensions[i++] = &r200AllocateExtension.base; + screen->extensions[i++] = &r200AllocateExtension.base; screen->extensions[i++] = &r200texOffsetExtension.base; #endif @@ -985,6 +1033,153 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) screen->driScreen = sPriv; screen->sarea_priv_offset = dri_priv->sarea_priv_offset; + screen->sarea = (drm_radeon_sarea_t *) ((GLubyte *) sPriv->pSAREA + + screen->sarea_priv_offset); + + screen->bom = radeon_bo_manager_legacy_ctor(screen); + if (screen->bom == NULL) { + free(screen); + return NULL; + } + + return screen; +} + +static radeonScreenPtr +radeonCreateScreen2(__DRIscreenPrivate *sPriv) +{ + radeonScreenPtr screen; + int i; + int ret; + uint32_t device_id = 0; + uint32_t temp = 0; + + /* Allocate the private area */ + screen = (radeonScreenPtr) CALLOC( sizeof(*screen) ); + if ( !screen ) { + __driUtilMessage("%s: Could not allocate memory for screen structure", + __FUNCTION__); + fprintf(stderr, "leaving here\n"); + return NULL; + } + +#if DO_DEBUG && RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) + RADEON_DEBUG = driParseDebugString(getenv("RADEON_DEBUG"), debug_control); +#endif + + /* parse information in __driConfigOptions */ + driParseOptionInfo (&screen->optionCache, + __driConfigOptions, __driNConfigOptions); + + screen->kernel_mm = 1; + screen->chip_flags = 0; + + /* if we have kms we can support all of these */ + screen->drmSupportsCubeMapsR200 = 1; + screen->drmSupportsBlendColor = 1; + screen->drmSupportsTriPerf = 1; + screen->drmSupportsFragShader = 1; + screen->drmSupportsPointSprites = 1; + screen->drmSupportsCubeMapsR100 = 1; + screen->drmSupportsVertexProgram = 1; + screen->irq = 1; + + ret = radeonGetParam(sPriv, RADEON_PARAM_DEVICE_ID, &device_id); + if (ret) { + FREE( screen ); + fprintf(stderr, "drm_radeon_getparam_t (RADEON_PARAM_DEVICE_ID): %d\n", ret); + return NULL; + } + + ret = radeon_set_screen_flags(screen, device_id); + if (ret == -1) + return NULL; + + if (screen->chip_family >= CHIP_FAMILY_R300) { + ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp); + if (ret) { + fprintf(stderr, "Unable to get num_pipes, need newer drm\n"); + switch (screen->chip_family) { + case CHIP_FAMILY_R300: + case CHIP_FAMILY_R350: + screen->num_gb_pipes = 2; + break; + case CHIP_FAMILY_R420: + case CHIP_FAMILY_R520: + case CHIP_FAMILY_R580: + case CHIP_FAMILY_RV560: + case CHIP_FAMILY_RV570: + screen->num_gb_pipes = 4; + break; + case CHIP_FAMILY_RV350: + case CHIP_FAMILY_RV515: + case CHIP_FAMILY_RV530: + case CHIP_FAMILY_RV410: + default: + screen->num_gb_pipes = 1; + break; + } + } else { + screen->num_gb_pipes = temp; + } + + /* pipe overrides */ + switch (device_id) { + case PCI_CHIP_R300_AD: /* 9500 with 1 quadpipe verified by: Reid Linnemann <lreid@cs.okstate.edu> */ + case PCI_CHIP_RV410_5E4C: /* RV410 SE only have 1 quadpipe */ + case PCI_CHIP_RV410_5E4F: /* RV410 SE only have 1 quadpipe */ + screen->num_gb_pipes = 1; + break; + default: + break; + } + + } + + if (screen->chip_family <= CHIP_FAMILY_RS200) + screen->chip_flags |= RADEON_CLASS_R100; + else if (screen->chip_family <= CHIP_FAMILY_RV280) + screen->chip_flags |= RADEON_CLASS_R200; + else + screen->chip_flags |= RADEON_CLASS_R300; + + if (getenv("R300_NO_TCL")) + screen->chip_flags &= ~RADEON_CHIPSET_TCL; + + i = 0; + screen->extensions[i++] = &driCopySubBufferExtension.base; + screen->extensions[i++] = &driFrameTrackingExtension.base; + screen->extensions[i++] = &driReadDrawableExtension; + + if ( screen->irq != 0 ) { + screen->extensions[i++] = &driSwapControlExtension.base; + screen->extensions[i++] = &driMediaStreamCounterExtension.base; + } + +#if !RADEON_COMMON + screen->extensions[i++] = &radeonTexBufferExtension.base; +#endif + +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + if (IS_R200_CLASS(screen)) + screen->extensions[i++] = &r200AllocateExtension.base; + + screen->extensions[i++] = &r200TexBufferExtension.base; +#endif + +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) + screen->extensions[i++] = &r300TexBufferExtension.base; +#endif + + screen->extensions[i++] = NULL; + sPriv->extensions = screen->extensions; + + screen->driScreen = sPriv; + screen->bom = radeon_bo_manager_gem_ctor(sPriv->fd); + if (screen->bom == NULL) { + free(screen); + return NULL; + } return screen; } @@ -993,23 +1188,32 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv ) static void radeonDestroyScreen( __DRIscreenPrivate *sPriv ) { - radeonScreenPtr screen = (radeonScreenPtr)sPriv->private; + radeonScreenPtr screen = (radeonScreenPtr)sPriv->private; - if (!screen) - return; + if (!screen) + return; - if ( screen->gartTextures.map ) { - drmUnmap( screen->gartTextures.map, screen->gartTextures.size ); - } - drmUnmapBufs( screen->buffers ); - drmUnmap( screen->status.map, screen->status.size ); - drmUnmap( screen->mmio.map, screen->mmio.size ); + if (screen->kernel_mm) { +#ifdef RADEON_BO_TRACK + radeon_tracker_print(&screen->bom->tracker, stderr); +#endif + radeon_bo_manager_gem_dtor(screen->bom); + } else { + radeon_bo_manager_legacy_dtor(screen->bom); + + if ( screen->gartTextures.map ) { + drmUnmap( screen->gartTextures.map, screen->gartTextures.size ); + } + drmUnmapBufs( screen->buffers ); + drmUnmap( screen->status.map, screen->status.size ); + drmUnmap( screen->mmio.map, screen->mmio.size ); + } - /* free all option information */ - driDestroyOptionInfo (&screen->optionCache); + /* free all option information */ + driDestroyOptionInfo (&screen->optionCache); - FREE( screen ); - sPriv->private = NULL; + FREE( screen ); + sPriv->private = NULL; } @@ -1018,16 +1222,21 @@ radeonDestroyScreen( __DRIscreenPrivate *sPriv ) static GLboolean radeonInitDriver( __DRIscreenPrivate *sPriv ) { - sPriv->private = (void *) radeonCreateScreen( sPriv ); - if ( !sPriv->private ) { - radeonDestroyScreen( sPriv ); - return GL_FALSE; - } + if (sPriv->dri2.enabled) { + sPriv->private = (void *) radeonCreateScreen2( sPriv ); + } else { + sPriv->private = (void *) radeonCreateScreen( sPriv ); + } + if ( !sPriv->private ) { + radeonDestroyScreen( sPriv ); + return GL_FALSE; + } - return GL_TRUE; + return GL_TRUE; } + /** * Create the Mesa framebuffer and renderbuffers for a given window/drawable. * @@ -1040,101 +1249,111 @@ radeonCreateBuffer( __DRIscreenPrivate *driScrnPriv, const __GLcontextModes *mesaVis, GLboolean isPixmap ) { - radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private; + radeonScreenPtr screen = (radeonScreenPtr) driScrnPriv->private; + + const GLboolean swDepth = GL_FALSE; + const GLboolean swAlpha = GL_FALSE; + const GLboolean swAccum = mesaVis->accumRedBits > 0; + const GLboolean swStencil = mesaVis->stencilBits > 0 && + mesaVis->depthBits != 24; + GLenum rgbFormat; + struct radeon_framebuffer *rfb; - if (isPixmap) { + if (isPixmap) return GL_FALSE; /* not implemented */ - } - else { - const GLboolean swDepth = GL_FALSE; - const GLboolean swAlpha = GL_FALSE; - const GLboolean swAccum = mesaVis->accumRedBits > 0; - const GLboolean swStencil = mesaVis->stencilBits > 0 && - mesaVis->depthBits != 24; - struct gl_framebuffer *fb = _mesa_create_framebuffer(mesaVis); - - /* front color renderbuffer */ - { - driRenderbuffer *frontRb - = driNewRenderbuffer(GL_RGBA, - driScrnPriv->pFB + screen->frontOffset, - screen->cpp, - screen->frontOffset, screen->frontPitch, - driDrawPriv); - radeonSetSpanFunctions(frontRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_FRONT_LEFT, &frontRb->Base); - } - /* back color renderbuffer */ - if (mesaVis->doubleBufferMode) { - driRenderbuffer *backRb - = driNewRenderbuffer(GL_RGBA, - driScrnPriv->pFB + screen->backOffset, - screen->cpp, - screen->backOffset, screen->backPitch, - driDrawPriv); - radeonSetSpanFunctions(backRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_BACK_LEFT, &backRb->Base); - } + rfb = CALLOC_STRUCT(radeon_framebuffer); + if (!rfb) + return GL_FALSE; - /* depth renderbuffer */ - if (mesaVis->depthBits == 16) { - driRenderbuffer *depthRb - = driNewRenderbuffer(GL_DEPTH_COMPONENT16, - driScrnPriv->pFB + screen->depthOffset, - screen->cpp, - screen->depthOffset, screen->depthPitch, - driDrawPriv); - radeonSetSpanFunctions(depthRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); - depthRb->depthHasSurface = screen->depthHasSurface; - } - else if (mesaVis->depthBits == 24) { - driRenderbuffer *depthRb - = driNewRenderbuffer(GL_DEPTH_COMPONENT24, - driScrnPriv->pFB + screen->depthOffset, - screen->cpp, - screen->depthOffset, screen->depthPitch, - driDrawPriv); - radeonSetSpanFunctions(depthRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_DEPTH, &depthRb->Base); - depthRb->depthHasSurface = screen->depthHasSurface; - } + _mesa_initialize_framebuffer(&rfb->base, mesaVis); + + if (mesaVis->redBits == 5) + rgbFormat = GL_RGB5; + else if (mesaVis->alphaBits == 0) + rgbFormat = GL_RGB8; + else + rgbFormat = GL_RGBA8; + + /* front color renderbuffer */ + rfb->color_rb[0] = radeon_create_renderbuffer(rgbFormat, driDrawPriv); + _mesa_add_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT, &rfb->color_rb[0]->base); + rfb->color_rb[0]->has_surface = 1; + + /* back color renderbuffer */ + if (mesaVis->doubleBufferMode) { + rfb->color_rb[1] = radeon_create_renderbuffer(rgbFormat, driDrawPriv); + _mesa_add_renderbuffer(&rfb->base, BUFFER_BACK_LEFT, &rfb->color_rb[1]->base); + rfb->color_rb[1]->has_surface = 1; + } - /* stencil renderbuffer */ - if (mesaVis->stencilBits > 0 && !swStencil) { - driRenderbuffer *stencilRb - = driNewRenderbuffer(GL_STENCIL_INDEX8_EXT, - driScrnPriv->pFB + screen->depthOffset, - screen->cpp, - screen->depthOffset, screen->depthPitch, - driDrawPriv); - radeonSetSpanFunctions(stencilRb, mesaVis); - _mesa_add_renderbuffer(fb, BUFFER_STENCIL, &stencilRb->Base); - stencilRb->depthHasSurface = screen->depthHasSurface; + if (mesaVis->depthBits == 24) { + if (mesaVis->stencilBits == 8) { + struct radeon_renderbuffer *depthStencilRb = radeon_create_renderbuffer(GL_DEPTH24_STENCIL8_EXT, driDrawPriv); + _mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depthStencilRb->base); + _mesa_add_renderbuffer(&rfb->base, BUFFER_STENCIL, &depthStencilRb->base); + depthStencilRb->has_surface = screen->depthHasSurface; + } else { + /* depth renderbuffer */ + struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT24, driDrawPriv); + _mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base); + depth->has_surface = screen->depthHasSurface; } + } else if (mesaVis->depthBits == 16) { + /* just 16-bit depth buffer, no hw stencil */ + struct radeon_renderbuffer *depth = radeon_create_renderbuffer(GL_DEPTH_COMPONENT16, driDrawPriv); + _mesa_add_renderbuffer(&rfb->base, BUFFER_DEPTH, &depth->base); + depth->has_surface = screen->depthHasSurface; + } - _mesa_add_soft_renderbuffers(fb, - GL_FALSE, /* color */ - swDepth, - swStencil, - swAccum, - swAlpha, - GL_FALSE /* aux */); - driDrawPriv->driverPrivate = (void *) fb; + _mesa_add_soft_renderbuffers(&rfb->base, + GL_FALSE, /* color */ + swDepth, + swStencil, + swAccum, + swAlpha, + GL_FALSE /* aux */); + driDrawPriv->driverPrivate = (void *) rfb; - return (driDrawPriv->driverPrivate != NULL); - } + return (driDrawPriv->driverPrivate != NULL); } -static void +static void radeon_cleanup_renderbuffers(struct radeon_framebuffer *rfb) +{ + struct radeon_renderbuffer *rb; + + rb = rfb->color_rb[0]; + if (rb && rb->bo) { + radeon_bo_unref(rb->bo); + rb->bo = NULL; + } + rb = rfb->color_rb[1]; + if (rb && rb->bo) { + radeon_bo_unref(rb->bo); + rb->bo = NULL; + } + rb = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH); + if (rb && rb->bo) { + radeon_bo_unref(rb->bo); + rb->bo = NULL; + } +} + +void radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv) { - _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); + struct radeon_framebuffer *rfb; + if (!driDrawPriv) + return; + + rfb = (void*)driDrawPriv->driverPrivate; + if (!rfb) + return; + radeon_cleanup_renderbuffers(rfb); + _mesa_reference_framebuffer((GLframebuffer **)(&(driDrawPriv->driverPrivate)), NULL); } -#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) /** * Choose the appropriate CreateContext function based on the chipset. * Eventually, all drivers will go through this process. @@ -1145,25 +1364,22 @@ static GLboolean radeonCreateContext(const __GLcontextModes * glVisual, { __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private); - +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300) if (IS_R300_CLASS(screen)) return r300CreateContext(glVisual, driContextPriv, sharedContextPriv); - return GL_FALSE; -} - -/** - * Choose the appropriate DestroyContext function based on the chipset. - */ -static void radeonDestroyContext(__DRIcontextPrivate * driContextPriv) -{ - radeonContextPtr radeon = (radeonContextPtr) driContextPriv->driverPrivate; - - if (IS_R300_CLASS(radeon->radeonScreen)) - return r300DestroyContext(driContextPriv); -} +#endif +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + if (IS_R200_CLASS(screen)) + return r200CreateContext(glVisual, driContextPriv, sharedContextPriv); +#endif +#if !RADEON_COMMON + (void)screen; + return r100CreateContext(glVisual, driContextPriv, sharedContextPriv); #endif + return GL_FALSE; +} /** @@ -1225,13 +1441,103 @@ radeonInitScreen(__DRIscreenPrivate *psp) if (!radeonInitDriver(psp)) return NULL; + /* for now fill in all modes */ return radeonFillInModes( psp, dri_priv->bpp, (dri_priv->bpp == 16) ? 16 : 24, - (dri_priv->bpp == 16) ? 0 : 8, - (dri_priv->backOffset != dri_priv->depthOffset) ); + (dri_priv->bpp == 16) ? 0 : 8, 1); } +#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0])) + +/** + * This is the driver specific part of the createNewScreen entry point. + * Called when using DRI2. + * + * \return the __GLcontextModes supported by this driver + */ +static const +__DRIconfig **radeonInitScreen2(__DRIscreenPrivate *psp) +{ + GLenum fb_format[3]; + GLenum fb_type[3]; + /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't + * support pageflipping at all. + */ + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML, /*, GLX_SWAP_COPY_OML*/ + }; + uint8_t depth_bits[4], stencil_bits[4], msaa_samples_array[1]; + int color; + __DRIconfig **configs = NULL; + + /* Calling driInitExtensions here, with a NULL context pointer, + * does not actually enable the extensions. It just makes sure + * that all the dispatch offsets for all the extensions that + * *might* be enables are known. This is needed because the + * dispatch offsets need to be known when _mesa_context_create + * is called, but we can't enable the extensions until we have a + * context pointer. + * + * Hello chicken. Hello egg. How are you two today? + */ + driInitExtensions( NULL, card_extensions, GL_FALSE ); + driInitExtensions( NULL, mm_extensions, GL_FALSE ); +#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) + driInitExtensions( NULL, blend_extensions, GL_FALSE ); + driInitSingleExtension( NULL, ARB_vp_extension ); + driInitSingleExtension( NULL, NV_vp_extension ); + driInitSingleExtension( NULL, ATI_fs_extension ); + driInitExtensions( NULL, point_extensions, GL_FALSE ); +#endif + + if (!radeonInitDriver(psp)) { + return NULL; + } + depth_bits[0] = 0; + stencil_bits[0] = 0; + depth_bits[1] = 16; + stencil_bits[1] = 0; + depth_bits[2] = 24; + stencil_bits[2] = 0; + depth_bits[3] = 24; + stencil_bits[3] = 8; + + msaa_samples_array[0] = 0; + + fb_format[0] = GL_RGB; + fb_type[0] = GL_UNSIGNED_SHORT_5_6_5; + + fb_format[1] = GL_BGR; + fb_type[1] = GL_UNSIGNED_INT_8_8_8_8_REV; + + fb_format[2] = GL_BGRA; + fb_type[2] = GL_UNSIGNED_INT_8_8_8_8_REV; + + for (color = 0; color < ARRAY_SIZE(fb_format); color++) { + __DRIconfig **new_configs; + + new_configs = driCreateConfigs(fb_format[color], fb_type[color], + depth_bits, + stencil_bits, + ARRAY_SIZE(depth_bits), + back_buffer_modes, + ARRAY_SIZE(back_buffer_modes), + msaa_samples_array, + ARRAY_SIZE(msaa_samples_array)); + if (configs == NULL) + configs = new_configs; + else + configs = driConcatConfigs(configs, new_configs); + } + if (configs == NULL) { + fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, + __LINE__); + return NULL; + } + + return (const __DRIconfig **)configs; +} /** * Get information about previous buffer swaps. @@ -1239,31 +1545,26 @@ radeonInitScreen(__DRIscreenPrivate *psp) static int getSwapInfo( __DRIdrawablePrivate *dPriv, __DRIswapInfo * sInfo ) { -#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)) - radeonContextPtr rmesa; -#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R200) - r200ContextPtr rmesa; -#endif + struct radeon_framebuffer *rfb; - if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL) - || (dPriv->driContextPriv->driverPrivate == NULL) - || (sInfo == NULL) ) { - return -1; + if ( (dPriv == NULL) || (dPriv->driContextPriv == NULL) + || (dPriv->driContextPriv->driverPrivate == NULL) + || (sInfo == NULL) ) { + return -1; } - rmesa = dPriv->driContextPriv->driverPrivate; - sInfo->swap_count = rmesa->swap_count; - sInfo->swap_ust = rmesa->swap_ust; - sInfo->swap_missed_count = rmesa->swap_missed_count; + rfb = dPriv->driverPrivate; + sInfo->swap_count = rfb->swap_count; + sInfo->swap_ust = rfb->swap_ust; + sInfo->swap_missed_count = rfb->swap_missed_count; sInfo->swap_missed_usage = (sInfo->swap_missed_count != 0) - ? driCalculateSwapUsage( dPriv, 0, rmesa->swap_missed_ust ) + ? driCalculateSwapUsage( dPriv, 0, rfb->swap_missed_ust ) : 0.0; return 0; } -#if !RADEON_COMMON || (RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)) const struct __DriverAPIRec driDriverAPI = { .InitScreen = radeonInitScreen, .DestroyScreen = radeonDestroyScreen, @@ -1280,23 +1581,7 @@ const struct __DriverAPIRec driDriverAPI = { .WaitForSBC = NULL, .SwapBuffersMSC = NULL, .CopySubBuffer = radeonCopySubBuffer, + /* DRI2 */ + .InitScreen2 = radeonInitScreen2, }; -#else -const struct __DriverAPIRec driDriverAPI = { - .InitScreen = radeonInitScreen, - .DestroyScreen = radeonDestroyScreen, - .CreateContext = r200CreateContext, - .DestroyContext = r200DestroyContext, - .CreateBuffer = radeonCreateBuffer, - .DestroyBuffer = radeonDestroyBuffer, - .SwapBuffers = r200SwapBuffers, - .MakeCurrent = r200MakeCurrent, - .UnbindContext = r200UnbindContext, - .GetSwapInfo = getSwapInfo, - .GetDrawableMSC = driDrawableGetMSC32, - .WaitForMSC = driWaitForMSC32, - .WaitForSBC = NULL, - .SwapBuffersMSC = NULL, - .CopySubBuffer = r200CopySubBuffer, -}; -#endif + diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index b84c70bfae..fe5c7d875a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -54,11 +54,12 @@ typedef struct { drmAddress map; /* Mapping of the DRM region */ } radeonRegionRec, *radeonRegionPtr; -typedef struct { +typedef struct radeon_screen { int chip_family; int chip_flags; int cpp; int card_type; + int device_id; /* PCI ID */ int AGPMode; unsigned int irq; /* IRQ number (0 means none) */ @@ -103,9 +104,12 @@ typedef struct { /* Configuration cache with default values for all contexts */ driOptionCache optionCache; - const __DRIextension *extensions[8]; + const __DRIextension *extensions[16]; int num_gb_pipes; + int kernel_mm; + drm_radeon_sarea_t *sarea; /* Private SAREA data */ + struct radeon_bo_manager *bom; } radeonScreenRec, *radeonScreenPtr; #define IS_R100_CLASS(screen) \ @@ -115,4 +119,5 @@ typedef struct { #define IS_R300_CLASS(screen) \ ((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R300) +extern void radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv); #endif /* __RADEON_SCREEN_H__ */ diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index 12051ff1c8..e28f28662b 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -43,46 +43,203 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/glheader.h" #include "swrast/swrast.h" -#include "radeon_context.h" -#include "radeon_ioctl.h" -#include "radeon_state.h" +#include "radeon_common.h" +#include "radeon_lock.h" #include "radeon_span.h" -#include "radeon_tex.h" - -#include "drirenderbuffer.h" #define DBG 0 +static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); + +static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; + GLint offset; + GLint nmacroblkpl; + GLint nmicroblkpl; + + if (rrb->has_surface || !(rrb->bo->flags & mask)) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + offset = 0; + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { + if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { + nmacroblkpl = rrb->pitch >> 5; + offset += ((y >> 4) * nmacroblkpl) << 11; + offset += ((y & 15) >> 1) << 8; + offset += (y & 1) << 4; + offset += (x >> 5) << 11; + offset += ((x & 31) >> 2) << 5; + offset += (x & 3) << 2; + } else { + nmacroblkpl = rrb->pitch >> 6; + offset += ((y >> 3) * nmacroblkpl) << 11; + offset += (y & 7) << 8; + offset += (x >> 6) << 11; + offset += ((x & 63) >> 3) << 5; + offset += (x & 7) << 2; + } + } else { + nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; + offset += (y * nmicroblkpl) << 5; + offset += (x >> 3) << 5; + offset += (x & 7) << 2; + } + } + return &ptr[offset]; +} + +static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; + GLint offset; + GLint nmacroblkpl; + GLint nmicroblkpl; + + if (rrb->has_surface || !(rrb->bo->flags & mask)) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + offset = 0; + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { + if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { + nmacroblkpl = rrb->pitch >> 6; + offset += ((y >> 4) * nmacroblkpl) << 11; + offset += ((y & 15) >> 1) << 8; + offset += (y & 1) << 4; + offset += (x >> 6) << 11; + offset += ((x & 63) >> 3) << 5; + offset += (x & 7) << 1; + } else { + nmacroblkpl = rrb->pitch >> 7; + offset += ((y >> 3) * nmacroblkpl) << 11; + offset += (y & 7) << 8; + offset += (x >> 7) << 11; + offset += ((x & 127) >> 4) << 5; + offset += (x & 15) << 2; + } + } else { + nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; + offset += (y * nmicroblkpl) << 5; + offset += (x >> 4) << 5; + offset += (x & 15) << 2; + } + } + return &ptr[offset]; +} + +static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb, + GLint x, GLint y) +{ + GLubyte *ptr = rrb->bo->ptr; + uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; + GLint offset; + GLint microblkxs; + GLint macroblkxs; + GLint nmacroblkpl; + GLint nmicroblkpl; + + if (rrb->has_surface || !(rrb->bo->flags & mask)) { + offset = x * rrb->cpp + y * rrb->pitch; + } else { + offset = 0; + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { + if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { + microblkxs = 16 / rrb->cpp; + macroblkxs = 128 / rrb->cpp; + nmacroblkpl = rrb->pitch / macroblkxs; + offset += ((y >> 4) * nmacroblkpl) << 11; + offset += ((y & 15) >> 1) << 8; + offset += (y & 1) << 4; + offset += (x / macroblkxs) << 11; + offset += ((x & (macroblkxs - 1)) / microblkxs) << 5; + offset += (x & (microblkxs - 1)) * rrb->cpp; + } else { + microblkxs = 32 / rrb->cpp; + macroblkxs = 256 / rrb->cpp; + nmacroblkpl = rrb->pitch / macroblkxs; + offset += ((y >> 3) * nmacroblkpl) << 11; + offset += (y & 7) << 8; + offset += (x / macroblkxs) << 11; + offset += ((x & (macroblkxs - 1)) / microblkxs) << 5; + offset += (x & (microblkxs - 1)) * rrb->cpp; + } + } else { + microblkxs = 32 / rrb->cpp; + nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5; + offset += (y * nmicroblkpl) << 5; + offset += (x / microblkxs) << 5; + offset += (x & (microblkxs - 1)) * rrb->cpp; + } + } + return &ptr[offset]; +} + +#ifndef COMPILE_R300 +static uint32_t +z24s8_to_s8z24(uint32_t val) +{ + return (val << 24) | (val >> 8); +} + +static uint32_t +s8z24_to_z24s8(uint32_t val) +{ + return (val >> 24) | (val << 8); +} +#endif + /* * Note that all information needed to access pixels in a renderbuffer * should be obtained through the gl_renderbuffer parameter, not per-context * information. */ #define LOCAL_VARS \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - const __DRIdrawablePrivate *dPriv = drb->dPriv; \ - const GLuint bottom = dPriv->h - 1; \ - GLubyte *buf = (GLubyte *) drb->flippedData \ - + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp; \ - GLuint p; \ - (void) p; + struct radeon_context *radeon = RADEON_CONTEXT(ctx); \ + struct radeon_renderbuffer *rrb = (void *) rb; \ + const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ + const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\ + unsigned int num_cliprects; \ + struct drm_clip_rect *cliprects; \ + int x_off, y_off; \ + GLuint p; \ + (void)p; \ + radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off); #define LOCAL_DEPTH_VARS \ - driRenderbuffer *drb = (driRenderbuffer *) rb; \ - const __DRIdrawablePrivate *dPriv = drb->dPriv; \ - const GLuint bottom = dPriv->h - 1; \ - GLuint xo = dPriv->x; \ - GLuint yo = dPriv->y; \ - GLubyte *buf = (GLubyte *) drb->Base.Data; + struct radeon_context *radeon = RADEON_CONTEXT(ctx); \ + struct radeon_renderbuffer *rrb = (void *) rb; \ + const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ + const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\ + unsigned int num_cliprects; \ + struct drm_clip_rect *cliprects; \ + int x_off, y_off; \ + radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off); #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS -#define Y_FLIP(Y) (bottom - (Y)) +#define Y_FLIP(_y) ((_y) * yScale + yBias) #define HW_LOCK() #define HW_UNLOCK() +/* XXX FBO: this is identical to the macro in spantmp2.h except we get + * the cliprect info from the context, not the driDrawable. + * Move this into spantmp2.h someday. + */ +#define HW_CLIPLOOP() \ + do { \ + int _nc = num_cliprects; \ + while ( _nc-- ) { \ + int minx = cliprects[_nc].x1 - x_off; \ + int miny = cliprects[_nc].y1 - y_off; \ + int maxx = cliprects[_nc].x2 - x_off; \ + int maxy = cliprects[_nc].y2 - y_off; + /* ================================================================ * Color buffer */ @@ -94,7 +251,21 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define TAG(x) radeon##x##_RGB565 #define TAG2(x,y) radeon##x##_RGB565##y -#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2) +#define GET_PTR(X,Y) radeon_ptr16(rrb, (X) + x_off, (Y) + y_off) +#include "spantmp2.h" + +/* 32 bit, xRGB8888 color spanline and pixel functions + */ +#define SPANTMP_PIXEL_FMT GL_BGRA +#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV + +#define TAG(x) radeon##x##_xRGB8888 +#define TAG2(x,y) radeon##x##_xRGB8888##y +#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) | 0xff000000)) +#define PUT_VALUE(_x, _y, d) { \ + GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) #include "spantmp2.h" /* 32 bit, ARGB8888 color spanline and pixel functions @@ -104,7 +275,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define TAG(x) radeon##x##_ARGB8888 #define TAG2(x,y) radeon##x##_ARGB8888##y -#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4) +#define GET_PTR(X,Y) radeon_ptr32(rrb, (X) + x_off, (Y) + y_off) #include "spantmp2.h" /* ================================================================ @@ -121,70 +292,20 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * too... */ -static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y) -{ - GLuint pitch = drb->pitch; - if (drb->depthHasSurface) { - return 4 * (x + y * pitch); - } else { - GLuint ba, address = 0; /* a[0..1] = 0 */ - -#ifdef COMPILE_R300 - ba = (y / 8) * (pitch / 8) + (x / 8); -#else - ba = (y / 16) * (pitch / 16) + (x / 16); -#endif - - address |= (x & 0x7) << 2; /* a[2..4] = x[0..2] */ - address |= (y & 0x3) << 5; /* a[5..6] = y[0..1] */ - address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5; /* a[7] = x[4] ^ y[2] */ - address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ - - address |= (y & 0x8) << 7; /* a[10] = y[3] */ - address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7; /* a[11] = x[3] ^ y[4] */ - address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ - - return address; - } -} - -static INLINE GLuint -radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) -{ - GLuint pitch = drb->pitch; - if (drb->depthHasSurface) { - return 2 * (x + y * pitch); - } else { - GLuint ba, address = 0; /* a[0] = 0 */ - - ba = (y / 16) * (pitch / 32) + (x / 32); - - address |= (x & 0x7) << 1; /* a[1..3] = x[0..2] */ - address |= (y & 0x7) << 4; /* a[4..6] = y[0..2] */ - address |= (x & 0x8) << 4; /* a[7] = x[3] */ - address |= (ba & 0x3) << 8; /* a[8..9] = ba[0..1] */ - address |= (y & 0x8) << 7; /* a[10] = y[3] */ - address |= ((x & 0x10) ^ (y & 0x10)) << 7; /* a[11] = x[4] ^ y[4] */ - address |= (ba & ~0x3) << 10; /* a[12..] = ba[2..] */ - - return address; - } -} - /* 16-bit depth buffer functions */ #define VALUE_TYPE GLushort #define WRITE_DEPTH( _x, _y, d ) \ - *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d; + *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off) = d #define READ_DEPTH( d, _x, _y ) \ - d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )); + d = *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off) #define TAG(x) radeon##x##_z16 #include "depthtmp.h" -/* 24 bit depth, 8 bit stencil depthbuffer functions +/* 24 bit depth * * Careful: It looks like the R300 uses ZZZS byte order while the R200 * uses SZZZ for 24 bit depth, 8 bit stencil mode. @@ -194,35 +315,76 @@ radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y) #ifdef COMPILE_R300 #define WRITE_DEPTH( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ tmp &= 0x000000ff; \ tmp |= ((d << 8) & 0xffffff00); \ - *(GLuint *)(buf + offset) = tmp; \ + *_ptr = tmp; \ } while (0) #else #define WRITE_DEPTH( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ tmp &= 0xff000000; \ tmp |= ((d) & 0x00ffffff); \ - *(GLuint *)(buf + offset) = tmp; \ + *_ptr = tmp; \ } while (0) #endif #ifdef COMPILE_R300 #define READ_DEPTH( d, _x, _y ) \ - do { \ - d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ - _y + yo )) & 0xffffff00) >> 8; \ + do { \ + d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \ }while(0) #else -#define READ_DEPTH( d, _x, _y ) \ - d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo, \ - _y + yo )) & 0x00ffffff; +#define READ_DEPTH( d, _x, _y ) \ + d = *(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; +#endif +/* + fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\ + d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff; +*/ +#define TAG(x) radeon##x##_z24 +#include "depthtmp.h" + +/* 24 bit depth, 8 bit stencil depthbuffer functions + * EXT_depth_stencil + * + * Careful: It looks like the R300 uses ZZZS byte order while the R200 + * uses SZZZ for 24 bit depth, 8 bit stencil mode. + */ +#define VALUE_TYPE GLuint + +#ifdef COMPILE_R300 +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \ + *_ptr = d; \ +} while (0) +#else +#define WRITE_DEPTH( _x, _y, d ) \ +do { \ + GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = z24s8_to_s8z24(d); \ + *_ptr = tmp; \ +} while (0) #endif +#ifdef COMPILE_R300 +#define READ_DEPTH( d, _x, _y ) \ + do { \ + d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off))); \ + }while(0) +#else +#define READ_DEPTH( d, _x, _y ) do { \ + d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off ))); \ + } while (0) +#endif +/* + fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\ + d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff; +*/ #define TAG(x) radeon##x##_z24_s8 #include "depthtmp.h" @@ -235,35 +397,35 @@ do { \ #ifdef COMPILE_R300 #define WRITE_STENCIL( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off); \ + GLuint tmp = *_ptr; \ tmp &= 0xffffff00; \ tmp |= (d) & 0xff; \ - *(GLuint *)(buf + offset) = tmp; \ + *_ptr = tmp; \ } while (0) #else #define WRITE_STENCIL( _x, _y, d ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off); \ + GLuint tmp = *_ptr; \ tmp &= 0x00ffffff; \ tmp |= (((d) & 0xff) << 24); \ - *(GLuint *)(buf + offset) = tmp; \ + *_ptr = tmp; \ } while (0) #endif #ifdef COMPILE_R300 #define READ_STENCIL( d, _x, _y ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ d = tmp & 0x000000ff; \ } while (0) #else #define READ_STENCIL( d, _x, _y ) \ do { \ - GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo ); \ - GLuint tmp = *(GLuint *)(buf + offset); \ + GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \ + GLuint tmp = *_ptr; \ d = (tmp & 0xff000000) >> 24; \ } while (0) #endif @@ -271,29 +433,105 @@ do { \ #define TAG(x) radeon##x##_z24_s8 #include "stenciltmp.h" -/* Move locking out to get reasonable span performance (10x better - * than doing this in HW_LOCK above). WaitForIdle() is the main - * culprit. - */ +static void map_unmap_rb(struct gl_renderbuffer *rb, int flag) +{ + struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb); + int r; + + if (rrb == NULL || !rrb->bo) + return; + + if (flag) { + r = radeon_bo_map(rrb->bo, 1); + if (r) { + fprintf(stderr, "(%s) error(%d) mapping buffer.\n", + __FUNCTION__, r); + } + + radeonSetSpanFunctions(rrb); + } else { + radeon_bo_unmap(rrb->bo); + rb->GetRow = NULL; + rb->PutRow = NULL; + } +} + +static void +radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map) +{ + GLuint i, j; + + /* color draw buffers */ + for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) + map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map); + + /* check for render to textures */ + for (i = 0; i < BUFFER_COUNT; i++) { + struct gl_renderbuffer_attachment *att = + ctx->DrawBuffer->Attachment + i; + struct gl_texture_object *tex = att->Texture; + if (tex) { + /* render to texture */ + ASSERT(att->Renderbuffer); + if (map) + ctx->Driver.MapTexture(ctx, tex); + else + ctx->Driver.UnmapTexture(ctx, tex); + } + } + + map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map); + + /* depth buffer (Note wrapper!) */ + if (ctx->DrawBuffer->_DepthBuffer) + map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map); + + if (ctx->DrawBuffer->_StencilBuffer) + map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map); + +} static void radeonSpanRenderStart(GLcontext * ctx) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); -#ifdef COMPILE_R300 - r300ContextPtr r300 = (r300ContextPtr) rmesa; - R300_FIREVERTICES(r300); -#else - RADEON_FIREVERTICES(rmesa); -#endif - LOCK_HARDWARE(rmesa); - radeonWaitForIdleLocked(rmesa); + int i; + + radeon_firevertices(rmesa); + + /* The locking and wait for idle should really only be needed in classic mode. + * In a future memory manager based implementation, this should become + * unnecessary due to the fact that mapping our buffers, textures, etc. + * should implicitly wait for any previous rendering commands that must + * be waited on. */ + if (!rmesa->radeonScreen->driScreen->dri2.enabled) { + LOCK_HARDWARE(rmesa); + radeonWaitForIdleLocked(rmesa); + } + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) + ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current); + } + + radeon_map_unmap_buffers(ctx, 1); + + + } static void radeonSpanRenderFinish(GLcontext * ctx) { radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + int i; _swrast_flush(ctx); - UNLOCK_HARDWARE(rmesa); + if (!rmesa->radeonScreen->driScreen->dri2.enabled) { + UNLOCK_HARDWARE(rmesa); + } + for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { + if (ctx->Texture.Unit[i]._ReallyEnabled) + ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current); + } + + radeon_map_unmap_buffers(ctx, 0); } void radeonInitSpanFuncs(GLcontext * ctx) @@ -307,20 +545,21 @@ void radeonInitSpanFuncs(GLcontext * ctx) /** * Plug in the Get/Put routines for the given driRenderbuffer. */ -void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis) +static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb) { - if (drb->Base.InternalFormat == GL_RGBA) { - if (vis->redBits == 5 && vis->greenBits == 6 - && vis->blueBits == 5) { - radeonInitPointers_RGB565(&drb->Base); - } else { - radeonInitPointers_ARGB8888(&drb->Base); - } - } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) { - radeonInitDepthPointers_z16(&drb->Base); - } else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) { - radeonInitDepthPointers_z24_s8(&drb->Base); - } else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) { - radeonInitStencilPointers_z24_s8(&drb->Base); + if (rrb->base._ActualFormat == GL_RGB5) { + radeonInitPointers_RGB565(&rrb->base); + } else if (rrb->base._ActualFormat == GL_RGB8) { + radeonInitPointers_xRGB8888(&rrb->base); + } else if (rrb->base._ActualFormat == GL_RGBA8) { + radeonInitPointers_ARGB8888(&rrb->base); + } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) { + radeonInitDepthPointers_z16(&rrb->base); + } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) { + radeonInitDepthPointers_z24(&rrb->base); + } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) { + radeonInitDepthPointers_z24_s8(&rrb->base); + } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) { + radeonInitStencilPointers_z24_s8(&rrb->base); } } diff --git a/src/mesa/drivers/dri/radeon/radeon_span.h b/src/mesa/drivers/dri/radeon/radeon_span.h index 9abe0864b1..ea6a2e7fb4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.h +++ b/src/mesa/drivers/dri/radeon/radeon_span.h @@ -42,9 +42,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #ifndef __RADEON_SPAN_H__ #define __RADEON_SPAN_H__ -#include "drirenderbuffer.h" - extern void radeonInitSpanFuncs(GLcontext * ctx); -extern void radeonSetSpanFunctions(driRenderbuffer * rb, const GLvisual * vis); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index b6561001e7..06b8c29936 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -47,6 +47,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "swrast_setup/swrast_setup.h" #include "radeon_context.h" +#include "radeon_mipmap_tree.h" #include "radeon_ioctl.h" #include "radeon_state.h" #include "radeon_tcl.h" @@ -62,7 +63,7 @@ static void radeonUpdateSpecular( GLcontext *ctx ); static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); int pp_misc = rmesa->hw.ctx.cmd[CTX_PP_MISC]; GLubyte refByte; @@ -106,7 +107,7 @@ static void radeonAlphaFunc( GLcontext *ctx, GLenum func, GLfloat ref ) static void radeonBlendEquationSeparate( GLcontext *ctx, GLenum modeRGB, GLenum modeA ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~RADEON_COMB_FCN_MASK; GLboolean fallback = GL_FALSE; @@ -147,8 +148,8 @@ static void radeonBlendFuncSeparate( GLcontext *ctx, GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorA, GLenum dfactorA ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & + r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint b = rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] & ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK); GLboolean fallback = GL_FALSE; @@ -257,7 +258,7 @@ static void radeonBlendFuncSeparate( GLcontext *ctx, static void radeonDepthFunc( GLcontext *ctx, GLenum func ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); RADEON_STATECHANGE( rmesa, ctx ); rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_Z_TEST_MASK; @@ -293,7 +294,7 @@ static void radeonDepthFunc( GLcontext *ctx, GLenum func ) static void radeonDepthMask( GLcontext *ctx, GLboolean flag ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); RADEON_STATECHANGE( rmesa, ctx ); if ( ctx->Depth.Mask ) { @@ -305,16 +306,16 @@ static void radeonDepthMask( GLcontext *ctx, GLboolean flag ) static void radeonClearDepth( GLcontext *ctx, GLclampd d ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint format = (rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] & RADEON_DEPTH_FORMAT_MASK); switch ( format ) { case RADEON_DEPTH_FORMAT_16BIT_INT_Z: - rmesa->state.depth.clear = d * 0x0000ffff; + rmesa->radeon.state.depth.clear = d * 0x0000ffff; break; case RADEON_DEPTH_FORMAT_24BIT_INT_Z: - rmesa->state.depth.clear = d * 0x00ffffff; + rmesa->radeon.state.depth.clear = d * 0x00ffffff; break; } } @@ -327,7 +328,7 @@ static void radeonClearDepth( GLcontext *ctx, GLclampd d ) static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); union { int i; float f; } c, d; GLchan col[4]; @@ -391,7 +392,7 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) rmesa->hw.fog.cmd[FOG_D] = d.i; } break; - case GL_FOG_COLOR: + case GL_FOG_COLOR: RADEON_STATECHANGE( rmesa, ctx ); UNCLAMPED_FLOAT_TO_RGB_CHAN( col, ctx->Fog.Color ); rmesa->hw.ctx.cmd[CTX_PP_FOG_COLOR] &= ~RADEON_FOG_COLOR_MASK; @@ -406,109 +407,13 @@ static void radeonFogfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) } } - -/* ============================================================= - * Scissoring - */ - - -static GLboolean intersect_rect( drm_clip_rect_t *out, - drm_clip_rect_t *a, - drm_clip_rect_t *b ) -{ - *out = *a; - if ( b->x1 > out->x1 ) out->x1 = b->x1; - if ( b->y1 > out->y1 ) out->y1 = b->y1; - if ( b->x2 < out->x2 ) out->x2 = b->x2; - if ( b->y2 < out->y2 ) out->y2 = b->y2; - if ( out->x1 >= out->x2 ) return GL_FALSE; - if ( out->y1 >= out->y2 ) return GL_FALSE; - return GL_TRUE; -} - - -void radeonRecalcScissorRects( radeonContextPtr rmesa ) -{ - drm_clip_rect_t *out; - int i; - - /* Grow cliprect store? - */ - if (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { - while (rmesa->state.scissor.numAllocedClipRects < rmesa->numClipRects) { - rmesa->state.scissor.numAllocedClipRects += 1; /* zero case */ - rmesa->state.scissor.numAllocedClipRects *= 2; - } - - if (rmesa->state.scissor.pClipRects) - FREE(rmesa->state.scissor.pClipRects); - - rmesa->state.scissor.pClipRects = - MALLOC( rmesa->state.scissor.numAllocedClipRects * - sizeof(drm_clip_rect_t) ); - - if ( rmesa->state.scissor.pClipRects == NULL ) { - rmesa->state.scissor.numAllocedClipRects = 0; - return; - } - } - - out = rmesa->state.scissor.pClipRects; - rmesa->state.scissor.numClipRects = 0; - - for ( i = 0 ; i < rmesa->numClipRects ; i++ ) { - if ( intersect_rect( out, - &rmesa->pClipRects[i], - &rmesa->state.scissor.rect ) ) { - rmesa->state.scissor.numClipRects++; - out++; - } - } -} - - -static void radeonUpdateScissor( GLcontext *ctx ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - - if ( rmesa->dri.drawable ) { - __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; - - int x = ctx->Scissor.X; - int y = dPriv->h - ctx->Scissor.Y - ctx->Scissor.Height; - int w = ctx->Scissor.X + ctx->Scissor.Width - 1; - int h = dPriv->h - ctx->Scissor.Y - 1; - - rmesa->state.scissor.rect.x1 = x + dPriv->x; - rmesa->state.scissor.rect.y1 = y + dPriv->y; - rmesa->state.scissor.rect.x2 = w + dPriv->x + 1; - rmesa->state.scissor.rect.y2 = h + dPriv->y + 1; - - radeonRecalcScissorRects( rmesa ); - } -} - - -static void radeonScissor( GLcontext *ctx, - GLint x, GLint y, GLsizei w, GLsizei h ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - - if ( ctx->Scissor.Enabled ) { - RADEON_FIREVERTICES( rmesa ); /* don't pipeline cliprect changes */ - radeonUpdateScissor( ctx ); - } - -} - - /* ============================================================= * Culling */ static void radeonCullFace( GLcontext *ctx, GLenum unused ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL]; GLuint t = rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL]; @@ -545,7 +450,7 @@ static void radeonCullFace( GLcontext *ctx, GLenum unused ) static void radeonFrontFace( GLcontext *ctx, GLenum mode ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); RADEON_STATECHANGE( rmesa, set ); rmesa->hw.set.cmd[SET_SE_CNTL] &= ~RADEON_FFACE_CULL_DIR_MASK; @@ -570,7 +475,7 @@ static void radeonFrontFace( GLcontext *ctx, GLenum mode ) */ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); RADEON_STATECHANGE( rmesa, lin ); RADEON_STATECHANGE( rmesa, set ); @@ -587,10 +492,10 @@ static void radeonLineWidth( GLcontext *ctx, GLfloat widthf ) static void radeonLineStipple( GLcontext *ctx, GLint factor, GLushort pattern ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); RADEON_STATECHANGE( rmesa, lin ); - rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = + rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] = ((((GLuint)factor & 0xff) << 16) | ((GLuint)pattern)); } @@ -602,8 +507,8 @@ static void radeonColorMask( GLcontext *ctx, GLboolean r, GLboolean g, GLboolean b, GLboolean a ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - GLuint mask = radeonPackColor( rmesa->radeonScreen->cpp, + r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint mask = radeonPackColor( rmesa->radeon.radeonScreen->cpp, ctx->Color.ColorMask[RCOMP], ctx->Color.ColorMask[GCOMP], ctx->Color.ColorMask[BCOMP], @@ -623,8 +528,9 @@ static void radeonColorMask( GLcontext *ctx, static void radeonPolygonOffset( GLcontext *ctx, GLfloat factor, GLfloat units ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - float_ui32_type constant = { units * rmesa->state.depth.scale }; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + float_ui32_type constant = { units * depthScale }; float_ui32_type factoru = { factor }; RADEON_STATECHANGE( rmesa, zbs ); @@ -634,7 +540,7 @@ static void radeonPolygonOffset( GLcontext *ctx, static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint i; drm_radeon_stipple_t stipple; @@ -646,27 +552,27 @@ static void radeonPolygonStipple( GLcontext *ctx, const GLubyte *mask ) /* TODO: push this into cmd mechanism */ - RADEON_FIREVERTICES( rmesa ); - LOCK_HARDWARE( rmesa ); + radeon_firevertices(&rmesa->radeon); + LOCK_HARDWARE( &rmesa->radeon ); /* FIXME: Use window x,y offsets into stipple RAM. */ stipple.mask = rmesa->state.stipple.mask; - drmCommandWrite( rmesa->dri.fd, DRM_RADEON_STIPPLE, + drmCommandWrite( rmesa->radeon.dri.fd, DRM_RADEON_STIPPLE, &stipple, sizeof(drm_radeon_stipple_t) ); - UNLOCK_HARDWARE( rmesa ); + UNLOCK_HARDWARE( &rmesa->radeon ); } static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLboolean flag = (ctx->_TriangleCaps & DD_TRI_UNFILLED) != 0; /* Can't generally do unfilled via tcl, but some good special - * cases work. + * cases work. */ TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_UNFILLED, flag); - if (rmesa->TclFallback) { + if (rmesa->radeon.TclFallback) { radeonChooseRenderState( ctx ); radeonChooseVertexState( ctx ); } @@ -686,7 +592,7 @@ static void radeonPolygonMode( GLcontext *ctx, GLenum face, GLenum mode ) */ static void radeonUpdateSpecular( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); uint32_t p = rmesa->hw.ctx.cmd[CTX_PP_CNTL]; GLuint flag = 0; @@ -711,7 +617,7 @@ static void radeonUpdateSpecular( GLcontext *ctx ) rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_PK_DIFFUSE; rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] |= RADEON_LIGHTING_ENABLE; p |= RADEON_SPECULAR_ENABLE; - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_DIFFUSE_SPECULAR_COMBINE; } else if (ctx->Light.Enabled) { @@ -741,7 +647,7 @@ static void radeonUpdateSpecular( GLcontext *ctx ) RADEON_TCL_COMPUTE_SPECULAR) != 0; } } - + TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_FOGCOORDSPEC, flag); if (NEED_SECONDARY_COLOR(ctx)) { @@ -757,7 +663,7 @@ static void radeonUpdateSpecular( GLcontext *ctx ) /* Update vertex/render formats */ - if (rmesa->TclFallback) { + if (rmesa->radeon.TclFallback) { radeonChooseRenderState( ctx ); radeonChooseVertexState( ctx ); } @@ -769,12 +675,12 @@ static void radeonUpdateSpecular( GLcontext *ctx ) */ -/* Update on colormaterial, material emmissive/ambient, +/* Update on colormaterial, material emmissive/ambient, * lightmodel.globalambient */ static void update_global_ambient( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); float *fcmd = (float *)RADEON_DB_STATE( glt ); /* Need to do more if both emmissive & ambient are PREMULT: @@ -782,23 +688,23 @@ static void update_global_ambient( GLcontext *ctx ) */ if ((rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] & ((3 << RADEON_EMISSIVE_SOURCE_SHIFT) | - (3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0) + (3 << RADEON_AMBIENT_SOURCE_SHIFT))) == 0) { - COPY_3V( &fcmd[GLT_RED], + COPY_3V( &fcmd[GLT_RED], ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_EMISSION]); ACC_SCALE_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient, ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_AMBIENT]); - } + } else { COPY_3V( &fcmd[GLT_RED], ctx->Light.Model.Ambient ); } - + RADEON_DB_STATECHANGE(rmesa, &rmesa->hw.glt); } -/* Update on change to +/* Update on change to * - light[p].colors * - light[p].enabled */ @@ -809,13 +715,13 @@ static void update_light_colors( GLcontext *ctx, GLuint p ) /* fprintf(stderr, "%s\n", __FUNCTION__); */ if (l->Enabled) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); float *fcmd = (float *)RADEON_DB_STATE( lit[p] ); - COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); + COPY_4V( &fcmd[LIT_AMBIENT_RED], l->Ambient ); COPY_4V( &fcmd[LIT_DIFFUSE_RED], l->Diffuse ); COPY_4V( &fcmd[LIT_SPECULAR_RED], l->Specular ); - + RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.lit[p] ); } } @@ -829,7 +735,7 @@ static void check_twoside_fallback( GLcontext *ctx ) if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { if (ctx->Light.ColorMaterialEnabled && - (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) != + (ctx->Light.ColorMaterialBitmask & BACK_MATERIAL_BITS) != ((ctx->Light.ColorMaterialBitmask & FRONT_MATERIAL_BITS)<<1)) fallback = GL_TRUE; else { @@ -837,7 +743,7 @@ static void check_twoside_fallback( GLcontext *ctx ) if (memcmp( ctx->Light.Material.Attrib[i], ctx->Light.Material.Attrib[i+1], sizeof(GLfloat)*4) != 0) { - fallback = GL_TRUE; + fallback = GL_TRUE; break; } } @@ -849,14 +755,14 @@ static void check_twoside_fallback( GLcontext *ctx ) static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint light_model_ctl1 = rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]; light_model_ctl1 &= ~((3 << RADEON_EMISSIVE_SOURCE_SHIFT) | (3 << RADEON_AMBIENT_SOURCE_SHIFT) | (3 << RADEON_DIFFUSE_SOURCE_SHIFT) | - (3 << RADEON_SPECULAR_SOURCE_SHIFT)); - + (3 << RADEON_SPECULAR_SOURCE_SHIFT)); + if (ctx->Light.ColorMaterialEnabled) { GLuint mask = ctx->Light.ColorMaterialBitmask; @@ -877,7 +783,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT << RADEON_AMBIENT_SOURCE_SHIFT); } - + if (mask & MAT_BIT_FRONT_DIFFUSE) { light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << RADEON_DIFFUSE_SOURCE_SHIFT); @@ -886,7 +792,7 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) light_model_ctl1 |= (RADEON_LM_SOURCE_STATE_MULT << RADEON_DIFFUSE_SOURCE_SHIFT); } - + if (mask & MAT_BIT_FRONT_SPECULAR) { light_model_ctl1 |= (RADEON_LM_SOURCE_VERTEX_DIFFUSE << RADEON_SPECULAR_SOURCE_SHIFT); @@ -904,27 +810,27 @@ static void radeonColorMaterial( GLcontext *ctx, GLenum face, GLenum mode ) (RADEON_LM_SOURCE_STATE_MULT << RADEON_DIFFUSE_SOURCE_SHIFT) | (RADEON_LM_SOURCE_STATE_MULT << RADEON_SPECULAR_SOURCE_SHIFT); } - + if (light_model_ctl1 != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) { RADEON_STATECHANGE( rmesa, tcl ); - rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl1; + rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = light_model_ctl1; } } void radeonUpdateMaterial( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLfloat (*mat)[4] = ctx->Light.Material.Attrib; GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( mtl ); GLuint mask = ~0; - + if (ctx->Light.ColorMaterialEnabled) mask &= ~ctx->Light.ColorMaterialBitmask; if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "%s\n", __FUNCTION__); - + if (mask & MAT_BIT_FRONT_EMISSION) { fcmd[MTL_EMMISSIVE_RED] = mat[MAT_ATTRIB_FRONT_EMISSION][0]; fcmd[MTL_EMMISSIVE_GREEN] = mat[MAT_ATTRIB_FRONT_EMISSION][1]; @@ -974,11 +880,11 @@ void radeonUpdateMaterial( GLcontext *ctx ) * * which are calculated in light.c and are correct for the current * lighting space (model or eye), hence dependencies on _NEW_MODELVIEW - * and _MESA_NEW_NEED_EYE_COORDS. + * and _MESA_NEW_NEED_EYE_COORDS. */ static void update_light( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); /* Have to check these, or have an automatic shortcircuit mechanism * to remove noop statechanges. (Or just do a better job on the @@ -991,12 +897,12 @@ static void update_light( GLcontext *ctx ) tmp &= ~RADEON_LIGHT_IN_MODELSPACE; else tmp |= RADEON_LIGHT_IN_MODELSPACE; - + /* Leave this test disabled: (unexplained q3 lockup) (even with new packets) */ - if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) + if (tmp != rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]) { RADEON_STATECHANGE( rmesa, tcl ); rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] = tmp; @@ -1020,10 +926,10 @@ static void update_light( GLcontext *ctx ) if (ctx->Light.Light[p].Enabled) { struct gl_light *l = &ctx->Light.Light[p]; GLfloat *fcmd = (GLfloat *)RADEON_DB_STATE( lit[p] ); - + if (l->EyePosition[3] == 0.0) { - COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); - COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); + COPY_3FV( &fcmd[LIT_POSITION_X], l->_VP_inf_norm ); + COPY_3FV( &fcmd[LIT_DIRECTION_X], l->_h_inf_norm ); fcmd[LIT_POSITION_W] = 0; fcmd[LIT_DIRECTION_W] = 0; } else { @@ -1043,30 +949,30 @@ static void update_light( GLcontext *ctx ) static void radeonLightfv( GLcontext *ctx, GLenum light, GLenum pname, const GLfloat *params ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLint p = light - GL_LIGHT0; struct gl_light *l = &ctx->Light.Light[p]; GLfloat *fcmd = (GLfloat *)rmesa->hw.lit[p].cmd; - + switch (pname) { - case GL_AMBIENT: + case GL_AMBIENT: case GL_DIFFUSE: case GL_SPECULAR: update_light_colors( ctx, p ); break; - case GL_SPOT_DIRECTION: - /* picked up in update_light */ + case GL_SPOT_DIRECTION: + /* picked up in update_light */ break; case GL_POSITION: { - /* positions picked up in update_light, but can do flag here */ + /* positions picked up in update_light, but can do flag here */ GLuint flag; GLuint idx = TCL_PER_LIGHT_CTL_0 + p/2; /* FIXME: Set RANGE_ATTEN only when needed */ - if (p&1) + if (p&1) flag = RADEON_LIGHT_1_IS_LOCAL; else flag = RADEON_LIGHT_0_IS_LOCAL; @@ -1158,16 +1064,16 @@ static void radeonLightfv( GLcontext *ctx, GLenum light, } } - + static void radeonLightModelfv( GLcontext *ctx, GLenum pname, const GLfloat *param ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); switch (pname) { - case GL_LIGHT_MODEL_AMBIENT: + case GL_LIGHT_MODEL_AMBIENT: update_global_ambient( ctx ); break; @@ -1188,7 +1094,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname, check_twoside_fallback( ctx ); - if (rmesa->TclFallback) { + if (rmesa->radeon.TclFallback) { radeonChooseRenderState( ctx ); radeonChooseVertexState( ctx ); } @@ -1205,7 +1111,7 @@ static void radeonLightModelfv( GLcontext *ctx, GLenum pname, static void radeonShadeModel( GLcontext *ctx, GLenum mode ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint s = rmesa->hw.set.cmd[SET_SE_CNTL]; s &= ~(RADEON_DIFFUSE_SHADE_MASK | @@ -1244,7 +1150,7 @@ static void radeonShadeModel( GLcontext *ctx, GLenum mode ) static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) { GLint p = (GLint) plane - (GLint) GL_CLIP_PLANE0; - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLint *ip = (GLint *)ctx->Transform._ClipUserPlane[p]; RADEON_STATECHANGE( rmesa, ucp[p] ); @@ -1256,7 +1162,7 @@ static void radeonClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) static void radeonUpdateClipPlanes( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint p; for (p = 0; p < ctx->Const.MaxClipPlanes; p++) { @@ -1281,7 +1187,7 @@ static void radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func, GLint ref, GLuint mask ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint refmask = (((ctx->Stencil.Ref[0] & 0xff) << RADEON_STENCIL_REF_SHIFT) | ((ctx->Stencil.ValueMask[0] & 0xff) << RADEON_STENCIL_MASK_SHIFT)); @@ -1325,7 +1231,7 @@ radeonStencilFuncSeparate( GLcontext *ctx, GLenum face, GLenum func, static void radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); RADEON_STATECHANGE( rmesa, msk ); rmesa->hw.msk.cmd[MSK_RB3D_STENCILREFMASK] &= ~RADEON_STENCIL_WRITE_MASK; @@ -1336,20 +1242,20 @@ radeonStencilMaskSeparate( GLcontext *ctx, GLenum face, GLuint mask ) static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail, GLenum zfail, GLenum zpass ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); /* radeon 7200 have stencil bug, DEC and INC_WRAP will actually both do DEC_WRAP, and DEC_WRAP (and INVERT) will do INVERT. No way to get correct INC_WRAP and DEC, but DEC_WRAP can be fixed by using DEC and INC_WRAP at least use INC. */ - + GLuint tempRADEON_STENCIL_FAIL_DEC_WRAP; GLuint tempRADEON_STENCIL_FAIL_INC_WRAP; GLuint tempRADEON_STENCIL_ZFAIL_DEC_WRAP; GLuint tempRADEON_STENCIL_ZFAIL_INC_WRAP; GLuint tempRADEON_STENCIL_ZPASS_DEC_WRAP; GLuint tempRADEON_STENCIL_ZPASS_INC_WRAP; - - if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) { + + if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_BROKEN_STENCIL) { tempRADEON_STENCIL_FAIL_DEC_WRAP = RADEON_STENCIL_FAIL_DEC; tempRADEON_STENCIL_FAIL_INC_WRAP = RADEON_STENCIL_FAIL_INC; tempRADEON_STENCIL_ZFAIL_DEC_WRAP = RADEON_STENCIL_ZFAIL_DEC; @@ -1365,7 +1271,7 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail, tempRADEON_STENCIL_ZPASS_DEC_WRAP = RADEON_STENCIL_ZPASS_DEC_WRAP; tempRADEON_STENCIL_ZPASS_INC_WRAP = RADEON_STENCIL_ZPASS_INC_WRAP; } - + RADEON_STATECHANGE( rmesa, ctx ); rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] &= ~(RADEON_STENCIL_FAIL_MASK | RADEON_STENCIL_ZFAIL_MASK | @@ -1455,9 +1361,9 @@ static void radeonStencilOpSeparate( GLcontext *ctx, GLenum face, GLenum fail, static void radeonClearStencil( GLcontext *ctx, GLint s ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); - rmesa->state.stencil.clear = + rmesa->radeon.state.stencil.clear = ((GLuint) (ctx->Stencil.Clear & 0xff) | (0xff << RADEON_STENCIL_MASK_SHIFT) | ((ctx->Stencil.WriteMask[0] & 0xff) << RADEON_STENCIL_WRITEMASK_SHIFT)); @@ -1481,20 +1387,30 @@ static void radeonClearStencil( GLcontext *ctx, GLint s ) */ void radeonUpdateWindow( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; - GLfloat xoffset = (GLfloat)dPriv->x; - GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); + GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0; + GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0; const GLfloat *v = ctx->Viewport._WindowMap.m; + const GLboolean render_to_fbo = (ctx->DrawBuffer ? (ctx->DrawBuffer->Name != 0) : 0); + const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + GLfloat y_scale, y_bias; + + if (render_to_fbo) { + y_scale = 1.0; + y_bias = 0; + } else { + y_scale = -1.0; + y_bias = yoffset; + } float_ui32_type sx = { v[MAT_SX] }; float_ui32_type tx = { v[MAT_TX] + xoffset + SUBPIXEL_X }; - float_ui32_type sy = { - v[MAT_SY] }; - float_ui32_type ty = { (- v[MAT_TY]) + yoffset + SUBPIXEL_Y }; - float_ui32_type sz = { v[MAT_SZ] * rmesa->state.depth.scale }; - float_ui32_type tz = { v[MAT_TZ] * rmesa->state.depth.scale }; + float_ui32_type sy = { v[MAT_SY] * y_scale }; + float_ui32_type ty = { (v[MAT_TY] * y_scale) + y_bias + SUBPIXEL_Y }; + float_ui32_type sz = { v[MAT_SZ] * depthScale }; + float_ui32_type tz = { v[MAT_TZ] * depthScale }; - RADEON_FIREVERTICES( rmesa ); RADEON_STATECHANGE( rmesa, vpt ); rmesa->hw.vpt.cmd[VPT_SE_VPORT_XSCALE] = sx.ui32; @@ -1514,6 +1430,8 @@ static void radeonViewport( GLcontext *ctx, GLint x, GLint y, * values, or keep the originals hanging around. */ radeonUpdateWindow( ctx ); + + radeon_viewport(ctx, x, y, width, height); } static void radeonDepthRange( GLcontext *ctx, GLclampd nearval, @@ -1524,8 +1442,8 @@ static void radeonDepthRange( GLcontext *ctx, GLclampd nearval, void radeonUpdateViewportOffset( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - __DRIdrawablePrivate *dPriv = rmesa->dri.drawable; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + __DRIdrawablePrivate *dPriv = radeon_get_drawable(&rmesa->radeon); GLfloat xoffset = (GLfloat)dPriv->x; GLfloat yoffset = (GLfloat)dPriv->y + dPriv->h; const GLfloat *v = ctx->Viewport._WindowMap.m; @@ -1555,8 +1473,8 @@ void radeonUpdateViewportOffset( GLcontext *ctx ) RADEON_STIPPLE_Y_OFFSET_MASK); /* add magic offsets, then invert */ - stx = 31 - ((rmesa->dri.drawable->x - 1) & RADEON_STIPPLE_COORD_MASK); - sty = 31 - ((rmesa->dri.drawable->y + rmesa->dri.drawable->h - 1) + stx = 31 - ((dPriv->x - 1) & RADEON_STIPPLE_COORD_MASK); + sty = 31 - ((dPriv->y + dPriv->h - 1) & RADEON_STIPPLE_COORD_MASK); m |= ((stx << RADEON_STIPPLE_X_OFFSET_SHIFT) | @@ -1580,20 +1498,20 @@ void radeonUpdateViewportOffset( GLcontext *ctx ) static void radeonClearColor( GLcontext *ctx, const GLfloat color[4] ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLubyte c[4]; CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]); CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]); CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]); CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]); - rmesa->state.color.clear = radeonPackColor( rmesa->radeonScreen->cpp, + rmesa->radeon.state.color.clear = radeonPackColor( rmesa->radeon.radeonScreen->cpp, c[0], c[1], c[2], c[3] ); } static void radeonRenderMode( GLcontext *ctx, GLenum mode ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); FALLBACK( rmesa, RADEON_FALLBACK_RENDER_MODE, (mode != GL_RENDER) ); } @@ -1619,7 +1537,7 @@ static GLuint radeon_rop_tab[] = { static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint rop = (GLuint)opcode - GL_CLEAR; ASSERT( rop < 16 ); @@ -1628,105 +1546,13 @@ static void radeonLogicOpCode( GLcontext *ctx, GLenum opcode ) rmesa->hw.msk.cmd[MSK_RB3D_ROPCNTL] = radeon_rop_tab[rop]; } - -/** - * Set up the cliprects for either front or back-buffer drawing. - */ -void radeonSetCliprects( radeonContextPtr rmesa ) -{ - __DRIdrawablePrivate *const drawable = rmesa->dri.drawable; - __DRIdrawablePrivate *const readable = rmesa->dri.readable; - GLframebuffer *const draw_fb = (GLframebuffer*) drawable->driverPrivate; - GLframebuffer *const read_fb = (GLframebuffer*) readable->driverPrivate; - - if (draw_fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { - /* Can't ignore 2d windows if we are page flipping. - */ - if ( drawable->numBackClipRects == 0 || rmesa->doPageFlip ) { - rmesa->numClipRects = drawable->numClipRects; - rmesa->pClipRects = drawable->pClipRects; - } - else { - rmesa->numClipRects = drawable->numBackClipRects; - rmesa->pClipRects = drawable->pBackClipRects; - } - } - else { - /* front buffer (or none, or multiple buffers */ - rmesa->numClipRects = drawable->numClipRects; - rmesa->pClipRects = drawable->pClipRects; - } - - if ((draw_fb->Width != drawable->w) || (draw_fb->Height != drawable->h)) { - _mesa_resize_framebuffer(rmesa->glCtx, draw_fb, - drawable->w, drawable->h); - draw_fb->Initialized = GL_TRUE; - } - - if (drawable != readable) { - if ((read_fb->Width != readable->w) || (read_fb->Height != readable->h)) { - _mesa_resize_framebuffer(rmesa->glCtx, read_fb, - readable->w, readable->h); - read_fb->Initialized = GL_TRUE; - } - } - - if (rmesa->state.scissor.enabled) - radeonRecalcScissorRects( rmesa ); - - rmesa->lastStamp = drawable->lastStamp; -} - - -/** - * Called via glDrawBuffer. - */ -static void radeonDrawBuffer( GLcontext *ctx, GLenum mode ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - - if (RADEON_DEBUG & DEBUG_DRI) - fprintf(stderr, "%s %s\n", __FUNCTION__, - _mesa_lookup_enum_by_nr( mode )); - - RADEON_FIREVERTICES(rmesa); /* don't pipeline cliprect changes */ - - if (ctx->DrawBuffer->_NumColorDrawBuffers != 1) { - /* 0 (GL_NONE) buffers or multiple color drawing buffers */ - FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE ); - return; - } - - switch ( ctx->DrawBuffer->_ColorDrawBufferIndexes[0] ) { - case BUFFER_FRONT_LEFT: - case BUFFER_BACK_LEFT: - FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_FALSE ); - break; - default: - FALLBACK( rmesa, RADEON_FALLBACK_DRAW_BUFFER, GL_TRUE ); - return; - } - - radeonSetCliprects( rmesa ); - - /* We'll set the drawing engine's offset/pitch parameters later - * when we update other state. - */ -} - -static void radeonReadBuffer( GLcontext *ctx, GLenum mode ) -{ - /* nothing, until we implement h/w glRead/CopyPixels or CopyTexImage */ -} - - /* ============================================================= * State enable/disable */ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint p, flag; if ( RADEON_DEBUG & DEBUG_STATE ) @@ -1787,7 +1613,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_CLIP_PLANE2: case GL_CLIP_PLANE3: case GL_CLIP_PLANE4: - case GL_CLIP_PLANE5: + case GL_CLIP_PLANE5: p = cap-GL_CLIP_PLANE0; RADEON_STATECHANGE( rmesa, tcl ); if (state) { @@ -1821,10 +1647,10 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) RADEON_STATECHANGE(rmesa, ctx ); if ( state ) { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE; - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->state.color.roundEnable; + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~rmesa->radeon.state.color.roundEnable; } else { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_DITHER_ENABLE; - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable; } break; @@ -1852,13 +1678,13 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_LIGHT7: RADEON_STATECHANGE(rmesa, tcl); p = cap - GL_LIGHT0; - if (p&1) + if (p&1) flag = (RADEON_LIGHT_1_ENABLE | - RADEON_LIGHT_1_ENABLE_AMBIENT | + RADEON_LIGHT_1_ENABLE_AMBIENT | RADEON_LIGHT_1_ENABLE_SPECULAR); else flag = (RADEON_LIGHT_0_ENABLE | - RADEON_LIGHT_0_ENABLE_AMBIENT | + RADEON_LIGHT_0_ENABLE_AMBIENT | RADEON_LIGHT_0_ENABLE_SPECULAR); if (state) @@ -1866,7 +1692,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) else rmesa->hw.tcl.cmd[p/2 + TCL_PER_LIGHT_CTL_0] &= ~flag; - /* + /* */ update_light_colors( ctx, p ); break; @@ -1904,7 +1730,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE; } break; - + case GL_NORMALIZE: RADEON_STATECHANGE( rmesa, tcl ); if ( state ) { @@ -1971,21 +1797,30 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) } case GL_SCISSOR_TEST: - RADEON_FIREVERTICES( rmesa ); - rmesa->state.scissor.enabled = state; + radeon_firevertices(&rmesa->radeon); + rmesa->radeon.state.scissor.enabled = state; radeonUpdateScissor( ctx ); break; case GL_STENCIL_TEST: - if ( rmesa->state.stencil.hwBuffer ) { - RADEON_STATECHANGE( rmesa, ctx ); - if ( state ) { - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_STENCIL_ENABLE; + { + GLboolean hw_stencil = GL_FALSE; + if (ctx->DrawBuffer) { + struct radeon_renderbuffer *rrbStencil + = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL); + hw_stencil = (rrbStencil && rrbStencil->bo); + } + + if (hw_stencil) { + RADEON_STATECHANGE( rmesa, ctx ); + if ( state ) { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_STENCIL_ENABLE; + } else { + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE; + } } else { - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_STENCIL_ENABLE; + FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state ); } - } else { - FALLBACK( rmesa, RADEON_FALLBACK_STENCIL, state ); } break; @@ -1995,7 +1830,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) case GL_TEXTURE_GEN_T: /* Picked up in radeonUpdateTextureState. */ - rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; + rmesa->recheck_texgen[ctx->Texture.CurrentUnit] = GL_TRUE; break; case GL_COLOR_SUM_EXT: @@ -2010,7 +1845,7 @@ static void radeonEnable( GLcontext *ctx, GLenum cap, GLboolean state ) static void radeonLightingSpaceChange( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLboolean tmp; RADEON_STATECHANGE( rmesa, tcl ); @@ -2029,7 +1864,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx ) rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL] &= ~RADEON_RESCALE_NORMALS; } - if (RADEON_DEBUG & DEBUG_STATE) + if (RADEON_DEBUG & DEBUG_STATE) fprintf(stderr, "%s %d AFTER %x\n", __FUNCTION__, ctx->_NeedEyeCoords, rmesa->hw.tcl.cmd[TCL_LIGHT_MODEL_CTL]); } @@ -2039,7 +1874,7 @@ static void radeonLightingSpaceChange( GLcontext *ctx ) */ -void radeonUploadTexMatrix( radeonContextPtr rmesa, +void radeonUploadTexMatrix( r100ContextPtr rmesa, int unit, GLboolean swapcols ) { /* Here's how this works: on r100, only 3 tex coords can be submitted, so the @@ -2065,7 +1900,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa, int idx = TEXMAT_0 + unit; float *dest = ((float *)RADEON_DB_STATE( mat[idx] )) + MAT_ELT_0; int i; - struct gl_texture_unit tUnit = rmesa->glCtx->Texture.Unit[unit]; + struct gl_texture_unit tUnit = rmesa->radeon.glCtx->Texture.Unit[unit]; GLfloat *src = rmesa->tmpmat[unit].m; rmesa->TexMatColSwap &= ~(1 << unit); @@ -2119,7 +1954,7 @@ void radeonUploadTexMatrix( radeonContextPtr rmesa, } -static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx ) +static void upload_matrix( r100ContextPtr rmesa, GLfloat *src, int idx ) { float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0; int i; @@ -2135,7 +1970,7 @@ static void upload_matrix( radeonContextPtr rmesa, GLfloat *src, int idx ) RADEON_DB_STATECHANGE( rmesa, &rmesa->hw.mat[idx] ); } -static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx ) +static void upload_matrix_t( r100ContextPtr rmesa, GLfloat *src, int idx ) { float *dest = ((float *)RADEON_DB_STATE( mat[idx] ))+MAT_ELT_0; memcpy(dest, src, 16*sizeof(float)); @@ -2145,7 +1980,7 @@ static void upload_matrix_t( radeonContextPtr rmesa, GLfloat *src, int idx ) static void update_texturematrix( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + r100ContextPtr rmesa = R100_CONTEXT( ctx ); GLuint tpc = rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL]; GLuint vs = rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]; int unit; @@ -2209,64 +2044,75 @@ static void update_texturematrix( GLcontext *ctx ) } } - -/** - * Tell the card where to render (offset, pitch). - * Effected by glDrawBuffer, etc - */ -void -radeonUpdateDrawBuffer(GLcontext *ctx) +static GLboolean r100ValidateBuffers(GLcontext *ctx) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - struct gl_framebuffer *fb = ctx->DrawBuffer; - driRenderbuffer *drb; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + struct radeon_renderbuffer *rrb; + int i; - if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { - /* draw to front */ - drb = (driRenderbuffer *) fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer; - } - else if (fb->_ColorDrawBufferIndexes[0] == BUFFER_BACK_LEFT) { - /* draw to back */ - drb = (driRenderbuffer *) fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer; + radeon_validate_reset_bos(&rmesa->radeon); + + rrb = radeon_get_colorbuffer(&rmesa->radeon); + /* color buffer */ + if (rrb && rrb->bo) { + radeon_validate_bo(&rmesa->radeon, rrb->bo, + 0, RADEON_GEM_DOMAIN_VRAM); } - else { - /* drawing to multiple buffers, or none */ - return; + + /* depth buffer */ + rrb = radeon_get_depthbuffer(&rmesa->radeon); + /* color buffer */ + if (rrb && rrb->bo) { + radeon_validate_bo(&rmesa->radeon, rrb->bo, + 0, RADEON_GEM_DOMAIN_VRAM); } - assert(drb); - assert(drb->flippedPitch); + for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) { + radeonTexObj *t; - RADEON_STATECHANGE( rmesa, ctx ); + if (!ctx->Texture.Unit[i]._ReallyEnabled) + continue; - /* Note: we used the (possibly) page-flipped values */ - rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] - = ((drb->flippedOffset + rmesa->radeonScreen->fbLocation) - & RADEON_COLOROFFSET_MASK); - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = drb->flippedPitch; - if (rmesa->sarea->tiling_enabled) { - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE; + t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); + if (t->image_override && t->bo) + radeon_validate_bo(&rmesa->radeon, t->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); + else if (t->mt->bo) + radeon_validate_bo(&rmesa->radeon, t->mt->bo, + RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); } -} + if (rmesa->radeon.dma.current) + radeon_validate_bo(&rmesa->radeon, rmesa->radeon.dma.current, + RADEON_GEM_DOMAIN_GTT, 0); + + return radeon_revalidate_bos(ctx); +} -void radeonValidateState( GLcontext *ctx ) +GLboolean radeonValidateState( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - GLuint new_state = rmesa->NewGLState; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint new_state = rmesa->radeon.NewGLState; - if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) { - radeonUpdateDrawBuffer(ctx); + if (new_state & _NEW_BUFFERS) { + _mesa_update_framebuffer(ctx); + /* this updates the DrawBuffer's Width/Height if it's a FBO */ + _mesa_update_draw_buffer_bounds(ctx); + RADEON_STATECHANGE(rmesa, ctx); } if (new_state & _NEW_TEXTURE) { radeonUpdateTextureState( ctx ); - new_state |= rmesa->NewGLState; /* may add TEXTURE_MATRIX */ + new_state |= rmesa->radeon.NewGLState; /* may add TEXTURE_MATRIX */ } + /* we need to do a space check here */ + if (!r100ValidateBuffers(ctx)) + return GL_FALSE; + /* Need an event driven matrix update? */ - if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) + if (new_state & (_NEW_MODELVIEW|_NEW_PROJECTION)) upload_matrix( rmesa, ctx->_ModelProjectMatrix.m, MODEL_PROJ ); /* Need these for lighting (shouldn't upload otherwise) @@ -2290,12 +2136,14 @@ void radeonValidateState( GLcontext *ctx ) /* emit all active clip planes if projection matrix changes. */ if (new_state & (_NEW_PROJECTION)) { - if (ctx->Transform.ClipPlanesEnabled) + if (ctx->Transform.ClipPlanesEnabled) radeonUpdateClipPlanes( ctx ); } - rmesa->NewGLState = 0; + rmesa->radeon.NewGLState = 0; + + return GL_TRUE; } @@ -2306,7 +2154,7 @@ static void radeonInvalidateState( GLcontext *ctx, GLuint new_state ) _vbo_InvalidateState( ctx, new_state ); _tnl_InvalidateState( ctx, new_state ); _ae_invalidate_state( ctx, new_state ); - RADEON_CONTEXT(ctx)->NewGLState |= new_state; + R100_CONTEXT(ctx)->radeon.NewGLState |= new_state; } @@ -2317,8 +2165,8 @@ static GLboolean check_material( GLcontext *ctx ) TNLcontext *tnl = TNL_CONTEXT(ctx); GLint i; - for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT; - i < _TNL_ATTRIB_MAT_BACK_INDEXES; + for (i = _TNL_ATTRIB_MAT_FRONT_AMBIENT; + i < _TNL_ATTRIB_MAT_BACK_INDEXES; i++) if (tnl->vb.AttribPtr[i] && tnl->vb.AttribPtr[i]->stride) @@ -2326,20 +2174,21 @@ static GLboolean check_material( GLcontext *ctx ) return GL_FALSE; } - + static void radeonWrapRunPipeline( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLboolean has_material; if (0) - fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->NewGLState); + fprintf(stderr, "%s, newstate: %x\n", __FUNCTION__, rmesa->radeon.NewGLState); /* Validate state: */ - if (rmesa->NewGLState) - radeonValidateState( ctx ); + if (rmesa->radeon.NewGLState) + if (!radeonValidateState( ctx )) + FALLBACK(rmesa, RADEON_FALLBACK_TEXTURE, GL_TRUE); has_material = (ctx->Light.Enabled && check_material( ctx )); @@ -2348,7 +2197,7 @@ static void radeonWrapRunPipeline( GLcontext *ctx ) } /* Run the pipeline. - */ + */ _tnl_run_pipeline( ctx ); if (has_material) { diff --git a/src/mesa/drivers/dri/radeon/radeon_state.h b/src/mesa/drivers/dri/radeon/radeon_state.h index 2171879f75..a7c8eef32a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.h +++ b/src/mesa/drivers/dri/radeon/radeon_state.h @@ -39,30 +39,25 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_context.h" -extern void radeonInitState( radeonContextPtr rmesa ); +extern void radeonInitState( r100ContextPtr rmesa ); extern void radeonInitStateFuncs( GLcontext *ctx ); extern void radeonUpdateMaterial( GLcontext *ctx ); -extern void radeonSetCliprects( radeonContextPtr rmesa ); -extern void radeonRecalcScissorRects( radeonContextPtr rmesa ); extern void radeonUpdateViewportOffset( GLcontext *ctx ); extern void radeonUpdateWindow( GLcontext *ctx ); extern void radeonUpdateDrawBuffer( GLcontext *ctx ); -extern void radeonUploadTexMatrix( radeonContextPtr rmesa, +extern void radeonUploadTexMatrix( r100ContextPtr rmesa, int unit, GLboolean swapcols ); -extern void radeonValidateState( GLcontext *ctx ); - -extern void radeonPrintDirty( radeonContextPtr rmesa, - const char *msg ); +extern GLboolean radeonValidateState( GLcontext *ctx ); extern void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ); #define FALLBACK( rmesa, bit, mode ) do { \ if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \ __FUNCTION__, bit, mode ); \ - radeonFallback( rmesa->glCtx, bit, mode ); \ + radeonFallback( rmesa->radeon.glCtx, bit, mode ); \ } while (0) diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c index 57dc380050..c517487098 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -38,39 +38,140 @@ #include "swrast_setup/swrast_setup.h" #include "radeon_context.h" +#include "radeon_mipmap_tree.h" #include "radeon_ioctl.h" #include "radeon_state.h" #include "radeon_tcl.h" #include "radeon_tex.h" #include "radeon_swtcl.h" +#include "../r200/r200_reg.h" + #include "xmlpool.h" +/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in + * 1.3 cmdbuffers allow all previous state to be updated as well as + * the tcl scalar and vector areas. + */ +static struct { + int start; + int len; + const char *name; +} packet[RADEON_MAX_STATE_PACKETS] = { + {RADEON_PP_MISC, 7, "RADEON_PP_MISC"}, + {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"}, + {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"}, + {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"}, + {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"}, + {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"}, + {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"}, + {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"}, + {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"}, + {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"}, + {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"}, + {RADEON_RE_MISC, 1, "RADEON_RE_MISC"}, + {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"}, + {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"}, + {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"}, + {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"}, + {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"}, + {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"}, + {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"}, + {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"}, + {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17, + "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"}, + {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"}, + {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"}, + {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"}, + {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"}, + {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"}, + {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"}, + {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"}, + {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"}, + {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"}, + {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"}, + {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"}, + {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"}, + {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"}, + {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"}, + {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"}, + {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"}, + {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"}, + {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"}, + {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"}, + {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"}, + {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"}, + {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"}, + {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"}, + {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"}, + {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"}, + {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"}, + {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"}, + {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"}, + {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, + "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"}, + {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"}, + {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"}, + {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"}, + {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"}, + {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"}, + {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"}, + {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"}, + {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"}, + {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"}, + {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"}, + {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, + "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"}, + {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"}, /* 61 */ + {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */ + {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"}, + {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"}, + {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"}, + {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"}, + {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"}, + {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"}, + {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"}, + {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"}, + {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"}, + {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"}, + {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, + {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, + {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, + {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, + {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"}, + {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"}, + {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"}, + {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"}, + {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"}, + {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"}, + {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"}, + {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"}, + {R200_PP_TXCBLEND_8, 32, "R200_PP_AFS_0"}, /* 85 */ + {R200_PP_TXCBLEND_0, 32, "R200_PP_AFS_1"}, + {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"}, + {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"}, + {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"}, + {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"}, + {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"}, + {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"}, + {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"}, + {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"}, +}; + /* ============================================================= * State initialization */ - -void radeonPrintDirty( radeonContextPtr rmesa, const char *msg ) +static int cmdpkt( r100ContextPtr rmesa, int id ) { - struct radeon_state_atom *l; - - fprintf(stderr, msg); - fprintf(stderr, ": "); + drm_radeon_cmd_header_t h; - foreach(l, &rmesa->hw.atomlist) { - if (l->dirty || rmesa->hw.all_dirty) - fprintf(stderr, "%s, ", l->name); + if (rmesa->radeon.radeonScreen->kernel_mm) { + return CP_PACKET0(packet[id].start, packet[id].len - 1); + } else { + h.i = 0; + h.packet.cmd_type = RADEON_CMD_PACKET; + h.packet.packet_id = id; } - - fprintf(stderr, "\n"); -} - -static int cmdpkt( int id ) -{ - drm_radeon_cmd_header_t h; - h.i = 0; - h.packet.cmd_type = RADEON_CMD_PACKET; - h.packet.packet_id = id; return h.i; } @@ -96,17 +197,17 @@ static int cmdscl( int offset, int stride, int count ) return h.i; } -#define CHECK( NM, FLAG ) \ -static GLboolean check_##NM( GLcontext *ctx ) \ -{ \ - return FLAG; \ +#define CHECK( NM, FLAG ) \ +static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ +{ \ + return FLAG ? atom->cmd_size : 0; \ } #define TCL_CHECK( NM, FLAG ) \ -static GLboolean check_##NM( GLcontext *ctx ) \ +static int check_##NM( GLcontext *ctx, struct radeon_state_atom *atom ) \ { \ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ - return !rmesa->TclFallback && (FLAG); \ + r100ContextPtr rmesa = R100_CONTEXT(ctx); \ + return (!rmesa->radeon.TclFallback && (FLAG)) ? atom->cmd_size : 0; \ } @@ -146,81 +247,420 @@ CHECK( txr0, (ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT)) CHECK( txr1, (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT)) CHECK( txr2, (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT)) +#define OUT_VEC(hdr, data) do { \ + drm_radeon_cmd_header_t h; \ + h.i = hdr; \ + OUT_BATCH(CP_PACKET0(RADEON_SE_TCL_STATE_FLUSH, 0)); \ + OUT_BATCH(0); \ + OUT_BATCH(CP_PACKET0(R200_SE_TCL_VECTOR_INDX_REG, 0)); \ + OUT_BATCH(h.vectors.offset | (h.vectors.stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); \ + OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_VECTOR_DATA_REG, h.vectors.count - 1)); \ + OUT_BATCH_TABLE((data), h.vectors.count); \ + } while(0) + +#define OUT_SCL(hdr, data) do { \ + drm_radeon_cmd_header_t h; \ + h.i = hdr; \ + OUT_BATCH(CP_PACKET0(R200_SE_TCL_SCALAR_INDX_REG, 0)); \ + OUT_BATCH((h.scalars.offset) | (h.scalars.stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); \ + OUT_BATCH(CP_PACKET0_ONE(R200_SE_TCL_SCALAR_DATA_REG, h.scalars.count - 1)); \ + OUT_BATCH_TABLE((data), h.scalars.count); \ + } while(0) + +static void scl_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + uint32_t dwords = atom->cmd_size; + + dwords += 2; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_SCL(atom->cmd[0], atom->cmd+1); + END_BATCH(); +} -/* Initialize the context's hardware state. - */ -void radeonInitState( radeonContextPtr rmesa ) +static void vec_emit(GLcontext *ctx, struct radeon_state_atom *atom) { - GLcontext *ctx = rmesa->glCtx; - GLuint color_fmt, depth_fmt, i; - GLint drawPitch, drawOffset; + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + uint32_t dwords = atom->cmd_size; + + dwords += 4; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_VEC(atom->cmd[0], atom->cmd+1); + END_BATCH(); +} - switch ( rmesa->radeonScreen->cpp ) { - case 2: - color_fmt = RADEON_COLOR_FORMAT_RGB565; - break; - case 4: - color_fmt = RADEON_COLOR_FORMAT_ARGB8888; - break; - default: - fprintf( stderr, "Error: Unsupported pixel depth... exiting\n" ); - exit( -1 ); + +static void lit_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + uint32_t dwords = atom->cmd_size; + + dwords += 6; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + OUT_VEC(atom->cmd[LIT_CMD_0], atom->cmd+1); + OUT_SCL(atom->cmd[LIT_CMD_1], atom->cmd+LIT_CMD_1+1); + END_BATCH(); +} + +static void ctx_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + struct radeon_renderbuffer *rrb; + uint32_t cbpitch; + uint32_t zbpitch, depth_fmt; + uint32_t dwords = atom->cmd_size; + + /* output the first 7 bytes of context */ + BEGIN_BATCH_NO_AUTOSTATE(dwords + 4); + OUT_BATCH_TABLE(atom->cmd, 5); + + rrb = radeon_get_depthbuffer(&r100->radeon); + if (!rrb) { + OUT_BATCH(0); + OUT_BATCH(0); + } else { + zbpitch = (rrb->pitch / rrb->cpp); + if (r100->using_hyperz) + zbpitch |= RADEON_DEPTH_HYPERZ; + + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH(zbpitch); + if (rrb->cpp == 4) + depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; + else + depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; + atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; + atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; + } + + OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]); + OUT_BATCH(atom->cmd[CTX_CMD_1]); + OUT_BATCH(atom->cmd[CTX_PP_CNTL]); + + rrb = radeon_get_colorbuffer(&r100->radeon); + if (!rrb || !rrb->bo) { + OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); + OUT_BATCH(atom->cmd[CTX_RB3D_COLOROFFSET]); + } else { + atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); + if (rrb->cpp == 4) + atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; + else + atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; + + OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + } + + OUT_BATCH(atom->cmd[CTX_CMD_2]); + + if (!rrb || !rrb->bo) { + OUT_BATCH(atom->cmd[CTX_RB3D_COLORPITCH]); + } else { + cbpitch = (rrb->pitch / rrb->cpp); + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + cbpitch |= RADEON_COLOR_TILE_ENABLE; + OUT_BATCH(cbpitch); + } + + END_BATCH(); +} + +static void ctx_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + struct radeon_renderbuffer *rrb, *drb; + uint32_t cbpitch = 0; + uint32_t zbpitch = 0; + uint32_t dwords = atom->cmd_size; + uint32_t depth_fmt; + + rrb = radeon_get_colorbuffer(&r100->radeon); + if (!rrb || !rrb->bo) { + fprintf(stderr, "no rrb\n"); + return; + } + + atom->cmd[CTX_RB3D_CNTL] &= ~(0xf << 10); + if (rrb->cpp == 4) + atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_ARGB8888; + else + atom->cmd[CTX_RB3D_CNTL] |= RADEON_COLOR_FORMAT_RGB565; + + cbpitch = (rrb->pitch / rrb->cpp); + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + cbpitch |= R200_COLOR_TILE_ENABLE; + + drb = radeon_get_depthbuffer(&r100->radeon); + if (drb) { + zbpitch = (drb->pitch / drb->cpp); + if (drb->cpp == 4) + depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; + else + depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; + atom->cmd[CTX_RB3D_ZSTENCILCNTL] &= ~RADEON_DEPTH_FORMAT_MASK; + atom->cmd[CTX_RB3D_ZSTENCILCNTL] |= depth_fmt; + } - rmesa->state.color.clear = 0x00000000; + /* output the first 7 bytes of context */ + dwords = 10; + if (drb) + dwords += 6; + if (rrb) + dwords += 6; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + + /* In the CS case we need to split this up */ + OUT_BATCH(CP_PACKET0(packet[0].start, 3)); + OUT_BATCH_TABLE((atom->cmd + 1), 4); + + if (drb) { + OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHOFFSET, 0)); + OUT_BATCH_RELOC(0, drb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + + OUT_BATCH(CP_PACKET0(RADEON_RB3D_DEPTHPITCH, 0)); + OUT_BATCH(zbpitch); + } + + OUT_BATCH(CP_PACKET0(RADEON_RB3D_ZSTENCILCNTL, 0)); + OUT_BATCH(atom->cmd[CTX_RB3D_ZSTENCILCNTL]); + OUT_BATCH(CP_PACKET0(RADEON_PP_CNTL, 1)); + OUT_BATCH(atom->cmd[CTX_PP_CNTL]); + OUT_BATCH(atom->cmd[CTX_RB3D_CNTL]); + + if (rrb) { + OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0)); + OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + + OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); + OUT_BATCH(cbpitch); + } + + // if (atom->cmd_size == CTX_STATE_SIZE_NEWDRM) { + // OUT_BATCH_TABLE((atom->cmd + 14), 4); + // } + + END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(4); + OUT_BATCH(CP_PACKET0(RADEON_RE_TOP_LEFT, 0)); + OUT_BATCH(0); + OUT_BATCH(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0)); + if (rrb) { + OUT_BATCH(((rrb->width - 1) << RADEON_RE_WIDTH_SHIFT) | + ((rrb->height - 1) << RADEON_RE_HEIGHT_SHIFT)); + } else { + OUT_BATCH(0); + } + END_BATCH(); +} + +static void cube_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + uint32_t dwords = 3; + int i = atom->idx, j; + radeonTexObj *t = r100->state.texture.unit[i].texobj; + radeon_mipmap_level *lvl; + + if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) + return; + + if (!t) + return; + + if (!t->mt) + return; + + BEGIN_BATCH_NO_AUTOSTATE(dwords + (5 * 3)); + OUT_BATCH_TABLE(atom->cmd, 3); + lvl = &t->mt->levels[0]; + for (j = 0; j < 5; j++) { + OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset, + RADEON_GEM_DOMAIN_VRAM, 0, 0); + } + END_BATCH(); +} + +static void cube_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + uint32_t dwords = 2; + int i = atom->idx, j; + radeonTexObj *t = r100->state.texture.unit[i].texobj; + radeon_mipmap_level *lvl; + uint32_t base_reg; + + if (!(ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) + return; + + if (!t) + return; + + if (!t->mt) + return; + + switch(i) { + case 1: base_reg = RADEON_PP_CUBIC_OFFSET_T1_0; break; + case 2: base_reg = RADEON_PP_CUBIC_OFFSET_T2_0; break; + default: + case 0: base_reg = RADEON_PP_CUBIC_OFFSET_T0_0; break; + }; + BEGIN_BATCH_NO_AUTOSTATE(dwords + (5 * 4)); + OUT_BATCH_TABLE(atom->cmd, 2); + lvl = &t->mt->levels[0]; + for (j = 0; j < 5; j++) { + OUT_BATCH(CP_PACKET0(base_reg + (4 * j), 0)); + OUT_BATCH_RELOC(lvl->faces[j].offset, t->mt->bo, lvl->faces[j].offset, + RADEON_GEM_DOMAIN_VRAM, 0, 0); + } + END_BATCH(); +} + +static void tex_emit(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + uint32_t dwords = atom->cmd_size; + int i = atom->idx; + radeonTexObj *t = r100->state.texture.unit[i].texobj; + radeon_mipmap_level *lvl; + + if (t && t->mt && !t->image_override) + dwords += 2; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + + OUT_BATCH_TABLE(atom->cmd, 3); + if (t && t->mt && !t->image_override) { + if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) { + lvl = &t->mt->levels[0]; + OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + } else { + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + } + } else if (!t) { + /* workaround for old CS mechanism */ + OUT_BATCH(r100->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]); + // OUT_BATCH(r100->radeon.radeonScreen); + } else { + OUT_BATCH(t->override_offset); + } + + OUT_BATCH_TABLE((atom->cmd+4), 5); + END_BATCH(); +} + +static void tex_emit_cs(GLcontext *ctx, struct radeon_state_atom *atom) +{ + r100ContextPtr r100 = R100_CONTEXT(ctx); + BATCH_LOCALS(&r100->radeon); + uint32_t dwords = atom->cmd_size; + int i = atom->idx; + radeonTexObj *t = r100->state.texture.unit[i].texobj; + radeon_mipmap_level *lvl; + int hastexture = 1; + + if (!t) + hastexture = 0; + else { + if (!t->mt && !t->bo) + hastexture = 0; + } + dwords += 1; + if (hastexture) + dwords += 2; + else + dwords -= 2; + BEGIN_BATCH_NO_AUTOSTATE(dwords); + + OUT_BATCH(CP_PACKET0(RADEON_PP_TXFILTER_0 + (24 * i), 1)); + OUT_BATCH_TABLE((atom->cmd + 1), 2); + + if (hastexture) { + OUT_BATCH(CP_PACKET0(RADEON_PP_TXOFFSET_0 + (24 * i), 0)); + if (t->mt && !t->image_override) { + if ((ctx->Texture.Unit[i]._ReallyEnabled & TEXTURE_CUBE_BIT)) { + lvl = &t->mt->levels[0]; + OUT_BATCH_RELOC(lvl->faces[5].offset, t->mt->bo, lvl->faces[5].offset, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + } else { + OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + } + } else { + if (t->bo) + OUT_BATCH_RELOC(t->tile_bits, t->bo, 0, + RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); + } + } + + OUT_BATCH(CP_PACKET0(RADEON_PP_TXCBLEND_0 + (i * 24), 1)); + OUT_BATCH_TABLE((atom->cmd+4), 2); + OUT_BATCH(CP_PACKET0(RADEON_PP_BORDER_COLOR_0 + (i * 4), 0)); + OUT_BATCH((atom->cmd[TEX_PP_BORDER_COLOR])); + END_BATCH(); +} + +/* Initialize the context's hardware state. + */ +void radeonInitState( r100ContextPtr rmesa ) +{ + GLcontext *ctx = rmesa->radeon.glCtx; + GLuint i; + + rmesa->radeon.state.color.clear = 0x00000000; switch ( ctx->Visual.depthBits ) { case 16: - rmesa->state.depth.clear = 0x0000ffff; - rmesa->state.depth.scale = 1.0 / (GLfloat)0xffff; - depth_fmt = RADEON_DEPTH_FORMAT_16BIT_INT_Z; - rmesa->state.stencil.clear = 0x00000000; + rmesa->radeon.state.depth.clear = 0x0000ffff; + rmesa->radeon.state.stencil.clear = 0x00000000; break; case 24: - rmesa->state.depth.clear = 0x00ffffff; - rmesa->state.depth.scale = 1.0 / (GLfloat)0xffffff; - depth_fmt = RADEON_DEPTH_FORMAT_24BIT_INT_Z; - rmesa->state.stencil.clear = 0xffff0000; + rmesa->radeon.state.depth.clear = 0x00ffffff; + rmesa->radeon.state.stencil.clear = 0xffff0000; break; default: - fprintf( stderr, "Error: Unsupported depth %d... exiting\n", - ctx->Visual.depthBits ); - exit( -1 ); + break; } - /* Only have hw stencil when depth buffer is 24 bits deep */ - rmesa->state.stencil.hwBuffer = ( ctx->Visual.stencilBits > 0 && - ctx->Visual.depthBits == 24 ); + rmesa->radeon.Fallback = 0; - rmesa->Fallback = 0; - - if ( ctx->Visual.doubleBufferMode && rmesa->sarea->pfCurrentPage == 0 ) { - drawOffset = rmesa->radeonScreen->backOffset; - drawPitch = rmesa->radeonScreen->backPitch; - } else { - drawOffset = rmesa->radeonScreen->frontOffset; - drawPitch = rmesa->radeonScreen->frontPitch; - } - rmesa->hw.max_state_size = 0; + rmesa->radeon.hw.max_state_size = 0; -#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \ +#define ALLOC_STATE_IDX( ATOM, CHK, SZ, NM, FLAG, IDX ) \ do { \ rmesa->hw.ATOM.cmd_size = SZ; \ - rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int)); \ - rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int)); \ - rmesa->hw.ATOM.name = NM; \ + rmesa->hw.ATOM.cmd = (GLuint *)CALLOC(SZ * sizeof(int)); \ + rmesa->hw.ATOM.lastcmd = (GLuint *)CALLOC(SZ * sizeof(int)); \ + rmesa->hw.ATOM.name = NM; \ rmesa->hw.ATOM.is_tcl = FLAG; \ rmesa->hw.ATOM.check = check_##CHK; \ - rmesa->hw.ATOM.dirty = GL_TRUE; \ - rmesa->hw.max_state_size += SZ * sizeof(int); \ + rmesa->hw.ATOM.dirty = GL_TRUE; \ + rmesa->hw.ATOM.idx = IDX; \ + rmesa->radeon.hw.max_state_size += SZ * sizeof(int); \ } while (0) - - + +#define ALLOC_STATE( ATOM, CHK, SZ, NM, FLAG ) \ + ALLOC_STATE_IDX(ATOM, CHK, SZ, NM, FLAG, 0) + /* Allocate state buffers: */ ALLOC_STATE( ctx, always, CTX_STATE_SIZE, "CTX/context", 0 ); + if (rmesa->radeon.radeonScreen->kernel_mm) + rmesa->hw.ctx.emit = ctx_emit_cs; + else + rmesa->hw.ctx.emit = ctx_emit; ALLOC_STATE( lin, always, LIN_STATE_SIZE, "LIN/line", 0 ); ALLOC_STATE( msk, always, MSK_STATE_SIZE, "MSK/mask", 0 ); ALLOC_STATE( vpt, always, VPT_STATE_SIZE, "VPT/viewport", 0 ); @@ -233,20 +673,32 @@ void radeonInitState( radeonContextPtr rmesa ) ALLOC_STATE( fog, fog, FOG_STATE_SIZE, "FOG/fog", 1 ); ALLOC_STATE( glt, tcl_lighting, GLT_STATE_SIZE, "GLT/light-global", 1 ); ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 ); - ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 ); - ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 ); - ALLOC_STATE( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0 ); - if (rmesa->radeonScreen->drmSupportsCubeMapsR100) + ALLOC_STATE_IDX( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0, 0); + ALLOC_STATE_IDX( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0, 1); + ALLOC_STATE_IDX( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0, 2); + + for (i = 0; i < 3; i++) { + if (rmesa->radeon.radeonScreen->kernel_mm) + rmesa->hw.tex[i].emit = tex_emit_cs; + else + rmesa->hw.tex[i].emit = tex_emit; + } + if (rmesa->radeon.radeonScreen->drmSupportsCubeMapsR100) { - ALLOC_STATE( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0 ); - ALLOC_STATE( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0 ); - ALLOC_STATE( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0 ); + ALLOC_STATE_IDX( cube[0], cube0, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 ); + ALLOC_STATE_IDX( cube[1], cube1, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 ); + ALLOC_STATE_IDX( cube[2], cube2, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 ); + for (i = 0; i < 3; i++) + if (rmesa->radeon.radeonScreen->kernel_mm) + rmesa->hw.cube[i].emit = cube_emit_cs; + else + rmesa->hw.cube[i].emit = cube_emit; } else { - ALLOC_STATE( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0 ); - ALLOC_STATE( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0 ); - ALLOC_STATE( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0 ); + ALLOC_STATE_IDX( cube[0], never, CUBE_STATE_SIZE, "CUBE/cube-0", 0, 0 ); + ALLOC_STATE_IDX( cube[1], never, CUBE_STATE_SIZE, "CUBE/cube-1", 0, 1 ); + ALLOC_STATE_IDX( cube[2], never, CUBE_STATE_SIZE, "CUBE/cube-2", 0, 2 ); } ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 ); ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 ); @@ -268,43 +720,43 @@ void radeonInitState( radeonContextPtr rmesa ) ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 ); ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 ); ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 ); - ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 ); - ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 ); - ALLOC_STATE( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0 ); + ALLOC_STATE_IDX( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0, 0 ); + ALLOC_STATE_IDX( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0, 1 ); + ALLOC_STATE_IDX( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0, 2 ); radeonSetUpAtomList( rmesa ); /* Fill in the packet headers: */ - rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(RADEON_EMIT_PP_MISC); - rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(RADEON_EMIT_PP_CNTL); - rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(RADEON_EMIT_RB3D_COLORPITCH); - rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(RADEON_EMIT_RE_LINE_PATTERN); - rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(RADEON_EMIT_SE_LINE_WIDTH); - rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(RADEON_EMIT_RB3D_STENCILREFMASK); - rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(RADEON_EMIT_SE_VPORT_XSCALE); - rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(RADEON_EMIT_SE_CNTL); - rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(RADEON_EMIT_SE_CNTL_STATUS); - rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(RADEON_EMIT_RE_MISC); - rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_0); - rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0); - rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1); - rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1); - rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_2); - rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_2); - rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_0); - rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T0); - rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_1); - rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T1); - rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(RADEON_EMIT_PP_CUBIC_FACES_2); - rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(RADEON_EMIT_PP_CUBIC_OFFSETS_T2); - rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR); - rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT); + rmesa->hw.ctx.cmd[CTX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_MISC); + rmesa->hw.ctx.cmd[CTX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CNTL); + rmesa->hw.ctx.cmd[CTX_CMD_2] = cmdpkt(rmesa, RADEON_EMIT_RB3D_COLORPITCH); + rmesa->hw.lin.cmd[LIN_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_LINE_PATTERN); + rmesa->hw.lin.cmd[LIN_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_LINE_WIDTH); + rmesa->hw.msk.cmd[MSK_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RB3D_STENCILREFMASK); + rmesa->hw.vpt.cmd[VPT_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_VPORT_XSCALE); + rmesa->hw.set.cmd[SET_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL); + rmesa->hw.set.cmd[SET_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_SE_CNTL_STATUS); + rmesa->hw.msc.cmd[MSC_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_RE_MISC); + rmesa->hw.tex[0].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_0); + rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_0); + rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_1); + rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_1); + rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TXFILTER_2); + rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_BORDER_COLOR_2); + rmesa->hw.cube[0].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_0); + rmesa->hw.cube[0].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T0); + rmesa->hw.cube[1].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_1); + rmesa->hw.cube[1].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T1); + rmesa->hw.cube[2].cmd[CUBE_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_FACES_2); + rmesa->hw.cube[2].cmd[CUBE_CMD_1] = cmdpkt(rmesa, RADEON_EMIT_PP_CUBIC_OFFSETS_T2); + rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_ZBIAS_FACTOR); + rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT); rmesa->hw.mtl.cmd[MTL_CMD_0] = - cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED); - rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0); - rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1); - rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_2); + cmdpkt(rmesa, RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED); + rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_0); + rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_1); + rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(rmesa, RADEON_EMIT_PP_TEX_SIZE_2); rmesa->hw.grd.cmd[GRD_CMD_0] = cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 ); rmesa->hw.fog.cmd[FOG_CMD_0] = @@ -331,6 +783,22 @@ void radeonInitState( radeonContextPtr rmesa ) cmdvec( RADEON_VS_UCP_ADDR + i, 1, 4 ); } + if (rmesa->radeon.radeonScreen->kernel_mm) { + rmesa->hw.grd.emit = scl_emit; + rmesa->hw.fog.emit = vec_emit; + rmesa->hw.glt.emit = vec_emit; + rmesa->hw.eye.emit = vec_emit; + + for (i = 0; i <= 6; i++) + rmesa->hw.mat[i].emit = vec_emit; + + for (i = 0; i < 8; i++) + rmesa->hw.lit[i].emit = lit_emit; + + for (i = 0; i < 6; i++) + rmesa->hw.ucp[i].emit = vec_emit; + } + rmesa->last_ReallyEnabled = -1; /* Initial Harware state: @@ -352,19 +820,7 @@ void radeonInitState( radeonContextPtr rmesa ) RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO ); - rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHOFFSET] = - rmesa->radeonScreen->depthOffset + rmesa->radeonScreen->fbLocation; - - rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] = - ((rmesa->radeonScreen->depthPitch & - RADEON_DEPTHPITCH_MASK) | - RADEON_DEPTH_ENDIAN_NO_SWAP); - - if (rmesa->using_hyperz) - rmesa->hw.ctx.cmd[CTX_RB3D_DEPTHPITCH] |= RADEON_DEPTH_HYPERZ; - - rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (depth_fmt | - RADEON_Z_TEST_LESS | + rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] = (RADEON_Z_TEST_LESS | RADEON_STENCIL_TEST_ALWAYS | RADEON_STENCIL_FAIL_KEEP | RADEON_STENCIL_ZPASS_KEEP | @@ -374,7 +830,7 @@ void radeonInitState( radeonContextPtr rmesa ) if (rmesa->using_hyperz) { rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_COMPRESSION_ENABLE | RADEON_Z_DECOMPRESSION_ENABLE; - if (rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { + if (rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) { /* works for q3, but slight rendering errors with glxgears ? */ /* rmesa->hw.ctx.cmd[CTX_RB3D_ZSTENCILCNTL] |= RADEON_Z_HIERARCHY_ENABLE;*/ /* need this otherwise get lots of lockups with q3 ??? */ @@ -386,10 +842,9 @@ void radeonInitState( radeonContextPtr rmesa ) RADEON_ANTI_ALIAS_NONE); rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] = (RADEON_PLANE_MASK_ENABLE | - color_fmt | RADEON_ZBLOCK16); - switch ( driQueryOptioni( &rmesa->optionCache, "dither_mode" ) ) { + switch ( driQueryOptioni( &rmesa->radeon.optionCache, "dither_mode" ) ) { case DRI_CONF_DITHER_XERRORDIFFRESET: rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_INIT; break; @@ -397,31 +852,18 @@ void radeonInitState( radeonContextPtr rmesa ) rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_SCALE_DITHER_ENABLE; break; } - if ( driQueryOptioni( &rmesa->optionCache, "round_mode" ) == + if ( driQueryOptioni( &rmesa->radeon.optionCache, "round_mode" ) == DRI_CONF_ROUND_ROUND ) - rmesa->state.color.roundEnable = RADEON_ROUND_ENABLE; + rmesa->radeon.state.color.roundEnable = RADEON_ROUND_ENABLE; else - rmesa->state.color.roundEnable = 0; - if ( driQueryOptioni (&rmesa->optionCache, "color_reduction" ) == + rmesa->radeon.state.color.roundEnable = 0; + if ( driQueryOptioni (&rmesa->radeon.optionCache, "color_reduction" ) == DRI_CONF_COLOR_REDUCTION_DITHER ) rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_DITHER_ENABLE; else - rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->state.color.roundEnable; - - rmesa->hw.ctx.cmd[CTX_RB3D_COLOROFFSET] = ((drawOffset + - rmesa->radeonScreen->fbLocation) - & RADEON_COLOROFFSET_MASK); - - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] = ((drawPitch & - RADEON_COLORPITCH_MASK) | - RADEON_COLOR_ENDIAN_NO_SWAP); + rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= rmesa->radeon.state.color.roundEnable; - /* (fixed size) sarea is initialized to zero afaics so can omit version check. Phew! */ - if (rmesa->sarea->tiling_enabled) { - rmesa->hw.ctx.cmd[CTX_RB3D_COLORPITCH] |= RADEON_COLOR_TILE_ENABLE; - } - rmesa->hw.set.cmd[SET_SE_CNTL] = (RADEON_FFACE_CULL_CCW | RADEON_BFACE_SOLID | RADEON_FFACE_SOLID | @@ -444,7 +886,7 @@ void radeonInitState( radeonContextPtr rmesa ) RADEON_VC_NO_SWAP; #endif - if (!(rmesa->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { + if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) { rmesa->hw.set.cmd[SET_SE_CNTL_STATUS] |= RADEON_TCL_BYPASS; } @@ -491,8 +933,8 @@ void radeonInitState( radeonContextPtr rmesa ) (2 << RADEON_TXFORMAT_HEIGHT_SHIFT)); /* Initialize the texture offset to the start of the card texture heap */ - rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] = - rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + // rmesa->hw.tex[i].cmd[TEX_PP_TXOFFSET] = + // rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.tex[i].cmd[TEX_PP_BORDER_COLOR] = 0; rmesa->hw.tex[i].cmd[TEX_PP_TXCBLEND] = @@ -513,15 +955,15 @@ void radeonInitState( radeonContextPtr rmesa ) rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_FACES] = 0; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_0] = - rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_1] = - rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_2] = - rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_3] = - rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; rmesa->hw.cube[i].cmd[CUBE_PP_CUBIC_OFFSET_4] = - rmesa->radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; + rmesa->radeon.radeonScreen->texOffset[RADEON_LOCAL_TEX_HEAP]; } /* Can only add ST1 at the time of doing some multitex but can keep @@ -613,5 +1055,7 @@ void radeonInitState( radeonContextPtr rmesa ) rmesa->hw.eye.cmd[EYE_Z] = IEEE_ONE; rmesa->hw.eye.cmd[EYE_RESCALE_FACTOR] = IEEE_ONE; - rmesa->hw.all_dirty = GL_TRUE; + rmesa->radeon.hw.all_dirty = GL_TRUE; + + rcommonInitCmdBuf(&rmesa->radeon); } diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index ebea1fecdc..e31f045991 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -52,8 +52,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_tcl.h" -static void flush_last_swtcl_prim( radeonContextPtr rmesa ); - /* R100: xyzw, c0, c1/fog, stq[0..2] = 4+1+1+3*3 = 15 right? */ /* R200: xyzw, c0, c1/fog, strq[0..5] = 4+1+1+4*6 = 30 */ #define RADEON_MAX_TNL_VERTEX_SIZE (15 * sizeof(GLfloat)) /* for mesa _tnl stage */ @@ -64,18 +62,18 @@ static void flush_last_swtcl_prim( radeonContextPtr rmesa ); #define EMIT_ATTR( ATTR, STYLE, F0 ) \ do { \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = (ATTR); \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = (STYLE); \ - rmesa->swtcl.vertex_attr_count++; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ fmt_0 |= F0; \ } while (0) #define EMIT_PAD( N ) \ do { \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].attrib = 0; \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].format = EMIT_PAD; \ - rmesa->swtcl.vertex_attrs[rmesa->swtcl.vertex_attr_count].offset = (N); \ - rmesa->swtcl.vertex_attr_count++; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \ + rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \ + rmesa->radeon.swtcl.vertex_attr_count++; \ } while (0) static GLuint radeon_cp_vc_frmts[3][2] = @@ -87,7 +85,7 @@ static GLuint radeon_cp_vc_frmts[3][2] = static void radeonSetVertexFormat( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + r100ContextPtr rmesa = R100_CONTEXT( ctx ); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; DECLARE_RENDERINPUTS(index_bitset); @@ -106,7 +104,7 @@ static void radeonSetVertexFormat( GLcontext *ctx ) } assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL ); - rmesa->swtcl.vertex_attr_count = 0; + rmesa->radeon.swtcl.vertex_attr_count = 0; /* EMIT_ATTR's must be in order as they tell t_vertex.c how to * build up a hardware vertex. @@ -204,33 +202,33 @@ static void radeonSetVertexFormat( GLcontext *ctx ) } } - if (!RENDERINPUTS_EQUAL( rmesa->tnl_index_bitset, index_bitset ) || + if (!RENDERINPUTS_EQUAL( rmesa->radeon.tnl_index_bitset, index_bitset ) || fmt_0 != rmesa->swtcl.vertex_format) { RADEON_NEWPRIM(rmesa); rmesa->swtcl.vertex_format = fmt_0; - rmesa->swtcl.vertex_size = + rmesa->radeon.swtcl.vertex_size = _tnl_install_attrs( ctx, - rmesa->swtcl.vertex_attrs, - rmesa->swtcl.vertex_attr_count, + rmesa->radeon.swtcl.vertex_attrs, + rmesa->radeon.swtcl.vertex_attr_count, NULL, 0 ); - rmesa->swtcl.vertex_size /= 4; - RENDERINPUTS_COPY( rmesa->tnl_index_bitset, index_bitset ); + rmesa->radeon.swtcl.vertex_size /= 4; + RENDERINPUTS_COPY( rmesa->radeon.tnl_index_bitset, index_bitset ); if (RADEON_DEBUG & DEBUG_VERTS) fprintf( stderr, "%s: vertex_size= %d floats\n", - __FUNCTION__, rmesa->swtcl.vertex_size); + __FUNCTION__, rmesa->radeon.swtcl.vertex_size); } } static void radeonRenderStart( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + r100ContextPtr rmesa = R100_CONTEXT( ctx ); radeonSetVertexFormat( ctx ); - if (rmesa->dma.flush != 0 && - rmesa->dma.flush != flush_last_swtcl_prim) - rmesa->dma.flush( rmesa ); + if (rmesa->radeon.dma.flush != 0 && + rmesa->radeon.dma.flush != rcommon_flush_last_swtcl_prim) + rmesa->radeon.dma.flush( ctx ); } @@ -241,7 +239,7 @@ static void radeonRenderStart( GLcontext *ctx ) */ void radeonChooseVertexState( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + r100ContextPtr rmesa = R100_CONTEXT( ctx ); TNLcontext *tnl = TNL_CONTEXT(ctx); GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; @@ -254,7 +252,7 @@ void radeonChooseVertexState( GLcontext *ctx ) * rasterization fallback. As this function will be called again when we * leave a rasterization fallback, we can just skip it for now. */ - if (rmesa->Fallback != 0) + if (rmesa->radeon.Fallback != 0) return; /* HW perspective divide is a win, but tiny vertex formats are a @@ -281,80 +279,29 @@ void radeonChooseVertexState( GLcontext *ctx ) } } - -/* Flush vertices in the current dma region. - */ -static void flush_last_swtcl_prim( radeonContextPtr rmesa ) +void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset) { - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, "%s\n", __FUNCTION__); - - rmesa->dma.flush = NULL; + r100ContextPtr rmesa = R100_CONTEXT(ctx); - if (rmesa->dma.current.buf) { - struct radeon_dma_region *current = &rmesa->dma.current; - GLuint current_offset = (rmesa->radeonScreen->gart_buffer_offset + - current->buf->buf->idx * RADEON_BUFFER_SIZE + - current->start); + rcommonEnsureCmdBufSpace(&rmesa->radeon, + rmesa->radeon.hw.max_state_size + (12*sizeof(int)), + __FUNCTION__); - assert (!(rmesa->swtcl.hw_primitive & RADEON_CP_VC_CNTL_PRIM_WALK_IND)); - assert (current->start + - rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == - current->ptr); + radeonEmitState(&rmesa->radeon); + radeonEmitVertexAOS( rmesa, + rmesa->radeon.swtcl.vertex_size, + rmesa->radeon.dma.current, + current_offset); - if (rmesa->dma.current.start != rmesa->dma.current.ptr) { - radeonEnsureCmdBufSpace( rmesa, VERT_AOS_BUFSZ + - rmesa->hw.max_state_size + VBUF_BUFSZ ); - - radeonEmitVertexAOS( rmesa, - rmesa->swtcl.vertex_size, - current_offset); - - radeonEmitVbufPrim( rmesa, - rmesa->swtcl.vertex_format, - rmesa->swtcl.hw_primitive, - rmesa->swtcl.numverts); - } + + radeonEmitVbufPrim( rmesa, + rmesa->swtcl.vertex_format, + rmesa->radeon.swtcl.hw_primitive, + rmesa->radeon.swtcl.numverts); - rmesa->swtcl.numverts = 0; - current->start = current->ptr; - } } - -/* Alloc space in the current dma region. - */ -static INLINE void * -radeonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize ) -{ - GLuint bytes = vsize * nverts; - - if ( rmesa->dma.current.ptr + bytes > rmesa->dma.current.end ) - radeonRefillCurrentDmaRegion( rmesa ); - - if (!rmesa->dma.flush) { - rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES; - rmesa->dma.flush = flush_last_swtcl_prim; - } - - assert( vsize == rmesa->swtcl.vertex_size * 4 ); - assert( rmesa->dma.flush == flush_last_swtcl_prim ); - assert (rmesa->dma.current.start + - rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 == - rmesa->dma.current.ptr); - - - { - GLubyte *head = (GLubyte *)(rmesa->dma.current.address + rmesa->dma.current.ptr); - rmesa->dma.current.ptr += bytes; - rmesa->swtcl.numverts += nverts; - return head; - } - -} - - /* * Render unclipped vertex buffers by emitting vertices directly to * dma buffers. Use strip/fan hardware primitives where possible. @@ -387,22 +334,22 @@ static const GLuint hw_prim[GL_POLYGON+1] = { }; static INLINE void -radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim ) +radeonDmaPrimitive( r100ContextPtr rmesa, GLenum prim ) { RADEON_NEWPRIM( rmesa ); - rmesa->swtcl.hw_primitive = hw_prim[prim]; - assert(rmesa->dma.current.ptr == rmesa->dma.current.start); + rmesa->radeon.swtcl.hw_primitive = hw_prim[prim]; + // assert(rmesa->radeon.dma.current.ptr == rmesa->radeon.dma.current.start); } -#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx) +#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx) #define INIT( prim ) radeonDmaPrimitive( rmesa, prim ) #define FLUSH() RADEON_NEWPRIM( rmesa ) -#define GET_CURRENT_VB_MAX_VERTS() \ - (((int)rmesa->dma.current.end - (int)rmesa->dma.current.ptr) / (rmesa->swtcl.vertex_size*4)) +#define GET_CURRENT_VB_MAX_VERTS() 10\ +// (((int)rmesa->radeon.dma.current.end - (int)rmesa->radeon.dma.current.ptr) / (rmesa->radeon.swtcl.vertex_size*4)) #define GET_SUBSEQUENT_VB_MAX_VERTS() \ - ((RADEON_BUFFER_SIZE) / (rmesa->swtcl.vertex_size*4)) + ((RADEON_BUFFER_SIZE) / (rmesa->radeon.swtcl.vertex_size*4)) #define ALLOC_VERTS( nr ) \ - radeonAllocDmaLowVerts( rmesa, nr, rmesa->swtcl.vertex_size * 4 ) + rcommonAllocDmaLowVerts( &rmesa->radeon, nr, rmesa->radeon.swtcl.vertex_size * 4 ) #define EMIT_VERTS( ctx, j, nr, buf ) \ _tnl_emit_vertices_to_buffer(ctx, j, (j)+(nr), buf) @@ -418,16 +365,13 @@ radeonDmaPrimitive( radeonContextPtr rmesa, GLenum prim ) static GLboolean radeon_run_render( GLcontext *ctx, struct tnl_pipeline_stage *stage ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; tnl_render_func *tab = TAG(render_tab_verts); GLuint i; - if (rmesa->swtcl.indexed_verts.buf) - RELEASE_ELT_VERTS(); - - if (rmesa->swtcl.RenderIndex != 0 || + if (rmesa->radeon.swtcl.RenderIndex != 0 || !radeon_dma_validate_render( ctx, VB )) return GL_TRUE; @@ -496,13 +440,13 @@ static void radeonResetLineStipple( GLcontext *ctx ); #undef LOCAL_VARS #undef ALLOC_VERTS -#define CTX_ARG radeonContextPtr rmesa -#define GET_VERTEX_DWORDS() rmesa->swtcl.vertex_size -#define ALLOC_VERTS( n, size ) radeonAllocDmaLowVerts( rmesa, n, (size) * 4 ) +#define CTX_ARG r100ContextPtr rmesa +#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size +#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, (size) * 4 ) #undef LOCAL_VARS #define LOCAL_VARS \ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ - const char *radeonverts = (char *)rmesa->swtcl.verts; + r100ContextPtr rmesa = R100_CONTEXT(ctx); \ + const char *radeonverts = (char *)rmesa->radeon.swtcl.verts; #define VERT(x) (radeonVertex *)(radeonverts + ((x) * (vertsize) * sizeof(int))) #define VERTEX radeonVertex #undef TAG @@ -560,7 +504,7 @@ static struct { #define VERT_Y(_v) _v->v.y #define VERT_Z(_v) _v->v.z #define AREA_IS_CCW( a ) (a < 0) -#define GET_VERTEX(e) (rmesa->swtcl.verts + ((e) * rmesa->swtcl.vertex_size * sizeof(int))) +#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + ((e) * rmesa->radeon.swtcl.vertex_size * sizeof(int))) #define VERT_SET_RGBA( v, c ) \ do { \ @@ -606,7 +550,7 @@ do { \ #undef INIT #define LOCAL_VARS(n) \ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ + r100ContextPtr rmesa = R100_CONTEXT(ctx); \ GLuint color[n], spec[n]; \ GLuint coloroffset = rmesa->swtcl.coloroffset; \ GLuint specoffset = rmesa->swtcl.specoffset; \ @@ -617,7 +561,7 @@ do { \ ***********************************************************************/ #define RASTERIZE(x) radeonRasterPrimitive( ctx, reduced_hw_prim[x] ) -#define RENDER_PRIMITIVE rmesa->swtcl.render_primitive +#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive #undef TAG #define TAG(x) x #include "tnl_dd/t_dd_unfilled.h" @@ -673,9 +617,9 @@ static void init_rast_tab( void ) } while (0) #undef LOCAL_VARS #define LOCAL_VARS \ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); \ - const GLuint vertsize = rmesa->swtcl.vertex_size; \ - const char *radeonverts = (char *)rmesa->swtcl.verts; \ + r100ContextPtr rmesa = R100_CONTEXT(ctx); \ + const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \ + const char *radeonverts = (char *)rmesa->radeon.swtcl.verts; \ const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \ const GLboolean stipple = ctx->Line.StippleFlag; \ (void) elt; (void) stipple; @@ -700,17 +644,17 @@ static void init_rast_tab( void ) void radeonChooseRenderState( GLcontext *ctx ) { TNLcontext *tnl = TNL_CONTEXT(ctx); - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint index = 0; GLuint flags = ctx->_TriangleCaps; - if (!rmesa->TclFallback || rmesa->Fallback) + if (!rmesa->radeon.TclFallback || rmesa->radeon.Fallback) return; if (flags & DD_TRI_LIGHT_TWOSIDE) index |= RADEON_TWOSIDE_BIT; if (flags & DD_TRI_UNFILLED) index |= RADEON_UNFILLED_BIT; - if (index != rmesa->swtcl.RenderIndex) { + if (index != rmesa->radeon.swtcl.RenderIndex) { tnl->Driver.Render.Points = rast_tab[index].points; tnl->Driver.Render.Line = rast_tab[index].line; tnl->Driver.Render.ClippedLine = rast_tab[index].line; @@ -727,7 +671,7 @@ void radeonChooseRenderState( GLcontext *ctx ) tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon; } - rmesa->swtcl.RenderIndex = index; + rmesa->radeon.swtcl.RenderIndex = index; } } @@ -739,18 +683,18 @@ void radeonChooseRenderState( GLcontext *ctx ) static void radeonRasterPrimitive( GLcontext *ctx, GLuint hwprim ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); - if (rmesa->swtcl.hw_primitive != hwprim) { + if (rmesa->radeon.swtcl.hw_primitive != hwprim) { RADEON_NEWPRIM( rmesa ); - rmesa->swtcl.hw_primitive = hwprim; + rmesa->radeon.swtcl.hw_primitive = hwprim; } } static void radeonRenderPrimitive( GLcontext *ctx, GLenum prim ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - rmesa->swtcl.render_primitive = prim; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + rmesa->radeon.swtcl.render_primitive = prim; if (prim < GL_TRIANGLES || !(ctx->_TriangleCaps & DD_TRI_UNFILLED)) radeonRasterPrimitive( ctx, reduced_hw_prim[prim] ); } @@ -761,7 +705,7 @@ static void radeonRenderFinish( GLcontext *ctx ) static void radeonResetLineStipple( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); RADEON_STATECHANGE( rmesa, lin ); } @@ -795,17 +739,17 @@ static const char *getFallbackString(GLuint bit) void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); - GLuint oldfallback = rmesa->Fallback; + GLuint oldfallback = rmesa->radeon.Fallback; if (mode) { - rmesa->Fallback |= bit; + rmesa->radeon.Fallback |= bit; if (oldfallback == 0) { - RADEON_FIREVERTICES( rmesa ); + radeon_firevertices(&rmesa->radeon); TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_TRUE ); _swsetup_Wakeup( ctx ); - rmesa->swtcl.RenderIndex = ~0; + rmesa->radeon.swtcl.RenderIndex = ~0; if (RADEON_DEBUG & DEBUG_FALLBACKS) { fprintf(stderr, "Radeon begin rasterization fallback: 0x%x %s\n", bit, getFallbackString(bit)); @@ -813,7 +757,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) } } else { - rmesa->Fallback &= ~bit; + rmesa->radeon.Fallback &= ~bit; if (oldfallback == bit) { _swrast_flush( ctx ); tnl->Driver.Render.Start = radeonRenderStart; @@ -826,14 +770,14 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) tnl->Driver.Render.ResetLineStipple = radeonResetLineStipple; TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_RASTER, GL_FALSE ); - if (rmesa->TclFallback) { - /* These are already done if rmesa->TclFallback goes to + if (rmesa->radeon.TclFallback) { + /* These are already done if rmesa->radeon.TclFallback goes to * zero above. But not if it doesn't (RADEON_NO_TCL for * example?) */ _tnl_invalidate_vertex_state( ctx, ~0 ); _tnl_invalidate_vertices( ctx, ~0 ); - RENDERINPUTS_ZERO( rmesa->tnl_index_bitset ); + RENDERINPUTS_ZERO( rmesa->radeon.tnl_index_bitset ); radeonChooseVertexState( ctx ); radeonChooseRenderState( ctx ); } @@ -853,7 +797,7 @@ void radeonFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) void radeonInitSwtcl( GLcontext *ctx ) { TNLcontext *tnl = TNL_CONTEXT(ctx); - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); static int firsttime = 1; if (firsttime) { @@ -872,18 +816,9 @@ void radeonInitSwtcl( GLcontext *ctx ) _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12, RADEON_MAX_TNL_VERTEX_SIZE); - rmesa->swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; - rmesa->swtcl.RenderIndex = ~0; - rmesa->swtcl.render_primitive = GL_TRIANGLES; - rmesa->swtcl.hw_primitive = 0; + rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf; + rmesa->radeon.swtcl.RenderIndex = ~0; + rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES; + rmesa->radeon.swtcl.hw_primitive = 0; } - -void radeonDestroySwtcl( GLcontext *ctx ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - - if (rmesa->swtcl.indexed_verts.buf) - radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, - __FUNCTION__ ); -} diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.h b/src/mesa/drivers/dri/radeon/radeon_swtcl.h index e485052ad7..da89158eeb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.h +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.h @@ -40,7 +40,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_context.h" extern void radeonInitSwtcl( GLcontext *ctx ); -extern void radeonDestroySwtcl( GLcontext *ctx ); extern void radeonChooseRenderState( GLcontext *ctx ); extern void radeonChooseVertexState( GLcontext *ctx ); @@ -63,5 +62,5 @@ extern void radeon_translate_vertex( GLcontext *ctx, extern void radeon_print_vertex( GLcontext *ctx, const radeonVertex *v ); - +extern void r100_swtcl_flush(GLcontext *ctx, uint32_t current_offset); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index 779e9ae5df..df6708f05e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -42,6 +42,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "tnl/tnl.h" #include "tnl/t_pipeline.h" +#include "radeon_common.h" #include "radeon_context.h" #include "radeon_state.h" #include "radeon_ioctl.h" @@ -104,7 +105,7 @@ static GLboolean discrete_prim[0x10] = { }; -#define LOCAL_VARS radeonContextPtr rmesa = RADEON_CONTEXT(ctx) +#define LOCAL_VARS r100ContextPtr rmesa = R100_CONTEXT(ctx) #define ELT_TYPE GLushort #define ELT_INIT(prim, hw_prim) \ @@ -125,7 +126,7 @@ static GLboolean discrete_prim[0x10] = { #define RESET_STIPPLE() do { \ RADEON_STATECHANGE( rmesa, lin ); \ - radeonEmitState( rmesa ); \ + radeonEmitState(&rmesa->radeon); \ } while (0) #define AUTO_STIPPLE( mode ) do { \ @@ -136,31 +137,29 @@ static GLboolean discrete_prim[0x10] = { else \ rmesa->hw.lin.cmd[LIN_RE_LINE_PATTERN] &= \ ~RADEON_LINE_PATTERN_AUTO_RESET; \ - radeonEmitState( rmesa ); \ + radeonEmitState(&rmesa->radeon); \ } while (0) #define ALLOC_ELTS(nr) radeonAllocElts( rmesa, nr ) -static GLushort *radeonAllocElts( radeonContextPtr rmesa, GLuint nr ) +static GLushort *radeonAllocElts( r100ContextPtr rmesa, GLuint nr ) { - if (rmesa->dma.flush) - rmesa->dma.flush( rmesa ); + if (rmesa->radeon.dma.flush) + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); - radeonEnsureCmdBufSpace(rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + ELTS_BUFSZ(nr)); + rcommonEnsureCmdBufSpace(&rmesa->radeon, rmesa->radeon.hw.max_state_size + ELTS_BUFSZ(nr) + + AOS_BUFSZ(rmesa->radeon.tcl.aos_count), __FUNCTION__); - radeonEmitAOS( rmesa, - rmesa->tcl.aos_components, - rmesa->tcl.nr_aos_components, 0 ); + radeonEmitAOS( rmesa, + rmesa->radeon.tcl.aos_count, 0 ); - return radeonAllocEltsOpenEnded( rmesa, - rmesa->tcl.vertex_format, - rmesa->tcl.hw_primitive, nr ); + return radeonAllocEltsOpenEnded( rmesa, rmesa->tcl.vertex_format, + rmesa->tcl.hw_primitive, nr ); } -#define CLOSE_ELTS() RADEON_NEWPRIM( rmesa ) +#define CLOSE_ELTS() if (0) RADEON_NEWPRIM( rmesa ) @@ -174,15 +173,15 @@ static void radeonEmitPrim( GLcontext *ctx, GLuint start, GLuint count) { - radeonContextPtr rmesa = RADEON_CONTEXT( ctx ); + r100ContextPtr rmesa = R100_CONTEXT( ctx ); radeonTclPrimitive( ctx, prim, hwprim ); - radeonEnsureCmdBufSpace( rmesa, AOS_BUFSZ(rmesa->tcl.nr_aos_components) + - rmesa->hw.max_state_size + VBUF_BUFSZ ); + rcommonEnsureCmdBufSpace( &rmesa->radeon, + AOS_BUFSZ(rmesa->radeon.tcl.aos_count) + + rmesa->radeon.hw.max_state_size + VBUF_BUFSZ, __FUNCTION__ ); radeonEmitAOS( rmesa, - rmesa->tcl.aos_components, - rmesa->tcl.nr_aos_components, + rmesa->radeon.tcl.aos_count, start ); /* Why couldn't this packet have taken an offset param? @@ -254,7 +253,7 @@ void radeonTclPrimitive( GLcontext *ctx, GLenum prim, int hw_prim ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint se_cntl; GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE; @@ -371,7 +370,7 @@ radeonComputeFogBlendFactor( GLcontext *ctx, GLfloat fogcoord ) static GLboolean radeon_run_tcl_render( GLcontext *ctx, struct tnl_pipeline_stage *stage ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *VB = &tnl->vb; GLuint inputs = VERT_BIT_POS | VERT_BIT_COLOR0; @@ -379,7 +378,7 @@ static GLboolean radeon_run_tcl_render( GLcontext *ctx, /* TODO: separate this from the swtnl pipeline */ - if (rmesa->TclFallback) + if (rmesa->radeon.TclFallback) return GL_TRUE; /* fallback to software t&l */ if (VB->Count == 0) @@ -461,7 +460,7 @@ const struct tnl_pipeline_stage _radeon_tcl_stage = static void transition_to_swtnl( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); GLuint se_cntl; @@ -490,7 +489,7 @@ static void transition_to_swtnl( GLcontext *ctx ) static void transition_to_hwtnl( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); TNLcontext *tnl = TNL_CONTEXT(ctx); GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; @@ -509,15 +508,15 @@ static void transition_to_hwtnl( GLcontext *ctx ) tnl->Driver.NotifyMaterialChange = radeonUpdateMaterial; - if ( rmesa->dma.flush ) - rmesa->dma.flush( rmesa ); + if ( rmesa->radeon.dma.flush ) + rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); - rmesa->dma.flush = NULL; + rmesa->radeon.dma.flush = NULL; rmesa->swtcl.vertex_format = 0; - if (rmesa->swtcl.indexed_verts.buf) - radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, - __FUNCTION__ ); + // if (rmesa->swtcl.indexed_verts.buf) + // radeonReleaseDmaRegion( rmesa, &rmesa->swtcl.indexed_verts, + // __FUNCTION__ ); if (RADEON_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback\n"); @@ -550,11 +549,11 @@ static char *getFallbackString(GLuint bit) void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - GLuint oldfallback = rmesa->TclFallback; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + GLuint oldfallback = rmesa->radeon.TclFallback; if (mode) { - rmesa->TclFallback |= bit; + rmesa->radeon.TclFallback |= bit; if (oldfallback == 0) { if (RADEON_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "Radeon begin tcl fallback %s\n", @@ -563,7 +562,7 @@ void radeonTclFallback( GLcontext *ctx, GLuint bit, GLboolean mode ) } } else { - rmesa->TclFallback &= ~bit; + rmesa->radeon.TclFallback &= ~bit; if (oldfallback == bit) { if (RADEON_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "Radeon end tcl fallback %s\n", diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index f2b6deb9c0..2549d5cb5c 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/texobj.h" #include "radeon_context.h" +#include "radeon_mipmap_tree.h" #include "radeon_state.h" #include "radeon_ioctl.h" #include "radeon_swtcl.h" @@ -170,10 +171,13 @@ static void radeonSetTexFilter( radeonTexObjPtr t, GLenum minf, GLenum magf ) { GLuint anisotropy = (t->pp_txfilter & RADEON_MAX_ANISO_MASK); + /* Force revalidation to account for switches from/to mipmapping. */ + t->validated = GL_FALSE; + t->pp_txfilter &= ~(RADEON_MIN_FILTER_MASK | RADEON_MAG_FILTER_MASK); /* r100 chips can't handle mipmaps/aniso for cubemap/volume textures */ - if ( t->base.tObj->Target == GL_TEXTURE_CUBE_MAP ) { + if ( t->base.Target == GL_TEXTURE_CUBE_MAP ) { switch ( minf ) { case GL_NEAREST: case GL_NEAREST_MIPMAP_NEAREST: @@ -249,433 +253,13 @@ static void radeonSetTexBorderColor( radeonTexObjPtr t, const GLfloat color[4] ) t->pp_border_color = radeonPackColor( 4, c[0], c[1], c[2], c[3] ); } - -/** - * Allocate space for and load the mesa images into the texture memory block. - * This will happen before drawing with a new texture, or drawing with a - * texture after it was swapped out or teximaged again. - */ - -static radeonTexObjPtr radeonAllocTexObj( struct gl_texture_object *texObj ) -{ - radeonTexObjPtr t; - - t = CALLOC_STRUCT( radeon_tex_obj ); - texObj->DriverData = t; - if ( t != NULL ) { - if ( RADEON_DEBUG & DEBUG_TEXTURE ) { - fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)texObj, (void *)t ); - } - - /* Initialize non-image-dependent parts of the state: - */ - t->base.tObj = texObj; - t->border_fallback = GL_FALSE; - - t->pp_txfilter = RADEON_BORDER_MODE_OGL; - t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP | - RADEON_TXFORMAT_PERSPECTIVE_ENABLE); - - make_empty_list( & t->base ); - - radeonSetTexWrap( t, texObj->WrapS, texObj->WrapT ); - radeonSetTexMaxAnisotropy( t, texObj->MaxAnisotropy ); - radeonSetTexFilter( t, texObj->MinFilter, texObj->MagFilter ); - radeonSetTexBorderColor( t, texObj->BorderColor ); - } - - return t; -} - - -static const struct gl_texture_format * -radeonChooseTextureFormat( GLcontext *ctx, GLint internalFormat, - GLenum format, GLenum type ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - const GLboolean do32bpt = - ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32 ); - const GLboolean force16bpt = - ( rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16 ); - (void) format; - - switch ( internalFormat ) { - case 4: - case GL_RGBA: - case GL_COMPRESSED_RGBA: - switch ( type ) { - case GL_UNSIGNED_INT_10_10_10_2: - case GL_UNSIGNED_INT_2_10_10_10_REV: - return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb1555; - case GL_UNSIGNED_SHORT_4_4_4_4: - case GL_UNSIGNED_SHORT_4_4_4_4_REV: - return _dri_texformat_argb4444; - case GL_UNSIGNED_SHORT_5_5_5_1: - case GL_UNSIGNED_SHORT_1_5_5_5_REV: - return _dri_texformat_argb1555; - default: - return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_argb4444; - } - - case 3: - case GL_RGB: - case GL_COMPRESSED_RGB: - switch ( type ) { - case GL_UNSIGNED_SHORT_4_4_4_4: - case GL_UNSIGNED_SHORT_4_4_4_4_REV: - return _dri_texformat_argb4444; - case GL_UNSIGNED_SHORT_5_5_5_1: - case GL_UNSIGNED_SHORT_1_5_5_5_REV: - return _dri_texformat_argb1555; - case GL_UNSIGNED_SHORT_5_6_5: - case GL_UNSIGNED_SHORT_5_6_5_REV: - return _dri_texformat_rgb565; - default: - return do32bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; - } - - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - return !force16bpt ? - _dri_texformat_argb8888 : _dri_texformat_argb4444; - - case GL_RGBA4: - case GL_RGBA2: - return _dri_texformat_argb4444; - - case GL_RGB5_A1: - return _dri_texformat_argb1555; - - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - return !force16bpt ? _dri_texformat_argb8888 : _dri_texformat_rgb565; - - case GL_RGB5: - case GL_RGB4: - case GL_R3_G3_B2: - return _dri_texformat_rgb565; - - case GL_ALPHA: - case GL_ALPHA4: - case GL_ALPHA8: - case GL_ALPHA12: - case GL_ALPHA16: - case GL_COMPRESSED_ALPHA: - return _dri_texformat_a8; - - case 1: - case GL_LUMINANCE: - case GL_LUMINANCE4: - case GL_LUMINANCE8: - case GL_LUMINANCE12: - case GL_LUMINANCE16: - case GL_COMPRESSED_LUMINANCE: - return _dri_texformat_l8; - - case 2: - case GL_LUMINANCE_ALPHA: - case GL_LUMINANCE4_ALPHA4: - case GL_LUMINANCE6_ALPHA2: - case GL_LUMINANCE8_ALPHA8: - case GL_LUMINANCE12_ALPHA4: - case GL_LUMINANCE12_ALPHA12: - case GL_LUMINANCE16_ALPHA16: - case GL_COMPRESSED_LUMINANCE_ALPHA: - return _dri_texformat_al88; - - case GL_INTENSITY: - case GL_INTENSITY4: - case GL_INTENSITY8: - case GL_INTENSITY12: - case GL_INTENSITY16: - case GL_COMPRESSED_INTENSITY: - return _dri_texformat_i8; - - case GL_YCBCR_MESA: - if (type == GL_UNSIGNED_SHORT_8_8_APPLE || - type == GL_UNSIGNED_BYTE) - return &_mesa_texformat_ycbcr; - else - return &_mesa_texformat_ycbcr_rev; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; - - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return &_mesa_texformat_rgba_dxt1; - - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - return &_mesa_texformat_rgba_dxt3; - - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return &_mesa_texformat_rgba_dxt5; - - default: - _mesa_problem(ctx, "unexpected texture format in %s", __FUNCTION__); - return NULL; - } - - return NULL; /* never get here */ -} - - -static void radeonTexImage1D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - - if ( t ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) radeonAllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage1D"); - return; - } - } - - /* Note, this will call ChooseTextureFormat */ - _mesa_store_teximage1d(ctx, target, level, internalFormat, - width, border, format, type, pixels, - &ctx->Unpack, texObj, texImage); - - t->dirty_images[0] |= (1 << level); -} - - -static void radeonTexSubImage1D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, - GLsizei width, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - - assert( t ); /* this _should_ be true */ - if ( t ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) radeonAllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage1D"); - return; - } - } - - _mesa_store_texsubimage1d(ctx, target, level, xoffset, width, - format, type, pixels, packing, texObj, - texImage); - - t->dirty_images[0] |= (1 << level); -} - - -static void radeonTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLenum format, GLenum type, const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - if ( t != NULL ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) radeonAllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage2D"); - return; - } - } - - /* Note, this will call ChooseTextureFormat */ - _mesa_store_teximage2d(ctx, target, level, internalFormat, - width, height, border, format, type, pixels, - &ctx->Unpack, texObj, texImage); - - t->dirty_images[face] |= (1 << level); -} - - -static void radeonTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, GLenum type, - const GLvoid *pixels, - const struct gl_pixelstore_attrib *packing, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - assert( t ); /* this _should_ be true */ - if ( t ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) radeonAllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage2D"); - return; - } - } - - _mesa_store_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, type, pixels, packing, texObj, - texImage); - - t->dirty_images[face] |= (1 << level); -} - -static void radeonCompressedTexImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint border, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - if ( t != NULL ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) radeonAllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexImage2D"); - return; - } - } - - /* Note, this will call ChooseTextureFormat */ - _mesa_store_compressed_teximage2d(ctx, target, level, internalFormat, width, - height, border, imageSize, data, texObj, texImage); - - t->dirty_images[face] |= (1 << level); -} - - -static void radeonCompressedTexSubImage2D( GLcontext *ctx, GLenum target, GLint level, - GLint xoffset, GLint yoffset, - GLsizei width, GLsizei height, - GLenum format, - GLsizei imageSize, const GLvoid *data, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage ) -{ - driTextureObject * t = (driTextureObject *) texObj->DriverData; - GLuint face; - - - /* which cube face or ordinary 2D image */ - switch (target) { - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: - face = (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; - ASSERT(face < 6); - break; - default: - face = 0; - } - - assert( t ); /* this _should_ be true */ - if ( t ) { - driSwapOutTextureObject( t ); - } - else { - t = (driTextureObject *) radeonAllocTexObj( texObj ); - if (!t) { - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage2D"); - return; - } - } - - _mesa_store_compressed_texsubimage2d(ctx, target, level, xoffset, yoffset, width, - height, format, imageSize, data, texObj, texImage); - - t->dirty_images[face] |= (1 << level); -} - #define SCALED_FLOAT_TO_BYTE( x, scale ) \ (((GLuint)((255.0F / scale) * (x))) / 2) static void radeonTexEnv( GLcontext *ctx, GLenum target, GLenum pname, const GLfloat *param ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint unit = ctx->Texture.CurrentUnit; struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; @@ -706,7 +290,7 @@ static void radeonTexEnv( GLcontext *ctx, GLenum target, * functions, one mapping [-1.0,0.0] to [-128,0] and one mapping * [0.0,4.0] to [0,127]. */ - min = driQueryOptionb (&rmesa->optionCache, "no_neg_lod_bias") ? + min = driQueryOptionb (&rmesa->radeon.optionCache, "no_neg_lod_bias") ? 0.0 : -1.0; bias = CLAMP( *param, min, 4.0 ); if ( bias == 0 ) { @@ -739,7 +323,7 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, struct gl_texture_object *texObj, GLenum pname, const GLfloat *params ) { - radeonTexObjPtr t = (radeonTexObjPtr) texObj->DriverData; + radeonTexObj* t = radeon_tex_obj(texObj); if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { fprintf( stderr, "%s( %s )\n", __FUNCTION__, @@ -767,57 +351,51 @@ static void radeonTexParameter( GLcontext *ctx, GLenum target, case GL_TEXTURE_MAX_LEVEL: case GL_TEXTURE_MIN_LOD: case GL_TEXTURE_MAX_LOD: + /* This isn't the most efficient solution but there doesn't appear to * be a nice alternative. Since there's no LOD clamping, * we just have to rely on loading the right subset of mipmap levels * to simulate a clamped LOD. */ - driSwapOutTextureObject( (driTextureObject *) t ); + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = 0; + t->validated = GL_FALSE; + } break; default: return; } - - /* Mark this texobj as dirty (one bit per tex unit) - */ - t->dirty_state = TEX_ALL; -} - - -static void radeonBindTexture( GLcontext *ctx, GLenum target, - struct gl_texture_object *texObj ) -{ - if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { - fprintf( stderr, "%s( %p ) unit=%d\n", __FUNCTION__, (void *)texObj, - ctx->Texture.CurrentUnit ); - } - - assert( (target != GL_TEXTURE_1D && target != GL_TEXTURE_2D && - target != GL_TEXTURE_RECTANGLE_NV && target != GL_TEXTURE_CUBE_MAP) || - (texObj->DriverData != NULL) ); } - static void radeonDeleteTexture( GLcontext *ctx, struct gl_texture_object *texObj ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - driTextureObject * t = (driTextureObject *) texObj->DriverData; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + radeonTexObj* t = radeon_tex_obj(texObj); + int i; if ( RADEON_DEBUG & (DEBUG_STATE|DEBUG_TEXTURE) ) { fprintf( stderr, "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, _mesa_lookup_enum_by_nr( texObj->Target ) ); } - if ( t != NULL ) { - if ( rmesa ) { - RADEON_FIREVERTICES( rmesa ); - } - - driDestroyTextureObject( t ); + if ( rmesa ) { + radeon_firevertices(&rmesa->radeon); + for ( i = 0 ; i < rmesa->radeon.glCtx->Const.MaxTextureUnits ; i++ ) { + if ( t == rmesa->state.texture.unit[i].texobj ) { + rmesa->state.texture.unit[i].texobj = NULL; + rmesa->hw.tex[i].dirty = GL_FALSE; + rmesa->hw.cube[i].dirty = GL_FALSE; + } + } } + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = 0; + } /* Free mipmap images and the texture object itself */ _mesa_delete_texture_object(ctx, texObj); } @@ -837,7 +415,7 @@ static void radeonTexGen( GLcontext *ctx, GLenum pname, const GLfloat *params ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLuint unit = ctx->Texture.CurrentUnit; rmesa->recheck_texgen[unit] = GL_TRUE; } @@ -851,29 +429,40 @@ static void radeonTexGen( GLcontext *ctx, static struct gl_texture_object * radeonNewTextureObject( GLcontext *ctx, GLuint name, GLenum target ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - struct gl_texture_object *obj; - obj = _mesa_new_texture_object(ctx, name, target); - if (!obj) - return NULL; - obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; - radeonAllocTexObj( obj ); - return obj; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); + + _mesa_initialize_texture_object(&t->base, name, target); + t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; + + t->border_fallback = GL_FALSE; + + t->pp_txfilter = RADEON_BORDER_MODE_OGL; + t->pp_txformat = (RADEON_TXFORMAT_ENDIAN_NO_SWAP | + RADEON_TXFORMAT_PERSPECTIVE_ENABLE); + + radeonSetTexWrap( t, t->base.WrapS, t->base.WrapT ); + radeonSetTexMaxAnisotropy( t, t->base.MaxAnisotropy ); + radeonSetTexFilter( t, t->base.MinFilter, t->base.MagFilter ); + radeonSetTexBorderColor( t, t->base.BorderColor ); + return &t->base; } + void radeonInitTextureFuncs( struct dd_function_table *functions ) { - functions->ChooseTextureFormat = radeonChooseTextureFormat; + functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa; functions->TexImage1D = radeonTexImage1D; functions->TexImage2D = radeonTexImage2D; functions->TexSubImage1D = radeonTexSubImage1D; functions->TexSubImage2D = radeonTexSubImage2D; + functions->GetTexImage = radeonGetTexImage; + functions->GetCompressedTexImage = radeonGetCompressedTexImage; functions->NewTextureObject = radeonNewTextureObject; - functions->BindTexture = radeonBindTexture; + // functions->BindTexture = radeonBindTexture; functions->DeleteTexture = radeonDeleteTexture; - functions->IsTextureResident = driIsTextureResident; functions->TexEnv = radeonTexEnv; functions->TexParameter = radeonTexParameter; @@ -882,5 +471,12 @@ void radeonInitTextureFuncs( struct dd_function_table *functions ) functions->CompressedTexImage2D = radeonCompressedTexImage2D; functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D; + functions->GenerateMipmap = radeonGenerateMipmap; + + functions->NewTextureImage = radeonNewTextureImage; + functions->FreeTexImageData = radeonFreeTexImageData; + functions->MapTexture = radeonMapTexture; + functions->UnmapTexture = radeonUnmapTexture; + driInitTextureFormats(); } diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.h b/src/mesa/drivers/dri/radeon/radeon_tex.h index 8000880828..a4aaddc74f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.h +++ b/src/mesa/drivers/dri/radeon/radeon_tex.h @@ -41,12 +41,16 @@ extern void radeonSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); +extern void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv); +extern void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, + __DRIdrawable *dPriv); + extern void radeonUpdateTextureState( GLcontext *ctx ); -extern int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, +extern int radeonUploadTexImages( r100ContextPtr rmesa, radeonTexObjPtr t, GLuint face ); -extern void radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t ); +extern void radeonDestroyTexObj( r100ContextPtr rmesa, radeonTexObjPtr t ); extern void radeonInitTextureFuncs( struct dd_function_table *functions ); diff --git a/src/mesa/drivers/dri/radeon/radeon_texmem.c b/src/mesa/drivers/dri/radeon/radeon_texmem.c deleted file mode 100644 index 5f7bbe6a4c..0000000000 --- a/src/mesa/drivers/dri/radeon/radeon_texmem.c +++ /dev/null @@ -1,404 +0,0 @@ -/************************************************************************** - -Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and - VA Linux Systems Inc., Fremont, California. - -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation on the rights to use, copy, modify, merge, publish, -distribute, sub license, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial -portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR THEIR -SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Kevin E. Martin <martin@valinux.com> - * Gareth Hughes <gareth@valinux.com> - * - */ -#include <errno.h> - -#include "main/glheader.h" -#include "main/imports.h" -#include "main/context.h" -#include "main/macros.h" - -#include "radeon_context.h" -#include "radeon_ioctl.h" -#include "radeon_tex.h" - -#include <unistd.h> /* for usleep() */ - - -/** - * Destroy any device-dependent state associated with the texture. This may - * include NULLing out hardware state that points to the texture. - */ -void -radeonDestroyTexObj( radeonContextPtr rmesa, radeonTexObjPtr t ) -{ - if ( RADEON_DEBUG & DEBUG_TEXTURE ) { - fprintf( stderr, "%s( %p, %p )\n", __FUNCTION__, (void *)t, (void *)t->base.tObj ); - } - - if ( rmesa != NULL ) { - unsigned i; - - - for ( i = 0 ; i < rmesa->glCtx->Const.MaxTextureUnits ; i++ ) { - if ( t == rmesa->state.texture.unit[i].texobj ) { - rmesa->state.texture.unit[i].texobj = NULL; - } - } - } -} - - -/* ------------------------------------------------------------ - * Texture image conversions - */ - - -static void radeonUploadRectSubImage( radeonContextPtr rmesa, - radeonTexObjPtr t, - struct gl_texture_image *texImage, - GLint x, GLint y, - GLint width, GLint height ) -{ - const struct gl_texture_format *texFormat = texImage->TexFormat; - int blit_format, dstPitch, done; - - switch ( texFormat->TexelBytes ) { - case 1: - blit_format = RADEON_GMC_DST_8BPP_CI; - break; - case 2: - blit_format = RADEON_GMC_DST_16BPP; - break; - case 4: - blit_format = RADEON_GMC_DST_32BPP; - break; - default: - fprintf( stderr, "radeonUploadRectSubImage: unknown blit_format (texelbytes=%d)\n", - texFormat->TexelBytes); - return; - } - - t->image[0][0].data = texImage->Data; - - /* Currently don't need to cope with small pitches. - */ - width = texImage->Width; - height = texImage->Height; - dstPitch = t->pp_txpitch + 32; - - { /* FIXME: prefer GART-texturing if possible */ - /* Data not in GART memory, or bad pitch. - */ - for (done = 0; done < height ; ) { - struct radeon_dma_region region; - int lines = MIN2( height - done, RADEON_BUFFER_SIZE / dstPitch ); - int src_pitch; - char *tex; - - src_pitch = texImage->RowStride * texFormat->TexelBytes; - - tex = (char *)texImage->Data + done * src_pitch; - - memset(®ion, 0, sizeof(region)); - radeonAllocDmaRegion( rmesa, ®ion, lines * dstPitch, 1024 ); - - /* Copy texdata to dma: - */ - if (0) - fprintf(stderr, "%s: src_pitch %d dst_pitch %d\n", - __FUNCTION__, src_pitch, dstPitch); - - if (src_pitch == dstPitch) { - memcpy( region.address + region.start, tex, lines * src_pitch ); - } - else { - char *buf = region.address + region.start; - int i; - for (i = 0 ; i < lines ; i++) { - memcpy( buf, tex, src_pitch ); - buf += dstPitch; - tex += src_pitch; - } - } - - radeonEmitWait( rmesa, RADEON_WAIT_3D ); - - - - /* Blit to framebuffer - */ - radeonEmitBlit( rmesa, - blit_format, - dstPitch, GET_START( ®ion ), - dstPitch, t->bufAddr, - 0, 0, - 0, done, - width, lines ); - - radeonEmitWait( rmesa, RADEON_WAIT_2D ); - - radeonReleaseDmaRegion( rmesa, ®ion, __FUNCTION__ ); - done += lines; - } - } -} - - -/** - * Upload the texture image associated with texture \a t at the specified - * level at the address relative to \a start. - */ -static void uploadSubImage( radeonContextPtr rmesa, radeonTexObjPtr t, - GLint hwlevel, - GLint x, GLint y, GLint width, GLint height, - GLuint face ) -{ - struct gl_texture_image *texImage = NULL; - GLuint offset; - GLint imageWidth, imageHeight; - GLint ret; - drm_radeon_texture_t tex; - drm_radeon_tex_image_t tmp; - const int level = hwlevel + t->base.firstLevel; - - if ( RADEON_DEBUG & DEBUG_TEXTURE ) { - fprintf( stderr, "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", - __FUNCTION__, (void *)t, (void *)t->base.tObj, level, width, height, face ); - } - - ASSERT(face < 6); - - /* Ensure we have a valid texture to upload */ - if ( ( hwlevel < 0 ) || ( hwlevel >= RADEON_MAX_TEXTURE_LEVELS ) ) { - _mesa_problem(NULL, "bad texture level in %s", __FUNCTION__); - return; - } - - texImage = t->base.tObj->Image[face][level]; - - if ( !texImage ) { - if ( RADEON_DEBUG & DEBUG_TEXTURE ) - fprintf( stderr, "%s: texImage %d is NULL!\n", __FUNCTION__, level ); - return; - } - if ( !texImage->Data ) { - if ( RADEON_DEBUG & DEBUG_TEXTURE ) - fprintf( stderr, "%s: image data is NULL!\n", __FUNCTION__ ); - return; - } - - - if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - assert(level == 0); - assert(hwlevel == 0); - if ( RADEON_DEBUG & DEBUG_TEXTURE ) - fprintf( stderr, "%s: image data is rectangular\n", __FUNCTION__); - radeonUploadRectSubImage( rmesa, t, texImage, x, y, width, height ); - return; - } - - imageWidth = texImage->Width; - imageHeight = texImage->Height; - - offset = t->bufAddr + t->base.totalSize * face / 6; - - if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { - GLint imageX = 0; - GLint imageY = 0; - GLint blitX = t->image[face][hwlevel].x; - GLint blitY = t->image[face][hwlevel].y; - GLint blitWidth = t->image[face][hwlevel].width; - GLint blitHeight = t->image[face][hwlevel].height; - fprintf( stderr, " upload image: %d,%d at %d,%d\n", - imageWidth, imageHeight, imageX, imageY ); - fprintf( stderr, " upload blit: %d,%d at %d,%d\n", - blitWidth, blitHeight, blitX, blitY ); - fprintf( stderr, " blit ofs: 0x%07x level: %d/%d\n", - (GLuint)offset, hwlevel, level ); - } - - t->image[face][hwlevel].data = texImage->Data; - - /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. - * NOTE: we're always use a 1KB-wide blit and I8 texture format. - * We used to use 1, 2 and 4-byte texels and used to use the texture - * width to dictate the blit width - but that won't work for compressed - * textures. (Brian) - * NOTE: can't do that with texture tiling. (sroland) - */ - tex.offset = offset; - tex.image = &tmp; - /* copy (x,y,width,height,data) */ - memcpy( &tmp, &t->image[face][hwlevel], sizeof(drm_radeon_tex_image_t) ); - - if (texImage->TexFormat->TexelBytes) { - /* use multi-byte upload scheme */ - tex.height = imageHeight; - tex.width = imageWidth; - tex.format = t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK; - tex.pitch = MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / 64, 1); - tex.offset += tmp.x & ~1023; - tmp.x = tmp.x % 1024; - if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) { - /* need something like "tiled coordinates" ? */ - tmp.y = tmp.x / (tex.pitch * 128) * 2; - tmp.x = tmp.x % (tex.pitch * 128) / 2 / texImage->TexFormat->TexelBytes; - tex.pitch |= RADEON_DST_TILE_MICRO >> 22; - } - else { - tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); - } - if ((t->tile_bits & RADEON_TXO_MACRO_TILE) && - (texImage->Width * texImage->TexFormat->TexelBytes >= 256)) { - /* radeon switches off macro tiling for small textures/mipmaps it seems */ - tex.pitch |= RADEON_DST_TILE_MACRO >> 22; - } - } - else { - /* In case of for instance 8x8 texture (2x2 dxt blocks), padding after the first two blocks is - needed (only with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ - /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) has 4 real pixels. Needed - so the kernel module reads the right amount of data. */ - tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ - tex.pitch = (BLIT_WIDTH_BYTES / 64); - tex.height = (imageHeight + 3) / 4; - tex.width = (imageWidth + 3) / 4; - switch (t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) { - case RADEON_TXFORMAT_DXT1: - tex.width *= 8; - break; - case RADEON_TXFORMAT_DXT23: - case RADEON_TXFORMAT_DXT45: - tex.width *= 16; - break; - } - } - - LOCK_HARDWARE( rmesa ); - do { - ret = drmCommandWriteRead( rmesa->dri.fd, DRM_RADEON_TEXTURE, - &tex, sizeof(drm_radeon_texture_t) ); - } while ( ret == -EAGAIN ); - - UNLOCK_HARDWARE( rmesa ); - - if ( ret ) { - fprintf( stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret ); - fprintf( stderr, " offset=0x%08x\n", - offset ); - fprintf( stderr, " image width=%d height=%d\n", - imageWidth, imageHeight ); - fprintf( stderr, " blit width=%d height=%d data=%p\n", - t->image[face][hwlevel].width, t->image[face][hwlevel].height, - t->image[face][hwlevel].data ); - exit( 1 ); - } -} - - -/** - * Upload the texture images associated with texture \a t. This might - * require the allocation of texture memory. - * - * \param rmesa Context pointer - * \param t Texture to be uploaded - * \param face Cube map face to be uploaded. Zero for non-cube maps. - */ - -int radeonUploadTexImages( radeonContextPtr rmesa, radeonTexObjPtr t, GLuint face ) -{ - int numLevels; - - if ( !t || t->base.totalSize == 0 || t->image_override ) - return 0; - - if ( RADEON_DEBUG & (DEBUG_TEXTURE|DEBUG_IOCTL) ) { - fprintf( stderr, "%s( %p, %p ) sz=%d lvls=%d-%d\n", __FUNCTION__, - (void *)rmesa->glCtx, (void *)t->base.tObj, t->base.totalSize, - t->base.firstLevel, t->base.lastLevel ); - } - - numLevels = t->base.lastLevel - t->base.firstLevel + 1; - - if (RADEON_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); - radeonFinish( rmesa->glCtx ); - } - - LOCK_HARDWARE( rmesa ); - - if ( t->base.memBlock == NULL ) { - int heap; - - heap = driAllocateTexture( rmesa->texture_heaps, rmesa->nr_heaps, - (driTextureObject *) t ); - if ( heap == -1 ) { - UNLOCK_HARDWARE( rmesa ); - return -1; - } - - /* Set the base offset of the texture image */ - t->bufAddr = rmesa->radeonScreen->texOffset[heap] - + t->base.memBlock->ofs; - t->pp_txoffset = t->bufAddr; - - if (!(t->base.tObj->Image[0][0]->IsClientData)) { - /* hope it's safe to add that here... */ - t->pp_txoffset |= t->tile_bits; - } - - /* Mark this texobj as dirty on all units: - */ - t->dirty_state = TEX_ALL; - } - - - /* Let the world know we've used this memory recently. - */ - driUpdateTextureLRU( (driTextureObject *) t ); - UNLOCK_HARDWARE( rmesa ); - - - /* Upload any images that are new */ - if (t->base.dirty_images[face]) { - int i; - for ( i = 0 ; i < numLevels ; i++ ) { - if ( (t->base.dirty_images[face] & (1 << (i+t->base.firstLevel))) != 0 ) { - uploadSubImage( rmesa, t, i, 0, 0, t->image[face][i].width, - t->image[face][i].height, face ); - } - } - t->base.dirty_images[face] = 0; - } - - if (RADEON_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "%s: Syncing\n", __FUNCTION__ ); - radeonFinish( rmesa->glCtx ); - } - - return 0; -} diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index b165205c09..279bcd4df6 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -39,10 +39,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/macros.h" #include "main/texformat.h" +#include "main/teximage.h" #include "main/texobj.h" #include "main/enums.h" #include "radeon_context.h" +#include "radeon_mipmap_tree.h" #include "radeon_state.h" #include "radeon_ioctl.h" #include "radeon_swtcl.h" @@ -75,10 +77,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5) \ && (tx_table[f].format != 0xffffffff) ) -static const struct { +struct tx_table { GLuint format, filter; -} -tx_table[] = +}; + +static const struct tx_table tx_table[] = { _ALPHA(RGBA8888), _ALPHA_REV(RGBA8888), @@ -111,252 +114,6 @@ tx_table[] = #undef _ALPHA #undef _INVALID -/** - * This function computes the number of bytes of storage needed for - * the given texture object (all mipmap levels, all cube faces). - * The \c image[face][level].x/y/width/height parameters for upload/blitting - * are computed here. \c pp_txfilter, \c pp_txformat, etc. will be set here - * too. - * - * \param rmesa Context pointer - * \param tObj GL texture object whose images are to be posted to - * hardware state. - */ -static void radeonSetTexImages( radeonContextPtr rmesa, - struct gl_texture_object *tObj ) -{ - radeonTexObjPtr t = (radeonTexObjPtr)tObj->DriverData; - const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint curOffset, blitWidth; - GLint i, texelBytes; - GLint numLevels; - GLint log2Width, log2Height, log2Depth; - - /* Set the hardware texture format - */ - if ( !t->image_override ) { - t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK | - RADEON_TXFORMAT_ALPHA_IN_MAP); - t->pp_txfilter &= ~RADEON_YUV_TO_RGB; - - if ( VALID_FORMAT( baseImage->TexFormat->MesaFormat ) ) { - t->pp_txformat |= tx_table[ baseImage->TexFormat->MesaFormat ].format; - t->pp_txfilter |= tx_table[ baseImage->TexFormat->MesaFormat ].filter; - } - else { - _mesa_problem(NULL, "unexpected texture format in %s", __FUNCTION__); - return; - } - } - - texelBytes = baseImage->TexFormat->TexelBytes; - - /* Compute which mipmap levels we really want to send to the hardware. - */ - - if (tObj->Target != GL_TEXTURE_CUBE_MAP) - driCalculateTextureFirstLastLevel( (driTextureObject *) t ); - else { - /* r100 can't handle mipmaps for cube/3d textures, so don't waste - memory for them */ - t->base.firstLevel = t->base.lastLevel = tObj->BaseLevel; - } - log2Width = tObj->Image[0][t->base.firstLevel]->WidthLog2; - log2Height = tObj->Image[0][t->base.firstLevel]->HeightLog2; - log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; - - numLevels = t->base.lastLevel - t->base.firstLevel + 1; - - assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); - - /* Calculate mipmap offsets and dimensions for blitting (uploading) - * The idea is that we lay out the mipmap levels within a block of - * memory organized as a rectangle of width BLIT_WIDTH_BYTES. - */ - curOffset = 0; - blitWidth = BLIT_WIDTH_BYTES; - t->tile_bits = 0; - - /* figure out if this texture is suitable for tiling. */ - if (texelBytes && (tObj->Target != GL_TEXTURE_RECTANGLE_NV)) { - if (rmesa->texmicrotile && (baseImage->Height > 1)) { - /* allow 32 (bytes) x 1 mip (which will use two times the space - the non-tiled version would use) max if base texture is large enough */ - if ((numLevels == 1) || - (((baseImage->Width * texelBytes / baseImage->Height) <= 32) && - (baseImage->Width * texelBytes > 64)) || - ((baseImage->Width * texelBytes / baseImage->Height) <= 16)) { - /* R100 has two microtile bits (only the txoffset reg, not the blitter) - weird: X2 + OPT: 32bit correct, 16bit completely hosed - X2: 32bit correct, 16bit correct - OPT: 32bit large mips correct, small mips hosed, 16bit completely hosed */ - t->tile_bits |= RADEON_TXO_MICRO_TILE_X2 /*| RADEON_TXO_MICRO_TILE_OPT*/; - } - } - if ((baseImage->Width * texelBytes >= 256) && (baseImage->Height >= 16)) { - /* R100 disables macro tiling only if mip width is smaller than 256 bytes, and not - in the case if height is smaller than 16 (not 100% sure), as does the r200, - so need to disable macro tiling in that case */ - if ((numLevels == 1) || ((baseImage->Width * texelBytes / baseImage->Height) <= 4)) { - t->tile_bits |= RADEON_TXO_MACRO_TILE; - } - } - } - - for (i = 0; i < numLevels; i++) { - const struct gl_texture_image *texImage; - GLuint size; - - texImage = tObj->Image[0][i + t->base.firstLevel]; - if ( !texImage ) - break; - - /* find image size in bytes */ - if (texImage->IsCompressed) { - /* need to calculate the size AFTER padding even though the texture is - submitted without padding. - Only handle pot textures currently - don't know if npot is even possible, - size calculation would certainly need (trivial) adjustments. - Align (and later pad) to 32byte, not sure what that 64byte blit width is - good for? */ - if ((t->pp_txformat & RADEON_TXFORMAT_FORMAT_MASK) == RADEON_TXFORMAT_DXT1) { - /* RGB_DXT1/RGBA_DXT1, 8 bytes per block */ - if ((texImage->Width + 3) < 8) /* width one block */ - size = texImage->CompressedSize * 4; - else if ((texImage->Width + 3) < 16) - size = texImage->CompressedSize * 2; - else size = texImage->CompressedSize; - } - else /* DXT3/5, 16 bytes per block */ - if ((texImage->Width + 3) < 8) - size = texImage->CompressedSize * 2; - else size = texImage->CompressedSize; - } - else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - size = ((texImage->Width * texelBytes + 63) & ~63) * texImage->Height; - } - else if (t->tile_bits & RADEON_TXO_MICRO_TILE_X2) { - /* tile pattern is 16 bytes x2. mipmaps stay 32 byte aligned, - though the actual offset may be different (if texture is less than - 32 bytes width) to the untiled case */ - int w = (texImage->Width * texelBytes * 2 + 31) & ~31; - size = (w * ((texImage->Height + 1) / 2)) * texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } - else { - int w = (texImage->Width * texelBytes + 31) & ~31; - size = w * texImage->Height * texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } - assert(size > 0); - - /* Align to 32-byte offset. It is faster to do this unconditionally - * (no branch penalty). - */ - - curOffset = (curOffset + 0x1f) & ~0x1f; - - if (texelBytes) { - t->image[0][i].x = curOffset; /* fix x and y coords up later together with offset */ - t->image[0][i].y = 0; - t->image[0][i].width = MIN2(size / texelBytes, blitWidth); - t->image[0][i].height = (size / texelBytes) / t->image[0][i].width; - } - else { - t->image[0][i].x = curOffset % BLIT_WIDTH_BYTES; - t->image[0][i].y = curOffset / BLIT_WIDTH_BYTES; - t->image[0][i].width = MIN2(size, BLIT_WIDTH_BYTES); - t->image[0][i].height = size / t->image[0][i].width; - } - -#if 0 - /* for debugging only and only applicable to non-rectangle targets */ - assert(size % t->image[0][i].width == 0); - assert(t->image[0][i].x == 0 - || (size < BLIT_WIDTH_BYTES && t->image[0][i].height == 1)); -#endif - - if (0) - fprintf(stderr, - "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", - i, texImage->Width, texImage->Height, - t->image[0][i].x, t->image[0][i].y, - t->image[0][i].width, t->image[0][i].height, size, curOffset); - - curOffset += size; - - } - - /* Align the total size of texture memory block. - */ - t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; - - /* Setup remaining cube face blits, if needed */ - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - const GLuint faceSize = t->base.totalSize; - GLuint face; - /* reuse face 0 x/y/width/height - just update the offset when uploading */ - for (face = 1; face < 6; face++) { - for (i = 0; i < numLevels; i++) { - t->image[face][i].x = t->image[0][i].x; - t->image[face][i].y = t->image[0][i].y; - t->image[face][i].width = t->image[0][i].width; - t->image[face][i].height = t->image[0][i].height; - } - } - t->base.totalSize = 6 * faceSize; /* total texmem needed */ - } - - /* Hardware state: - */ - t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK; - t->pp_txfilter |= (numLevels - 1) << RADEON_MAX_MIP_LEVEL_SHIFT; - - t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK | - RADEON_TXFORMAT_HEIGHT_MASK | - RADEON_TXFORMAT_CUBIC_MAP_ENABLE | - RADEON_TXFORMAT_F5_WIDTH_MASK | - RADEON_TXFORMAT_F5_HEIGHT_MASK); - t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) | - (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT)); - - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - assert(log2Width == log2Height); - t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) | - (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) | - (RADEON_TXFORMAT_CUBIC_MAP_ENABLE)); - t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) | - (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) | - (log2Width << RADEON_FACE_WIDTH_2_SHIFT) | - (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) | - (log2Width << RADEON_FACE_WIDTH_3_SHIFT) | - (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) | - (log2Width << RADEON_FACE_WIDTH_4_SHIFT) | - (log2Height << RADEON_FACE_HEIGHT_4_SHIFT)); - } - - t->pp_txsize = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << 0) | - ((tObj->Image[0][t->base.firstLevel]->Height - 1) << 16)); - - /* Only need to round to nearest 32 for textures, but the blitter - * requires 64-byte aligned pitches, and we may/may not need the - * blitter. NPOT only! - */ - if ( !t->image_override ) { - if (baseImage->IsCompressed) - t->pp_txpitch = (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); - else - t->pp_txpitch = ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); - t->pp_txpitch -= 32; - } - - t->dirty_state = TEX_ALL; - - /* FYI: radeonUploadTexImages( rmesa, t ); used to be called here */ -} - - - /* ================================================================ * Texture combine functions */ @@ -503,7 +260,7 @@ do { \ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; GLuint color_combine, alpha_combine; const GLuint color_combine0 = RADEON_COLOR_ARG_A_ZERO | RADEON_COLOR_ARG_B_ZERO @@ -846,22 +603,21 @@ static GLboolean radeonUpdateTextureEnv( GLcontext *ctx, int unit ) void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch) { - radeonContextPtr rmesa = pDRICtx->driverPrivate; + r100ContextPtr rmesa = pDRICtx->driverPrivate; struct gl_texture_object *tObj = - _mesa_lookup_texture(rmesa->glCtx, texname); - radeonTexObjPtr t; + _mesa_lookup_texture(rmesa->radeon.glCtx, texname); + radeonTexObjPtr t = radeon_tex_obj(tObj); if (tObj == NULL) return; - t = (radeonTexObjPtr) tObj->DriverData; - t->image_override = GL_TRUE; if (!offset) return; - - t->pp_txoffset = offset; + + t->bo = NULL; + t->override_offset = offset; t->pp_txpitch = pitch - 32; switch (depth) { @@ -881,6 +637,125 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname, } } +void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, + __DRIdrawable *dPriv) +{ + struct gl_texture_unit *texUnit; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + struct radeon_renderbuffer *rb; + radeon_texture_image *rImage; + radeonContextPtr radeon; + r100ContextPtr rmesa; + struct radeon_framebuffer *rfb; + radeonTexObjPtr t; + uint32_t pitch_val; + uint32_t internalFormat, type, format; + + type = GL_BGRA; + format = GL_UNSIGNED_BYTE; + internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4); + + radeon = pDRICtx->driverPrivate; + rmesa = pDRICtx->driverPrivate; + + rfb = dPriv->driverPrivate; + texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit]; + texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target); + texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0); + + rImage = get_radeon_texture_image(texImage); + t = radeon_tex_obj(texObj); + if (t == NULL) { + return; + } + + radeon_update_renderbuffers(pDRICtx, dPriv); + /* back & depth buffer are useless free them right away */ + rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; + if (rb && rb->bo) { + radeon_bo_unref(rb->bo); + rb->bo = NULL; + } + rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; + if (rb && rb->bo) { + radeon_bo_unref(rb->bo); + rb->bo = NULL; + } + rb = rfb->color_rb[0]; + if (rb->bo == NULL) { + /* Failed to BO for the buffer */ + return; + } + + _mesa_lock_texture(radeon->glCtx, texObj); + if (t->bo) { + radeon_bo_unref(t->bo); + t->bo = NULL; + } + if (rImage->bo) { + radeon_bo_unref(rImage->bo); + rImage->bo = NULL; + } + if (t->mt) { + radeon_miptree_unreference(t->mt); + t->mt = NULL; + } + if (rImage->mt) { + radeon_miptree_unreference(rImage->mt); + rImage->mt = NULL; + } + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, + rb->width, rb->height, 1, 0, rb->cpp); + texImage->RowStride = rb->pitch / rb->cpp; + texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx, + internalFormat, + type, format, 0); + rImage->bo = rb->bo; + radeon_bo_ref(rImage->bo); + t->bo = rb->bo; + radeon_bo_ref(t->bo); + t->tile_bits = 0; + t->image_override = GL_TRUE; + t->override_offset = 0; + t->pp_txpitch &= (1 << 13) -1; + pitch_val = rb->pitch; + switch (rb->cpp) { + case 4: + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format; + else + t->pp_txformat = tx_table[MESA_FORMAT_ARGB8888].format; + t->pp_txfilter |= tx_table[MESA_FORMAT_ARGB8888].filter; + break; + case 3: + default: + t->pp_txformat = tx_table[MESA_FORMAT_RGB888].format; + t->pp_txfilter |= tx_table[MESA_FORMAT_RGB888].filter; + break; + case 2: + t->pp_txformat = tx_table[MESA_FORMAT_RGB565].format; + t->pp_txfilter |= tx_table[MESA_FORMAT_RGB565].filter; + break; + } + t->pp_txsize = ((rb->width - 1) << RADEON_TEX_USIZE_SHIFT) + | ((rb->height - 1) << RADEON_TEX_VSIZE_SHIFT); + t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2; + t->pp_txpitch = pitch_val; + t->pp_txpitch -= 32; + + t->validated = GL_TRUE; + _mesa_unlock_texture(radeon->glCtx, texObj); + return; +} + + +void radeonSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + radeonSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); +} + + #define TEXOBJ_TXFILTER_MASK (RADEON_MAX_MIP_LEVEL_MASK | \ RADEON_MIN_FILTER_MASK | \ RADEON_MAG_FILTER_MASK | \ @@ -901,12 +776,53 @@ void radeonSetTexOffset(__DRIcontext * pDRICtx, GLint texname, RADEON_TXFORMAT_NON_POWER2) -static void import_tex_obj_state( radeonContextPtr rmesa, +static void disable_tex_obj_state( r100ContextPtr rmesa, + int unit ) +{ + RADEON_STATECHANGE( rmesa, tex[unit] ); + + RADEON_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) | + RADEON_Q_BIT(unit)); + + if (rmesa->radeon.TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) { + TCL_FALLBACK( rmesa->radeon.glCtx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE); + rmesa->recheck_texgen[unit] = GL_TRUE; + } + + if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) { + /* this seems to be a genuine (r100 only?) hw bug. Need to remove the + cubic_map bit on unit 2 when the unit is disabled, otherwise every + 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other + units, better be safe than sorry though).*/ + RADEON_STATECHANGE( rmesa, tex[unit] ); + rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE; + } + + { + GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4; + GLuint tmp = rmesa->TexGenEnabled; + + rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit); + rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit); + rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift); + rmesa->TexGenNeedNormals[unit] = 0; + rmesa->TexGenEnabled |= + (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift; + + if (tmp != rmesa->TexGenEnabled) { + rmesa->recheck_texgen[unit] = GL_TRUE; + rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; + } + } +} + +static void import_tex_obj_state( r100ContextPtr rmesa, int unit, radeonTexObjPtr texobj ) { /* do not use RADEON_DB_STATE to avoid stale texture caches */ - int *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; + uint32_t *cmd = &rmesa->hw.tex[unit].cmd[TEX_CMD_0]; GLuint se_coord_fmt = rmesa->hw.set.cmd[SET_SE_COORDFMT]; RADEON_STATECHANGE( rmesa, tex[unit] ); @@ -915,10 +831,9 @@ static void import_tex_obj_state( radeonContextPtr rmesa, cmd[TEX_PP_TXFILTER] |= texobj->pp_txfilter & TEXOBJ_TXFILTER_MASK; cmd[TEX_PP_TXFORMAT] &= ~TEXOBJ_TXFORMAT_MASK; cmd[TEX_PP_TXFORMAT] |= texobj->pp_txformat & TEXOBJ_TXFORMAT_MASK; - cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset; cmd[TEX_PP_BORDER_COLOR] = texobj->pp_border_color; - if (texobj->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + if (texobj->base.Target == GL_TEXTURE_RECTANGLE_NV) { GLuint *txr_cmd = RADEON_DB_STATE( txr[unit] ); txr_cmd[TXR_PP_TEX_SIZE] = texobj->pp_txsize; /* NPOT only! */ txr_cmd[TXR_PP_TEX_PITCH] = texobj->pp_txpitch; /* NPOT only! */ @@ -928,22 +843,12 @@ static void import_tex_obj_state( radeonContextPtr rmesa, else { se_coord_fmt &= ~(RADEON_VTX_ST0_NONPARAMETRIC << unit); - if (texobj->base.tObj->Target == GL_TEXTURE_CUBE_MAP) { - int *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; - GLuint bytesPerFace = texobj->base.totalSize / 6; - ASSERT(texobj->base.totalSize % 6 == 0); + if (texobj->base.Target == GL_TEXTURE_CUBE_MAP) { + uint32_t *cube_cmd = &rmesa->hw.cube[unit].cmd[CUBE_CMD_0]; RADEON_STATECHANGE( rmesa, cube[unit] ); cube_cmd[CUBE_PP_CUBIC_FACES] = texobj->pp_cubic_faces; - /* dont know if this setup conforms to OpenGL.. - * at least it matches the behavior of mesa software renderer - */ - cube_cmd[CUBE_PP_CUBIC_OFFSET_0] = texobj->pp_txoffset; /* right */ - cube_cmd[CUBE_PP_CUBIC_OFFSET_1] = texobj->pp_txoffset + 1 * bytesPerFace; /* left */ - cube_cmd[CUBE_PP_CUBIC_OFFSET_2] = texobj->pp_txoffset + 2 * bytesPerFace; /* top */ - cube_cmd[CUBE_PP_CUBIC_OFFSET_3] = texobj->pp_txoffset + 3 * bytesPerFace; /* bottom */ - cube_cmd[CUBE_PP_CUBIC_OFFSET_4] = texobj->pp_txoffset + 4 * bytesPerFace; /* front */ - cmd[TEX_PP_TXOFFSET] = texobj->pp_txoffset + 5 * bytesPerFace; /* back */ + /* state filled out in the cube_emit */ } } @@ -952,13 +857,11 @@ static void import_tex_obj_state( radeonContextPtr rmesa, rmesa->hw.set.cmd[SET_SE_COORDFMT] = se_coord_fmt; } - texobj->dirty_state &= ~(1<<unit); + rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; } - - -static void set_texgen_matrix( radeonContextPtr rmesa, +static void set_texgen_matrix( r100ContextPtr rmesa, GLuint unit, const GLfloat *s_plane, const GLfloat *t_plane, @@ -986,14 +889,14 @@ static void set_texgen_matrix( radeonContextPtr rmesa, rmesa->TexGenMatrix[unit].m[15] = q_plane[3]; rmesa->TexGenEnabled |= RADEON_TEXMAT_0_ENABLE << unit; - rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; + rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; } /* Returns GL_FALSE if fallback required. */ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4; GLuint tmp = rmesa->TexGenEnabled; @@ -1094,283 +997,187 @@ static GLboolean radeon_validate_texgen( GLcontext *ctx, GLuint unit ) } if (tmp != rmesa->TexGenEnabled) { - rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; + rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; } return GL_TRUE; } - -static void disable_tex( GLcontext *ctx, int unit ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - - if (rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit)) { - /* Texture unit disabled */ - if ( rmesa->state.texture.unit[unit].texobj != NULL ) { - /* The old texture is no longer bound to this texture unit. - * Mark it as such. - */ - - rmesa->state.texture.unit[unit].texobj->base.bound &= ~(1UL << unit); - rmesa->state.texture.unit[unit].texobj = NULL; - } - - RADEON_STATECHANGE( rmesa, ctx ); - rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= - ~((RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit); - - RADEON_STATECHANGE( rmesa, tcl ); - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_ST_BIT(unit) | - RADEON_Q_BIT(unit)); - - if (rmesa->TclFallback & (RADEON_TCL_FALLBACK_TEXGEN_0<<unit)) { - TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), GL_FALSE); - rmesa->recheck_texgen[unit] = GL_TRUE; - } - - if (rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) { - /* this seems to be a genuine (r100 only?) hw bug. Need to remove the - cubic_map bit on unit 2 when the unit is disabled, otherwise every - 2nd (2d) mipmap on unit 0 will be broken (may not be needed for other - units, better be safe than sorry though).*/ - RADEON_STATECHANGE( rmesa, tex[unit] ); - rmesa->hw.tex[unit].cmd[TEX_PP_TXFORMAT] &= ~RADEON_TXFORMAT_CUBIC_MAP_ENABLE; - } - - { - GLuint inputshift = RADEON_TEXGEN_0_INPUT_SHIFT + unit*4; - GLuint tmp = rmesa->TexGenEnabled; - - rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_TEXMAT_0_ENABLE<<unit); - rmesa->TexGenEnabled &= ~(RADEON_TEXMAT_0_ENABLE<<unit); - rmesa->TexGenEnabled &= ~(RADEON_TEXGEN_INPUT_MASK<<inputshift); - rmesa->TexGenNeedNormals[unit] = 0; - rmesa->TexGenEnabled |= - (RADEON_TEXGEN_INPUT_TEXCOORD_0+unit) << inputshift; - - if (tmp != rmesa->TexGenEnabled) { - rmesa->recheck_texgen[unit] = GL_TRUE; - rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; - } - } - } -} - -static GLboolean enable_tex_2d( GLcontext *ctx, int unit ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; - - /* Need to load the 2d images associated with this unit. - */ - if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) { - t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2; - t->base.dirty_images[0] = ~0; - } - - ASSERT(tObj->Target == GL_TEXTURE_2D || tObj->Target == GL_TEXTURE_1D); - - if ( t->base.dirty_images[0] ) { - RADEON_FIREVERTICES( rmesa ); - radeonSetTexImages( rmesa, tObj ); - radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 ); - if ( !t->base.memBlock && !t->image_override ) - return GL_FALSE; - } - - return GL_TRUE; -} - -static GLboolean enable_tex_cube( GLcontext *ctx, int unit ) +/** + * Compute the cached hardware register values for the given texture object. + * + * \param rmesa Context pointer + * \param t the r300 texture object + */ +static GLboolean setup_hardware_state(r100ContextPtr rmesa, radeonTexObj *t, int unit) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; - GLuint face; + const struct gl_texture_image *firstImage; + GLint log2Width, log2Height, log2Depth, texelBytes; - /* Need to load the 2d images associated with this unit. - */ - if (t->pp_txformat & RADEON_TXFORMAT_NON_POWER2) { - t->pp_txformat &= ~RADEON_TXFORMAT_NON_POWER2; - for (face = 0; face < 6; face++) - t->base.dirty_images[face] = ~0; + if ( t->bo ) { + return GL_TRUE; } - ASSERT(tObj->Target == GL_TEXTURE_CUBE_MAP); + firstImage = t->base.Image[0][t->mt->firstLevel]; - if ( t->base.dirty_images[0] || t->base.dirty_images[1] || - t->base.dirty_images[2] || t->base.dirty_images[3] || - t->base.dirty_images[4] || t->base.dirty_images[5] ) { - /* flush */ - RADEON_FIREVERTICES( rmesa ); - /* layout memory space, once for all faces */ - radeonSetTexImages( rmesa, tObj ); + if (firstImage->Border > 0) { + fprintf(stderr, "%s: border\n", __FUNCTION__); + return GL_FALSE; } - /* upload (per face) */ - for (face = 0; face < 6; face++) { - if (t->base.dirty_images[face]) { - radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, face ); + log2Width = firstImage->WidthLog2; + log2Height = firstImage->HeightLog2; + log2Depth = firstImage->DepthLog2; + texelBytes = firstImage->TexFormat->TexelBytes; + + if (!t->image_override) { + if (VALID_FORMAT(firstImage->TexFormat->MesaFormat)) { + const struct tx_table *table = tx_table; + + t->pp_txformat &= ~(RADEON_TXFORMAT_FORMAT_MASK | + RADEON_TXFORMAT_ALPHA_IN_MAP); + t->pp_txfilter &= ~RADEON_YUV_TO_RGB; + + t->pp_txformat |= table[ firstImage->TexFormat->MesaFormat ].format; + t->pp_txfilter |= table[ firstImage->TexFormat->MesaFormat ].filter; + } else { + _mesa_problem(NULL, "unexpected texture format in %s", + __FUNCTION__); + return GL_FALSE; } } - - if ( !t->base.memBlock ) { - /* texmem alloc failed, use s/w fallback */ - return GL_FALSE; + + t->pp_txfilter &= ~RADEON_MAX_MIP_LEVEL_MASK; + t->pp_txfilter |= (t->mt->lastLevel - t->mt->firstLevel) << RADEON_MAX_MIP_LEVEL_SHIFT; + + t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK | + RADEON_TXFORMAT_HEIGHT_MASK | + RADEON_TXFORMAT_CUBIC_MAP_ENABLE | + RADEON_TXFORMAT_F5_WIDTH_MASK | + RADEON_TXFORMAT_F5_HEIGHT_MASK); + t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_WIDTH_SHIFT) | + (log2Height << RADEON_TXFORMAT_HEIGHT_SHIFT)); + + t->tile_bits = 0; + + if (t->base.Target == GL_TEXTURE_CUBE_MAP) { + ASSERT(log2Width == log2Height); + t->pp_txformat |= ((log2Width << RADEON_TXFORMAT_F5_WIDTH_SHIFT) | + (log2Height << RADEON_TXFORMAT_F5_HEIGHT_SHIFT) | + /* don't think we need this bit, if it exists at all - fglrx does not set it */ + (RADEON_TXFORMAT_CUBIC_MAP_ENABLE)); + t->pp_cubic_faces = ((log2Width << RADEON_FACE_WIDTH_1_SHIFT) | + (log2Height << RADEON_FACE_HEIGHT_1_SHIFT) | + (log2Width << RADEON_FACE_WIDTH_2_SHIFT) | + (log2Height << RADEON_FACE_HEIGHT_2_SHIFT) | + (log2Width << RADEON_FACE_WIDTH_3_SHIFT) | + (log2Height << RADEON_FACE_HEIGHT_3_SHIFT) | + (log2Width << RADEON_FACE_WIDTH_4_SHIFT) | + (log2Height << RADEON_FACE_HEIGHT_4_SHIFT)); } - return GL_TRUE; -} - -static GLboolean enable_tex_rect( GLcontext *ctx, int unit ) -{ - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; + t->pp_txsize = (((firstImage->Width - 1) << RADEON_TEX_USIZE_SHIFT) + | ((firstImage->Height - 1) << RADEON_TEX_VSIZE_SHIFT)); - if (!(t->pp_txformat & RADEON_TXFORMAT_NON_POWER2)) { - t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2; - t->base.dirty_images[0] = ~0; + if ( !t->image_override ) { + if (firstImage->IsCompressed) + t->pp_txpitch = (firstImage->Width + 63) & ~(63); + else + t->pp_txpitch = ((firstImage->Width * texelBytes) + 63) & ~(63); + t->pp_txpitch -= 32; } - ASSERT(tObj->Target == GL_TEXTURE_RECTANGLE_NV); - - if ( t->base.dirty_images[0] ) { - RADEON_FIREVERTICES( rmesa ); - radeonSetTexImages( rmesa, tObj ); - radeonUploadTexImages( rmesa, (radeonTexObjPtr) tObj->DriverData, 0 ); - if ( !t->base.memBlock && - !t->image_override /* && !rmesa->prefer_gart_client_texturing FIXME */ ) { - fprintf(stderr, "%s: upload failed\n", __FUNCTION__); - return GL_FALSE; - } + if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) { + t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2; } return GL_TRUE; } - -static GLboolean update_tex_common( GLcontext *ctx, int unit ) +static GLboolean radeon_validate_texture(GLcontext *ctx, struct gl_texture_object *texObj, int unit) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *tObj = texUnit->_Current; - radeonTexObjPtr t = (radeonTexObjPtr) tObj->DriverData; - GLenum format; + r100ContextPtr rmesa = R100_CONTEXT(ctx); + radeonTexObj *t = radeon_tex_obj(texObj); + int ret; - /* Fallback if there's a texture border */ - if ( tObj->Image[0][tObj->BaseLevel]->Border > 0 ) { - fprintf(stderr, "%s: border\n", __FUNCTION__); + if (!radeon_validate_texture_miptree(ctx, texObj)) return GL_FALSE; - } + + ret = setup_hardware_state(rmesa, t, unit); + if (ret == GL_FALSE) + return GL_FALSE; + /* yuv conversion only works in first unit */ if (unit != 0 && (t->pp_txfilter & RADEON_YUV_TO_RGB)) return GL_FALSE; - /* Update state if this is a different texture object to last - * time. - */ - if ( rmesa->state.texture.unit[unit].texobj != t ) { - if ( rmesa->state.texture.unit[unit].texobj != NULL ) { - /* The old texture is no longer bound to this texture unit. - * Mark it as such. - */ - - rmesa->state.texture.unit[unit].texobj->base.bound &= - ~(1UL << unit); - } - - rmesa->state.texture.unit[unit].texobj = t; - t->base.bound |= (1UL << unit); - t->dirty_state |= 1<<unit; - driUpdateTextureLRU( (driTextureObject *) t ); /* XXX: should be locked! */ - } + RADEON_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= + (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit; + RADEON_STATECHANGE( rmesa, tcl ); + rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit); - /* Newly enabled? - */ - if ( !(rmesa->hw.ctx.cmd[CTX_PP_CNTL] & (RADEON_TEX_0_ENABLE<<unit))) { - RADEON_STATECHANGE( rmesa, ctx ); - rmesa->hw.ctx.cmd[CTX_PP_CNTL] |= - (RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE) << unit; + rmesa->recheck_texgen[unit] = GL_TRUE; - RADEON_STATECHANGE( rmesa, tcl ); - - rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_ST_BIT(unit); - - rmesa->recheck_texgen[unit] = GL_TRUE; - } - - if (t->dirty_state & (1<<unit)) { - import_tex_obj_state( rmesa, unit, t ); - /* may need to update texture matrix (for texrect adjustments) */ - rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; - } + import_tex_obj_state( rmesa, unit, t ); if (rmesa->recheck_texgen[unit]) { GLboolean fallback = !radeon_validate_texgen( ctx, unit ); TCL_FALLBACK( ctx, (RADEON_TCL_FALLBACK_TEXGEN_0<<unit), fallback); rmesa->recheck_texgen[unit] = 0; - rmesa->NewGLState |= _NEW_TEXTURE_MATRIX; + rmesa->radeon.NewGLState |= _NEW_TEXTURE_MATRIX; } - format = tObj->Image[0][tObj->BaseLevel]->_BaseFormat; - if ( rmesa->state.texture.unit[unit].format != format || - rmesa->state.texture.unit[unit].envMode != texUnit->EnvMode ) { - rmesa->state.texture.unit[unit].format = format; - rmesa->state.texture.unit[unit].envMode = texUnit->EnvMode; - if ( ! radeonUpdateTextureEnv( ctx, unit ) ) { - return GL_FALSE; - } + if ( ! radeonUpdateTextureEnv( ctx, unit ) ) { + return GL_FALSE; } - FALLBACK( rmesa, RADEON_FALLBACK_BORDER_MODE, t->border_fallback ); + + t->validated = GL_TRUE; return !t->border_fallback; } - - static GLboolean radeonUpdateTextureUnit( GLcontext *ctx, int unit ) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + r100ContextPtr rmesa = R100_CONTEXT(ctx); - if ( texUnit->_ReallyEnabled & (TEXTURE_RECT_BIT) ) { - return (enable_tex_rect( ctx, unit ) && - update_tex_common( ctx, unit )); - } - else if ( texUnit->_ReallyEnabled & (TEXTURE_1D_BIT | TEXTURE_2D_BIT) ) { - return (enable_tex_2d( ctx, unit ) && - update_tex_common( ctx, unit )); - } - else if ( texUnit->_ReallyEnabled & (TEXTURE_CUBE_BIT) ) { - return (enable_tex_cube( ctx, unit ) && - update_tex_common( ctx, unit )); + if (ctx->Texture.Unit[unit]._ReallyEnabled & TEXTURE_3D_BIT) { + return GL_FALSE; } - else if ( texUnit->_ReallyEnabled ) { - return GL_FALSE; + + if (!ctx->Texture.Unit[unit]._ReallyEnabled) { + /* disable the unit */ + disable_tex_obj_state(rmesa, unit); + return GL_TRUE; } - else { - disable_tex( ctx, unit ); - return GL_TRUE; + + if (!radeon_validate_texture(ctx, ctx->Texture.Unit[unit]._Current, unit)) { + _mesa_warning(ctx, + "failed to validate texture for unit %d.\n", + unit); + rmesa->state.texture.unit[unit].texobj = NULL; + return GL_FALSE; } + rmesa->state.texture.unit[unit].texobj = radeon_tex_obj(ctx->Texture.Unit[unit]._Current); + return GL_TRUE; } void radeonUpdateTextureState( GLcontext *ctx ) { - radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + r100ContextPtr rmesa = R100_CONTEXT(ctx); GLboolean ok; + /* set the ctx all textures off */ + RADEON_STATECHANGE( rmesa, ctx ); + rmesa->hw.ctx.cmd[CTX_PP_CNTL] &= ~((RADEON_TEX_ENABLE_MASK) | (RADEON_TEX_BLEND_ENABLE_MASK)); + ok = (radeonUpdateTextureUnit( ctx, 0 ) && radeonUpdateTextureUnit( ctx, 1 ) && radeonUpdateTextureUnit( ctx, 2 )); FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok ); - if (rmesa->TclFallback) + if (rmesa->radeon.TclFallback) radeonChooseVertexState( ctx ); } diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c new file mode 100644 index 0000000000..7dfed2c456 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -0,0 +1,1032 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + * + * The Weather Channel (TM) funded Tungsten Graphics to develop the + * initial release of the Radeon 8500 driver under the XFree86 license. + * This notice must be preserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "main/glheader.h" +#include "main/imports.h" +#include "main/context.h" +#include "main/convolve.h" +#include "main/mipmap.h" +#include "main/texcompress.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "main/teximage.h" +#include "main/texobj.h" +#include "main/texgetimage.h" + +#include "xmlpool.h" /* for symbolic values of enum-type options */ + +#include "radeon_common.h" + +#include "radeon_mipmap_tree.h" + + +static void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride, + GLuint numrows, GLuint rowsize) +{ + assert(rowsize <= dststride); + assert(rowsize <= srcstride); + + if (rowsize == srcstride && rowsize == dststride) { + memcpy(dst, src, numrows*rowsize); + } else { + GLuint i; + for(i = 0; i < numrows; ++i) { + memcpy(dst, src, rowsize); + dst += dststride; + src += srcstride; + } + } +} + +/* textures */ +/** + * Allocate an empty texture image object. + */ +struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx) +{ + return CALLOC(sizeof(radeon_texture_image)); +} + +/** + * Free memory associated with this texture image. + */ +void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage) +{ + radeon_texture_image* image = get_radeon_texture_image(timage); + + if (image->mt) { + radeon_miptree_unreference(image->mt); + image->mt = 0; + assert(!image->base.Data); + } else { + _mesa_free_texture_image_data(ctx, timage); + } + if (image->bo) { + radeon_bo_unref(image->bo); + image->bo = NULL; + } + if (timage->Data) { + _mesa_free_texmemory(timage->Data); + timage->Data = NULL; + } +} + +/* Set Data pointer and additional data for mapped texture image */ +static void teximage_set_map_data(radeon_texture_image *image) +{ + radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel]; + + image->base.Data = image->mt->bo->ptr + lvl->faces[image->mtface].offset; + image->base.RowStride = lvl->rowstride / image->mt->bpp; +} + + +/** + * Map a single texture image for glTexImage and friends. + */ +void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable) +{ + if (image->mt) { + assert(!image->base.Data); + + radeon_bo_map(image->mt->bo, write_enable); + teximage_set_map_data(image); + } +} + + +void radeon_teximage_unmap(radeon_texture_image *image) +{ + if (image->mt) { + assert(image->base.Data); + + image->base.Data = 0; + radeon_bo_unmap(image->mt->bo); + } +} + +static void map_override(GLcontext *ctx, radeonTexObj *t) +{ + radeon_texture_image *img = get_radeon_texture_image(t->base.Image[0][0]); + + radeon_bo_map(t->bo, GL_FALSE); + + img->base.Data = t->bo->ptr; + _mesa_set_fetch_functions(&img->base, 2); +} + +static void unmap_override(GLcontext *ctx, radeonTexObj *t) +{ + radeon_texture_image *img = get_radeon_texture_image(t->base.Image[0][0]); + + radeon_bo_unmap(t->bo); + + img->base.Data = NULL; +} + +/** + * Map a validated texture for reading during software rendering. + */ +void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj) +{ + radeonTexObj* t = radeon_tex_obj(texObj); + int face, level; + + if (!radeon_validate_texture_miptree(ctx, texObj)) + return; + + /* for r100 3D sw fallbacks don't have mt */ + if (t->image_override && t->bo) + map_override(ctx, t); + + if (!t->mt) + return; + + radeon_bo_map(t->mt->bo, GL_FALSE); + for(face = 0; face < t->mt->faces; ++face) { + for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) + teximage_set_map_data(get_radeon_texture_image(texObj->Image[face][level])); + } +} + +void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj) +{ + radeonTexObj* t = radeon_tex_obj(texObj); + int face, level; + + if (t->image_override && t->bo) + unmap_override(ctx, t); + /* for r100 3D sw fallbacks don't have mt */ + if (!t->mt) + return; + + for(face = 0; face < t->mt->faces; ++face) { + for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) + texObj->Image[face][level]->Data = 0; + } + radeon_bo_unmap(t->mt->bo); +} + +GLuint radeon_face_for_target(GLenum target) +{ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: + return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + default: + return 0; + } +} + +/** + * Wraps Mesa's implementation to ensure that the base level image is mapped. + * + * This relies on internal details of _mesa_generate_mipmap, in particular + * the fact that the memory for recreated texture images is always freed. + */ +static void radeon_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + radeonTexObj* t = radeon_tex_obj(texObj); + GLuint nr_faces = (t->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + int i, face; + + + _mesa_generate_mipmap(ctx, target, texObj); + + for (face = 0; face < nr_faces; face++) { + for (i = texObj->BaseLevel + 1; i < texObj->MaxLevel; i++) { + radeon_texture_image *image; + + image = get_radeon_texture_image(texObj->Image[face][i]); + + if (image == NULL) + break; + + image->mtlevel = i; + image->mtface = face; + + radeon_miptree_unreference(image->mt); + image->mt = NULL; + } + } + +} + +void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj) +{ + GLuint face = radeon_face_for_target(target); + radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[face][texObj->BaseLevel]); + + radeon_teximage_map(baseimage, GL_FALSE); + radeon_generate_mipmap(ctx, target, texObj); + radeon_teximage_unmap(baseimage); +} + + +/* try to find a format which will only need a memcopy */ +static const struct gl_texture_format *radeonChoose8888TexFormat(radeonContextPtr rmesa, + GLenum srcFormat, + GLenum srcType, GLboolean fbo) +{ + const GLuint ui = 1; + const GLubyte littleEndian = *((const GLubyte *)&ui); + + /* r100 can only do this */ + if (IS_R100_CLASS(rmesa->radeonScreen) || fbo) + return _dri_texformat_argb8888; + + if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8) || + (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && !littleEndian) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && littleEndian)) { + return &_mesa_texformat_rgba8888; + } else if ((srcFormat == GL_RGBA && srcType == GL_UNSIGNED_INT_8_8_8_8_REV) || + (srcFormat == GL_RGBA && srcType == GL_UNSIGNED_BYTE && littleEndian) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_INT_8_8_8_8) || + (srcFormat == GL_ABGR_EXT && srcType == GL_UNSIGNED_BYTE && !littleEndian)) { + return &_mesa_texformat_rgba8888_rev; + } else if (IS_R200_CLASS(rmesa->radeonScreen)) { + return _dri_texformat_argb8888; + } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && !littleEndian) || + srcType == GL_UNSIGNED_INT_8_8_8_8)) { + return &_mesa_texformat_argb8888_rev; + } else if (srcFormat == GL_BGRA && ((srcType == GL_UNSIGNED_BYTE && littleEndian) || + srcType == GL_UNSIGNED_INT_8_8_8_8_REV)) { + return &_mesa_texformat_argb8888; + } else + return _dri_texformat_argb8888; +} + +const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx, + GLint internalFormat, + GLenum format, + GLenum type) +{ + return radeonChooseTextureFormat(ctx, internalFormat, format, + type, 0); +} + +const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx, + GLint internalFormat, + GLenum format, + GLenum type, GLboolean fbo) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + const GLboolean do32bpt = + (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_32); + const GLboolean force16bpt = + (rmesa->texture_depth == DRI_CONF_TEXTURE_DEPTH_FORCE_16); + (void)format; + +#if 0 + fprintf(stderr, "InternalFormat=%s(%d) type=%s format=%s\n", + _mesa_lookup_enum_by_nr(internalFormat), internalFormat, + _mesa_lookup_enum_by_nr(type), _mesa_lookup_enum_by_nr(format)); + fprintf(stderr, "do32bpt=%d force16bpt=%d\n", do32bpt, force16bpt); +#endif + + switch (internalFormat) { + case 4: + case GL_RGBA: + case GL_COMPRESSED_RGBA: + switch (type) { + case GL_UNSIGNED_INT_10_10_10_2: + case GL_UNSIGNED_INT_2_10_10_10_REV: + return do32bpt ? _dri_texformat_argb8888 : + _dri_texformat_argb1555; + case GL_UNSIGNED_SHORT_4_4_4_4: + case GL_UNSIGNED_SHORT_4_4_4_4_REV: + return _dri_texformat_argb4444; + case GL_UNSIGNED_SHORT_5_5_5_1: + case GL_UNSIGNED_SHORT_1_5_5_5_REV: + return _dri_texformat_argb1555; + default: + return do32bpt ? radeonChoose8888TexFormat(rmesa, format, type, fbo) : + _dri_texformat_argb4444; + } + + case 3: + case GL_RGB: + case GL_COMPRESSED_RGB: + switch (type) { + case GL_UNSIGNED_SHORT_4_4_4_4: + case GL_UNSIGNED_SHORT_4_4_4_4_REV: + return _dri_texformat_argb4444; + case GL_UNSIGNED_SHORT_5_5_5_1: + case GL_UNSIGNED_SHORT_1_5_5_5_REV: + return _dri_texformat_argb1555; + case GL_UNSIGNED_SHORT_5_6_5: + case GL_UNSIGNED_SHORT_5_6_5_REV: + return _dri_texformat_rgb565; + default: + return do32bpt ? _dri_texformat_argb8888 : + _dri_texformat_rgb565; + } + + case GL_RGBA8: + case GL_RGB10_A2: + case GL_RGBA12: + case GL_RGBA16: + return !force16bpt ? + radeonChoose8888TexFormat(rmesa, format, type, fbo) : + _dri_texformat_argb4444; + + case GL_RGBA4: + case GL_RGBA2: + return _dri_texformat_argb4444; + + case GL_RGB5_A1: + return _dri_texformat_argb1555; + + case GL_RGB8: + case GL_RGB10: + case GL_RGB12: + case GL_RGB16: + return !force16bpt ? _dri_texformat_argb8888 : + _dri_texformat_rgb565; + + case GL_RGB5: + case GL_RGB4: + case GL_R3_G3_B2: + return _dri_texformat_rgb565; + + case GL_ALPHA: + case GL_ALPHA4: + case GL_ALPHA8: + case GL_ALPHA12: + case GL_ALPHA16: + case GL_COMPRESSED_ALPHA: + /* r200: can't use a8 format since interpreting hw I8 as a8 would result + in wrong rgb values (same as alpha value instead of 0). */ + if (IS_R200_CLASS(rmesa->radeonScreen)) + return _dri_texformat_al88; + else + return _dri_texformat_a8; + case 1: + case GL_LUMINANCE: + case GL_LUMINANCE4: + case GL_LUMINANCE8: + case GL_LUMINANCE12: + case GL_LUMINANCE16: + case GL_COMPRESSED_LUMINANCE: + return _dri_texformat_l8; + + case 2: + case GL_LUMINANCE_ALPHA: + case GL_LUMINANCE4_ALPHA4: + case GL_LUMINANCE6_ALPHA2: + case GL_LUMINANCE8_ALPHA8: + case GL_LUMINANCE12_ALPHA4: + case GL_LUMINANCE12_ALPHA12: + case GL_LUMINANCE16_ALPHA16: + case GL_COMPRESSED_LUMINANCE_ALPHA: + return _dri_texformat_al88; + + case GL_INTENSITY: + case GL_INTENSITY4: + case GL_INTENSITY8: + case GL_INTENSITY12: + case GL_INTENSITY16: + case GL_COMPRESSED_INTENSITY: + return _dri_texformat_i8; + + case GL_YCBCR_MESA: + if (type == GL_UNSIGNED_SHORT_8_8_APPLE || + type == GL_UNSIGNED_BYTE) + return &_mesa_texformat_ycbcr; + else + return &_mesa_texformat_ycbcr_rev; + + case GL_RGB_S3TC: + case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + return &_mesa_texformat_rgb_dxt1; + + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return &_mesa_texformat_rgba_dxt1; + + case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return &_mesa_texformat_rgba_dxt3; + + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return &_mesa_texformat_rgba_dxt5; + + case GL_ALPHA16F_ARB: + return &_mesa_texformat_alpha_float16; + case GL_ALPHA32F_ARB: + return &_mesa_texformat_alpha_float32; + case GL_LUMINANCE16F_ARB: + return &_mesa_texformat_luminance_float16; + case GL_LUMINANCE32F_ARB: + return &_mesa_texformat_luminance_float32; + case GL_LUMINANCE_ALPHA16F_ARB: + return &_mesa_texformat_luminance_alpha_float16; + case GL_LUMINANCE_ALPHA32F_ARB: + return &_mesa_texformat_luminance_alpha_float32; + case GL_INTENSITY16F_ARB: + return &_mesa_texformat_intensity_float16; + case GL_INTENSITY32F_ARB: + return &_mesa_texformat_intensity_float32; + case GL_RGB16F_ARB: + return &_mesa_texformat_rgba_float16; + case GL_RGB32F_ARB: + return &_mesa_texformat_rgba_float32; + case GL_RGBA16F_ARB: + return &_mesa_texformat_rgba_float16; + case GL_RGBA32F_ARB: + return &_mesa_texformat_rgba_float32; + + case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH_COMPONENT32: + case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH24_STENCIL8_EXT: + return &_mesa_texformat_s8_z24; + + /* EXT_texture_sRGB */ + case GL_SRGB: + case GL_SRGB8: + case GL_SRGB_ALPHA: + case GL_SRGB8_ALPHA8: + case GL_COMPRESSED_SRGB: + case GL_COMPRESSED_SRGB_ALPHA: + return &_mesa_texformat_srgba8; + + case GL_SLUMINANCE: + case GL_SLUMINANCE8: + case GL_COMPRESSED_SLUMINANCE: + return &_mesa_texformat_sl8; + + case GL_SLUMINANCE_ALPHA: + case GL_SLUMINANCE8_ALPHA8: + case GL_COMPRESSED_SLUMINANCE_ALPHA: + return &_mesa_texformat_sla8; + + default: + _mesa_problem(ctx, + "unexpected internalFormat 0x%x in %s", + (int)internalFormat, __func__); + return NULL; + } + + return NULL; /* never get here */ +} + +/** + * All glTexImage calls go through this function. + */ +static void radeon_teximage( + GLcontext *ctx, int dims, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLsizei imageSize, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + int compressed) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + radeonTexObj* t = radeon_tex_obj(texObj); + radeon_texture_image* image = get_radeon_texture_image(texImage); + GLuint dstRowStride; + GLint postConvWidth = width; + GLint postConvHeight = height; + GLuint texelBytes; + GLuint face = radeon_face_for_target(target); + + radeon_firevertices(rmesa); + + t->validated = GL_FALSE; + + if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) { + _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth, + &postConvHeight); + } + + /* Choose and fill in the texture format for this image */ + texImage->TexFormat = radeonChooseTextureFormat(ctx, internalFormat, format, type, 0); + _mesa_set_fetch_functions(texImage, dims); + + if (texImage->TexFormat->TexelBytes == 0) { + texelBytes = 0; + texImage->IsCompressed = GL_TRUE; + texImage->CompressedSize = + ctx->Driver.CompressedTextureSize(ctx, texImage->Width, + texImage->Height, texImage->Depth, + texImage->TexFormat->MesaFormat); + } else { + texImage->IsCompressed = GL_FALSE; + texImage->CompressedSize = 0; + + texelBytes = texImage->TexFormat->TexelBytes; + /* Minimum pitch of 32 bytes */ + if (postConvWidth * texelBytes < 32) { + postConvWidth = 32 / texelBytes; + texImage->RowStride = postConvWidth; + } + if (!image->mt) { + assert(texImage->RowStride == postConvWidth); + } + } + + /* Allocate memory for image */ + radeonFreeTexImageData(ctx, texImage); /* Mesa core only clears texImage->Data but not image->mt */ + + if (t->mt && + t->mt->firstLevel == level && + t->mt->lastLevel == level && + t->mt->target != GL_TEXTURE_CUBE_MAP_ARB && + !radeon_miptree_matches_image(t->mt, texImage, face, level)) { + radeon_miptree_unreference(t->mt); + t->mt = NULL; + } + + if (!t->mt) + radeon_try_alloc_miptree(rmesa, t, texImage, face, level); + if (t->mt && radeon_miptree_matches_image(t->mt, texImage, face, level)) { + radeon_mipmap_level *lvl; + image->mt = t->mt; + image->mtlevel = level - t->mt->firstLevel; + image->mtface = face; + radeon_miptree_reference(t->mt); + lvl = &image->mt->levels[image->mtlevel]; + dstRowStride = lvl->rowstride; + } else { + int size; + if (texImage->IsCompressed) { + size = texImage->CompressedSize; + } else { + size = texImage->Width * texImage->Height * texImage->Depth * texImage->TexFormat->TexelBytes; + } + texImage->Data = _mesa_alloc_texmemory(size); + } + + /* Upload texture image; note that the spec allows pixels to be NULL */ + if (compressed) { + pixels = _mesa_validate_pbo_compressed_teximage( + ctx, imageSize, pixels, packing, "glCompressedTexImage"); + } else { + pixels = _mesa_validate_pbo_teximage( + ctx, dims, width, height, depth, + format, type, pixels, packing, "glTexImage"); + } + + if (pixels) { + radeon_teximage_map(image, GL_TRUE); + + if (compressed) { + memcpy(texImage->Data, pixels, imageSize); + } else { + GLuint dstRowStride; + GLuint *dstImageOffsets; + + if (image->mt) { + radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel]; + dstRowStride = lvl->rowstride; + } else { + dstRowStride = texImage->Width * texImage->TexFormat->TexelBytes; + } + + if (dims == 3) { + int i; + + dstImageOffsets = _mesa_malloc(depth * sizeof(GLuint)) ; + if (!dstImageOffsets) + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); + + for (i = 0; i < depth; ++i) { + dstImageOffsets[i] = dstRowStride/texImage->TexFormat->TexelBytes * height * i; + } + } else { + dstImageOffsets = texImage->ImageOffsets; + } + + if (!texImage->TexFormat->StoreImage(ctx, dims, + texImage->_BaseFormat, + texImage->TexFormat, + texImage->Data, 0, 0, 0, /* dstX/Y/Zoffset */ + dstRowStride, + dstImageOffsets, + width, height, depth, + format, type, pixels, packing)) + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); + + if (dims == 3) + _mesa_free(dstImageOffsets); + } + + /* SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + radeon_generate_mipmap(ctx, target, texObj); + } + } + + _mesa_unmap_teximage_pbo(ctx, packing); + + if (pixels) + radeon_teximage_unmap(image); + + +} + +void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + radeon_teximage(ctx, 1, target, level, internalFormat, width, 1, 1, + 0, format, type, pixels, packing, texObj, texImage, 0); +} + +void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) + +{ + radeon_teximage(ctx, 2, target, level, internalFormat, width, height, 1, + 0, format, type, pixels, packing, texObj, texImage, 0); +} + +void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target, + GLint level, GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid * data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + radeon_teximage(ctx, 2, target, level, internalFormat, width, height, 1, + imageSize, 0, 0, data, &ctx->Unpack, texObj, texImage, 1); +} + +void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + radeon_teximage(ctx, 3, target, level, internalFormat, width, height, depth, + 0, format, type, pixels, packing, texObj, texImage, 0); +} + +/** + * Update a subregion of the given texture image. + */ +static void radeon_texsubimage(GLcontext* ctx, int dims, GLenum target, int level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLsizei imageSize, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + int compressed) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + radeonTexObj* t = radeon_tex_obj(texObj); + radeon_texture_image* image = get_radeon_texture_image(texImage); + + radeon_firevertices(rmesa); + + t->validated = GL_FALSE; + if (compressed) { + pixels = _mesa_validate_pbo_compressed_teximage( + ctx, imageSize, pixels, packing, "glCompressedTexImage"); + } else { + pixels = _mesa_validate_pbo_teximage(ctx, dims, + width, height, depth, format, type, pixels, packing, "glTexSubImage1D"); + } + + if (pixels) { + GLint dstRowStride; + radeon_teximage_map(image, GL_TRUE); + + if (image->mt) { + radeon_mipmap_level *lvl = &image->mt->levels[image->mtlevel]; + dstRowStride = lvl->rowstride; + } else { + dstRowStride = texImage->RowStride * texImage->TexFormat->TexelBytes; + } + + if (compressed) { + uint32_t srcRowStride, bytesPerRow, rows; + dstRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, texImage->Width); + srcRowStride = _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); + bytesPerRow = srcRowStride; + rows = height / 4; + + copy_rows(texImage->Data, dstRowStride, image->base.Data, srcRowStride, rows, + bytesPerRow); + + } else { + if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, + texImage->TexFormat, texImage->Data, + xoffset, yoffset, zoffset, + dstRowStride, + texImage->ImageOffsets, + width, height, depth, + format, type, pixels, packing)) + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage"); + } + + /* GL_SGIS_generate_mipmap */ + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + radeon_generate_mipmap(ctx, target, texObj); + } + } + + radeon_teximage_unmap(image); + + _mesa_unmap_teximage_pbo(ctx, packing); + + +} + +void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, + GLsizei width, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + radeon_texsubimage(ctx, 1, target, level, xoffset, 0, 0, width, 1, 1, 0, + format, type, pixels, packing, texObj, texImage, 0); +} + +void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + radeon_texsubimage(ctx, 2, target, level, xoffset, yoffset, 0, width, height, 1, + 0, format, type, pixels, packing, texObj, texImage, + 0); +} + +void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target, + GLint level, GLint xoffset, + GLint yoffset, GLsizei width, + GLsizei height, GLenum format, + GLsizei imageSize, const GLvoid * data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + radeon_texsubimage(ctx, 2, target, level, xoffset, yoffset, 0, width, height, 1, + imageSize, format, 0, data, &ctx->Unpack, texObj, texImage, 1); +} + + +void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + radeon_texsubimage(ctx, 3, target, level, xoffset, yoffset, zoffset, width, height, depth, 0, + format, type, pixels, packing, texObj, texImage, 0); +} + + + +/** + * Ensure that the given image is stored in the given miptree from now on. + */ +static void migrate_image_to_miptree(radeon_mipmap_tree *mt, radeon_texture_image *image, int face, int level) +{ + radeon_mipmap_level *dstlvl = &mt->levels[level - mt->firstLevel]; + unsigned char *dest; + + assert(image->mt != mt); + assert(dstlvl->width == image->base.Width); + assert(dstlvl->height == image->base.Height); + assert(dstlvl->depth == image->base.Depth); + + + radeon_bo_map(mt->bo, GL_TRUE); + dest = mt->bo->ptr + dstlvl->faces[face].offset; + + if (image->mt) { + /* Format etc. should match, so we really just need a memcpy(). + * In fact, that memcpy() could be done by the hardware in many + * cases, provided that we have a proper memory manager. + */ + radeon_mipmap_level *srclvl = &image->mt->levels[image->mtlevel-image->mt->firstLevel]; + + assert(srclvl->size == dstlvl->size); + assert(srclvl->rowstride == dstlvl->rowstride); + + radeon_bo_map(image->mt->bo, GL_FALSE); + + memcpy(dest, + image->mt->bo->ptr + srclvl->faces[face].offset, + dstlvl->size); + radeon_bo_unmap(image->mt->bo); + + radeon_miptree_unreference(image->mt); + } else { + uint32_t srcrowstride; + uint32_t height; + /* need to confirm this value is correct */ + if (mt->compressed) { + height = image->base.Height / 4; + srcrowstride = image->base.RowStride * mt->bpp; + } else { + height = image->base.Height * image->base.Depth; + srcrowstride = image->base.Width * image->base.TexFormat->TexelBytes; + } + +// if (mt->tilebits) +// WARN_ONCE("%s: tiling not supported yet", __FUNCTION__); + + copy_rows(dest, dstlvl->rowstride, image->base.Data, srcrowstride, + height, srcrowstride); + + _mesa_free_texmemory(image->base.Data); + image->base.Data = 0; + } + + radeon_bo_unmap(mt->bo); + + image->mt = mt; + image->mtface = face; + image->mtlevel = level; + radeon_miptree_reference(image->mt); +} + +int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj) +{ + radeonContextPtr rmesa = RADEON_CONTEXT(ctx); + radeonTexObj *t = radeon_tex_obj(texObj); + radeon_texture_image *baseimage = get_radeon_texture_image(texObj->Image[0][texObj->BaseLevel]); + int face, level; + + if (t->validated || t->image_override) + return GL_TRUE; + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "%s: Validating texture %p now\n", __FUNCTION__, texObj); + + if (baseimage->base.Border > 0) + return GL_FALSE; + + /* Ensure a matching miptree exists. + * + * Differing mipmap trees can result when the app uses TexImage to + * change texture dimensions. + * + * Prefer to use base image's miptree if it + * exists, since that most likely contains more valid data (remember + * that the base level is usually significantly larger than the rest + * of the miptree, so cubemaps are the only possible exception). + */ + if (baseimage->mt && + baseimage->mt != t->mt && + radeon_miptree_matches_texture(baseimage->mt, &t->base)) { + radeon_miptree_unreference(t->mt); + t->mt = baseimage->mt; + radeon_miptree_reference(t->mt); + } else if (t->mt && !radeon_miptree_matches_texture(t->mt, &t->base)) { + radeon_miptree_unreference(t->mt); + t->mt = 0; + } + + if (!t->mt) { + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, " Allocate new miptree\n"); + radeon_try_alloc_miptree(rmesa, t, &baseimage->base, 0, texObj->BaseLevel); + if (!t->mt) { + _mesa_problem(ctx, "r300_validate_texture failed to alloc miptree"); + return GL_FALSE; + } + } + + /* Ensure all images are stored in the single main miptree */ + for(face = 0; face < t->mt->faces; ++face) { + for(level = t->mt->firstLevel; level <= t->mt->lastLevel; ++level) { + radeon_texture_image *image = get_radeon_texture_image(texObj->Image[face][level]); + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, " face %i, level %i... %p vs %p ", face, level, t->mt, image->mt); + if (t->mt == image->mt) { + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "OK\n"); + continue; + } + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "migrating\n"); + migrate_image_to_miptree(t->mt, image, face, level); + } + } + + return GL_TRUE; +} + + +/** + * Need to map texture image into memory before copying image data, + * then unmap it. + */ +static void +radeon_get_tex_image(GLcontext * ctx, GLenum target, GLint level, + GLenum format, GLenum type, GLvoid * pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, int compressed) +{ + radeon_texture_image *image = get_radeon_texture_image(texImage); + + if (image->mt) { + /* Map the texture image read-only */ + radeon_teximage_map(image, GL_FALSE); + } else { + /* Image hasn't been uploaded to a miptree yet */ + assert(image->base.Data); + } + + if (compressed) { + _mesa_get_compressed_teximage(ctx, target, level, pixels, + texObj, texImage); + } else { + _mesa_get_teximage(ctx, target, level, format, type, pixels, + texObj, texImage); + } + + if (image->mt) { + radeon_teximage_unmap(image); + } +} + +void +radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level, + GLenum format, GLenum type, GLvoid * pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + radeon_get_tex_image(ctx, target, level, format, type, pixels, + texObj, texImage, 0); +} + +void +radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, + GLvoid *pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + radeon_get_tex_image(ctx, target, level, 0, 0, pixels, + texObj, texImage, 1); +} diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h new file mode 100644 index 0000000000..888a55ba91 --- /dev/null +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -0,0 +1,122 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. + * + * The Weather Channel (TM) funded Tungsten Graphics to develop the + * initial release of the Radeon 8500 driver under the XFree86 license. + * This notice must be preserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef RADEON_TEXTURE_H +#define RADEON_TEXTURE_H +struct gl_texture_image *radeonNewTextureImage(GLcontext *ctx); +void radeonFreeTexImageData(GLcontext *ctx, struct gl_texture_image *timage); + +void radeon_teximage_map(radeon_texture_image *image, GLboolean write_enable); +void radeon_teximage_unmap(radeon_texture_image *image); +void radeonMapTexture(GLcontext *ctx, struct gl_texture_object *texObj); +void radeonUnmapTexture(GLcontext *ctx, struct gl_texture_object *texObj); +void radeonGenerateMipmap(GLcontext* ctx, GLenum target, struct gl_texture_object *texObj); +int radeon_validate_texture_miptree(GLcontext * ctx, struct gl_texture_object *texObj); +GLuint radeon_face_for_target(GLenum target); +const struct gl_texture_format *radeonChooseTextureFormat_mesa(GLcontext * ctx, + GLint internalFormat, + GLenum format, + GLenum type); +const struct gl_texture_format *radeonChooseTextureFormat(GLcontext * ctx, + GLint internalFormat, + GLenum format, + GLenum type, GLboolean fbo); + +void radeonTexImage1D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); +void radeonTexImage2D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); +void radeonCompressedTexImage2D(GLcontext * ctx, GLenum target, + GLint level, GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid * data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); +void radeonTexImage3D(GLcontext * ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); +void radeonTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, + GLsizei width, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); +void radeonTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); +void radeonCompressedTexSubImage2D(GLcontext * ctx, GLenum target, + GLint level, GLint xoffset, + GLint yoffset, GLsizei width, + GLsizei height, GLenum format, + GLsizei imageSize, const GLvoid * data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + +void radeonTexSubImage3D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + +void radeonGetTexImage(GLcontext * ctx, GLenum target, GLint level, + GLenum format, GLenum type, GLvoid * pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); +void radeonGetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, + GLvoid *pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage); + +#endif diff --git a/src/mesa/drivers/dri/radeon/server/radeon_reg.h b/src/mesa/drivers/dri/radeon/server/radeon_reg.h index ae2ccdf292..866807462a 100644 --- a/src/mesa/drivers/dri/radeon/server/radeon_reg.h +++ b/src/mesa/drivers/dri/radeon/server/radeon_reg.h @@ -2031,6 +2031,9 @@ #define RADEON_CP_PACKET3_3D_DRAW_INDX 0xC0002A00 #define RADEON_CP_PACKET3_LOAD_PALETTE 0xC0002C00 #define RADEON_CP_PACKET3_3D_LOAD_VBPNTR 0xC0002F00 +#define R200_CP_CMD_3D_DRAW_VBUF_2 0xC0003400 +#define R200_CP_CMD_3D_DRAW_IMMD_2 0xC0003500 +#define R200_CP_CMD_3D_DRAW_INDX_2 0xC0003600 #define RADEON_CP_PACKET3_CNTL_PAINT 0xC0009100 #define RADEON_CP_PACKET3_CNTL_BITBLT 0xC0009200 #define RADEON_CP_PACKET3_CNTL_SMALLTEXT 0xC0009300 diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index 305df548fa..9a01465bdf 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -912,8 +912,9 @@ xmesa_update_state( GLcontext *ctx, GLbitfield new_state ) /* * GL_DITHER, GL_READ/DRAW_BUFFER, buffer binding state, etc. effect * renderbuffer span/clear funcs. + * Check _NEW_COLOR to detect dither enable/disable. */ - if (new_state & (_NEW_COLOR | _NEW_PIXEL | _NEW_BUFFERS)) { + if (new_state & (_NEW_COLOR | _NEW_BUFFERS)) { XMesaBuffer xmbuf = XMESA_BUFFER(ctx->DrawBuffer); struct xmesa_renderbuffer *front_xrb, *back_xrb; |