diff options
Diffstat (limited to 'src/mesa/drivers/dri/r600')
-rw-r--r-- | src/mesa/drivers/dri/r600/Makefile | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_cmdbuf.c | 18 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_context.c | 273 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_context.h | 56 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r600_texstate.c | 12 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_assembler.c | 89 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_assembler.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_chip.c | 175 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_fragprog.c | 42 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_oglprog.c | 55 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_render.c | 444 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_shader.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_state.c | 61 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_vertprog.c | 83 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_vertprog.h | 8 |
15 files changed, 534 insertions, 785 deletions
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 7d5a7b1ab6..9b7c42042e 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -76,4 +76,3 @@ DRI_LIB_DEPS += $(RADEON_LDFLAGS) include ../Makefile.template -symlinks: diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index 3cfe03a45f..d27a3245a3 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -254,7 +254,7 @@ static int r600_cs_process_relocs(struct radeon_cs *cs, relocs = (struct r600_cs_reloc_legacy *)cs->relocs; restart: for (i = 0; i < cs->crelocs; i++) { - uint32_t soffset, eoffset, asicoffset; + uint32_t soffset, eoffset; r = radeon_bo_legacy_validate(relocs[i].base.bo, &soffset, &eoffset); @@ -262,24 +262,12 @@ restart: goto restart; } if (r) { - fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", + fprintf(stderr, "invalid bo(%p) [0x%08X, 0x%08X]\n", relocs[i].base.bo, soffset, eoffset); return r; } - asicoffset = soffset; for (j = 0; j < relocs[i].cindices; j++) { - if (asicoffset >= eoffset) { - /* radeon_bo_debug(relocs[i].base.bo, 12); */ - fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", - relocs[i].base.bo, soffset, eoffset); - fprintf(stderr, "above end: %p 0x%08X 0x%08X\n", - relocs[i].base.bo, - cs->packets[relocs[i].indices[j]], - eoffset); - exit(0); - return -EINVAL; - } /* pkt3 nop header in ib chunk */ cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000; /* reloc index in ib chunk */ @@ -287,7 +275,7 @@ restart: } /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */ - reloc_chunk[offset_dw] = asicoffset; + reloc_chunk[offset_dw] = soffset; reloc_chunk[offset_dw + 3] = 0; offset_dw += 4; diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 969144ba12..6de151d51b 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -64,6 +64,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r600_cmdbuf.h" #include "r600_emit.h" #include "radeon_bocs_wrapper.h" +#include "radeon_queryobj.h" #include "r700_state.h" #include "r700_ioctl.h" @@ -73,11 +74,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ -/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */ -int future_hw_tcl_on = 1; -int hw_tcl_on = 1; - #define need_GL_VERSION_2_0 +#define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_vertex_program #define need_GL_EXT_blend_equation_separate @@ -92,14 +90,13 @@ int hw_tcl_on = 1; #define need_GL_ATI_separate_stencil #define need_GL_NV_vertex_program -#include "extension_helper.h" - -extern const struct tnl_pipeline_stage *r700_pipeline[]; +#include "main/remap_helper.h" -const struct dri_extension card_extensions[] = { +static const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ {"GL_ARB_depth_texture", NULL}, {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, {"GL_ARB_multitexture", NULL}, {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, {"GL_ARB_shadow", NULL}, @@ -131,6 +128,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_texture_lod_bias", NULL}, {"GL_EXT_texture_mirror_clamp", NULL}, {"GL_EXT_texture_rectangle", NULL}, + {"GL_EXT_vertex_array_bgra", NULL}, {"GL_EXT_texture_sRGB", NULL}, {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions}, {"GL_ATI_texture_env_combine3", NULL}, @@ -146,7 +144,7 @@ const struct dri_extension card_extensions[] = { }; -const struct dri_extension mm_extensions[] = { +static const struct dri_extension mm_extensions[] = { { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, { NULL, NULL } }; @@ -155,21 +153,24 @@ const struct dri_extension mm_extensions[] = { * The GL 2.0 functions are needed to make display lists work with * functions added by GL_ATI_separate_stencil. */ -const struct dri_extension gl_20_extension[] = { +static const struct dri_extension gl_20_extension[] = { {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, }; - -static void r600RunPipeline(GLcontext * ctx) -{ - _mesa_lock_context_textures(ctx); - - if (ctx->NewState) - _mesa_update_state_locked(ctx); - - _tnl_run_pipeline(ctx); - _mesa_unlock_context_textures(ctx); -} +static const struct tnl_pipeline_stage *r600_pipeline[] = { + /* Catch any t&l fallbacks + */ + &_tnl_vertex_transform_stage, + &_tnl_normal_transform_stage, + &_tnl_lighting_stage, + &_tnl_fog_coordinate_stage, + &_tnl_texgen_stage, + &_tnl_texture_transform_stage, + &_tnl_point_attenuation_stage, + &_tnl_vertex_program_stage, + &_tnl_render_stage, + 0, +}; static void r600_get_lock(radeonContextPtr rmesa) { @@ -180,7 +181,7 @@ static void r600_get_lock(radeonContextPtr rmesa) if (!rmesa->radeonScreen->kernel_mm) radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom); } -} +} static void r600_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) { @@ -202,6 +203,24 @@ static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) context->radeon.Fallback &= ~bit; } +static void r600_emit_query_finish(radeonContextPtr radeon) +{ + context_t *context = (context_t*) radeon; + BATCH_LOCALS(&context->radeon); + + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(4 + 2); + R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2)); + R600_OUT_BATCH(ZPASS_DONE); + R600_OUT_BATCH(query->curr_offset + 8); /* hw writes qwords */ + R600_OUT_BATCH(0x00000000); + R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + static void r600_init_vtbl(radeonContextPtr radeon) { radeon->vtbl.get_lock = r600_get_lock; @@ -210,6 +229,101 @@ static void r600_init_vtbl(radeonContextPtr radeon) radeon->vtbl.swtcl_flush = NULL; radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms; radeon->vtbl.fallback = r600_fallback; + radeon->vtbl.emit_query_finish = r600_emit_query_finish; +} + +static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen) +{ + context_t *r600 = R700_CONTEXT(ctx); + + ctx->Const.MaxTextureImageUnits = + driQueryOptioni(&r600->radeon.optionCache, "texture_image_units"); + ctx->Const.MaxTextureCoordUnits = + driQueryOptioni(&r600->radeon.optionCache, "texture_coord_units"); + ctx->Const.MaxTextureUnits = + MIN2(ctx->Const.MaxTextureImageUnits, + ctx->Const.MaxTextureCoordUnits); + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + ctx->Const.MaxTextureLodBias = 16.0; + + ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ + ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ + + ctx->Const.MinPointSize = 0x0001 / 8.0; + ctx->Const.MinPointSizeAA = 0x0001 / 8.0; + ctx->Const.MaxPointSize = 0xffff / 8.0; + ctx->Const.MaxPointSizeAA = 0xffff / 8.0; + + ctx->Const.MinLineWidth = 0x0001 / 8.0; + ctx->Const.MinLineWidthAA = 0x0001 / 8.0; + ctx->Const.MaxLineWidth = 0xffff / 8.0; + ctx->Const.MaxLineWidthAA = 0xffff / 8.0; + + ctx->Const.MaxDrawBuffers = 1; /* hw supports 8 */ + + /* 256 for reg-based consts, inline consts also supported */ + ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ + ctx->Const.VertexProgram.MaxNativeInstructions = 8192; + ctx->Const.VertexProgram.MaxNativeAttribs = 160; + ctx->Const.VertexProgram.MaxTemps = 128; + ctx->Const.VertexProgram.MaxNativeTemps = 128; + ctx->Const.VertexProgram.MaxNativeParameters = 256; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ + + ctx->Const.FragmentProgram.MaxNativeTemps = 128; + ctx->Const.FragmentProgram.MaxNativeAttribs = 32; + ctx->Const.FragmentProgram.MaxNativeParameters = 256; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; + /* 8 per clause on r6xx, 16 on rv670/r7xx */ + if ((screen->chip_family == CHIP_FAMILY_RV670) || + (screen->chip_family >= CHIP_FAMILY_RV770)) + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16; + else + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8; + ctx->Const.FragmentProgram.MaxNativeInstructions = 8192; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */ + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ +} + +static void r600ParseOptions(context_t *r600, radeonScreenPtr screen) +{ + /* Parse configuration files. + * Do this here so that initialMaxAnisotropy is set before we create + * the default textures. + */ + driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache, + screen->driScreen->myNum, "r600"); + + r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache, + "def_max_anisotropy"); + +} + +static void r600InitGLExtensions(GLcontext *ctx) +{ + context_t *r600 = R700_CONTEXT(ctx); + + driInitExtensions(ctx, card_extensions, GL_TRUE); + if (r600->radeon.radeonScreen->kernel_mm) + driInitExtensions(ctx, mm_extensions, GL_FALSE); + + if (driQueryOptionb + (&r600->radeon.optionCache, "disable_stencil_two_side")) + _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); + + if (r600->radeon.glCtx->Mesa_DXTn + && !driQueryOptionb(&r600->radeon.optionCache, "disable_s3tc")) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + _mesa_enable_extension(ctx, "GL_S3_s3tc"); + } else + if (driQueryOptionb(&r600->radeon.optionCache, "force_s3tc_enable")) + { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + } + + /* XXX: RV740 only seems to report results from half of its DBs */ + if (r600->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV740) + _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); } /* Create the device specific rendering context. @@ -235,19 +349,10 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, return GL_FALSE; } - if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) - hw_tcl_on = future_hw_tcl_on = 0; + r600ParseOptions(r600, screen); + r600->radeon.radeonScreen = screen; r600_init_vtbl(&r600->radeon); - /* Parse configuration files. - * Do this here so that initialMaxAnisotropy is set before we create - * the default textures. - */ - driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache, - screen->driScreen->myNum, "r600"); - - r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache, - "def_max_anisotropy"); /* Init default driver functions then plug in our R600-specific functions * (the texture functions are especially important) @@ -257,8 +362,9 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, r700InitStateFuncs(&functions); r600InitTextureFuncs(&functions); r700InitShaderFuncs(&functions); + radeonInitQueryObjFunctions(&functions); r700InitIoctlFuncs(&functions); - radeonInitBufferObjectFuncs(&functions); + radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r600->radeon, &functions, glVisual, driContextPriv, @@ -268,44 +374,14 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, return GL_FALSE; } - /* Init r600 context data */ - /* Set the maximum texture size small enough that we can guarentee that - * all texture units can bind a maximal texture and have them both in - * texturable memory at once. - */ - ctx = r600->radeon.glCtx; - ctx->Const.MaxTextureImageUnits = - driQueryOptioni(&r600->radeon.optionCache, "texture_image_units"); - ctx->Const.MaxTextureCoordUnits = - driQueryOptioni(&r600->radeon.optionCache, "texture_coord_units"); - ctx->Const.MaxTextureUnits = - MIN2(ctx->Const.MaxTextureImageUnits, - ctx->Const.MaxTextureCoordUnits); - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - ctx->Const.MaxTextureLodBias = 16.0; - - ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ - ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ - - ctx->Const.MinPointSize = 0x0001 / 8.0; - ctx->Const.MinPointSizeAA = 0x0001 / 8.0; - ctx->Const.MaxPointSize = 0xffff / 8.0; - ctx->Const.MaxPointSizeAA = 0xffff / 8.0; - - ctx->Const.MinLineWidth = 0x0001 / 8.0; - ctx->Const.MinLineWidthAA = 0x0001 / 8.0; - ctx->Const.MaxLineWidth = 0xffff / 8.0; - ctx->Const.MaxLineWidthAA = 0xffff / 8.0; + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - /* Needs further modifications */ -#if 0 - ctx->Const.MaxArrayLockSize = - ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); -#endif + r600InitConstValues(ctx, screen); - ctx->Const.MaxDrawBuffers = 1; + _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); /* Initialize the software rasterizer and helper modules. */ @@ -314,16 +390,12 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_CreateContext(ctx); _swsetup_CreateContext(ctx); _swsetup_Wakeup(ctx); - _ae_create_context(ctx); /* Install the customized pipeline: */ _tnl_destroy_pipeline(ctx); - _tnl_install_pipeline(ctx, r700_pipeline); - - /* Try and keep materials and vertices separate: - */ -/* _tnl_isolate_materials(ctx, GL_TRUE); */ + _tnl_install_pipeline(ctx, r600_pipeline); + TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; /* Configure swrast and TNL to match hardware characteristics: */ @@ -332,65 +404,16 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_allow_pixel_fog(ctx, GL_FALSE); _tnl_allow_vertex_fog(ctx, GL_TRUE); - /* 256 for reg-based consts, inline consts also supported */ - ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ - ctx->Const.VertexProgram.MaxNativeInstructions = 8192; - ctx->Const.VertexProgram.MaxNativeAttribs = 160; - ctx->Const.VertexProgram.MaxTemps = 128; - ctx->Const.VertexProgram.MaxNativeTemps = 128; - ctx->Const.VertexProgram.MaxNativeParameters = 256; - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ - - ctx->Const.FragmentProgram.MaxNativeTemps = 128; - ctx->Const.FragmentProgram.MaxNativeAttribs = 32; - ctx->Const.FragmentProgram.MaxNativeParameters = 256; - ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; - /* 8 per clause on r6xx, 16 on rv670/r7xx */ - if ((screen->chip_family == CHIP_FAMILY_RV670) || - (screen->chip_family >= CHIP_FAMILY_RV770)) - ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16; - else - ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8; - ctx->Const.FragmentProgram.MaxNativeInstructions = 8192; - ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */ - ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ - ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - radeon_init_debug(); - driInitExtensions(ctx, card_extensions, GL_TRUE); - if (r600->radeon.radeonScreen->kernel_mm) - driInitExtensions(ctx, mm_extensions, GL_FALSE); - - if (driQueryOptionb - (&r600->radeon.optionCache, "disable_stencil_two_side")) - _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); - - if (r600->radeon.glCtx->Mesa_DXTn - && !driQueryOptionb(&r600->radeon.optionCache, "disable_s3tc")) { - _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); - _mesa_enable_extension(ctx, "GL_S3_s3tc"); - } else - if (driQueryOptionb(&r600->radeon.optionCache, "force_s3tc_enable")) - { - _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); - } - - r700InitDraw(ctx); + r700InitDraw(ctx); radeon_fbo_init(&r600->radeon); radeonInitSpanFuncs( ctx ); - r600InitCmdBuf(r600); - r700InitState(r600->radeon.glCtx); - TNL_CONTEXT(ctx)->Driver.RunPipeline = r600RunPipeline; - - if (driQueryOptionb(&r600->radeon.optionCache, "no_rast")) { - radeon_warning("disabling 3D acceleration\n"); - } + r600InitGLExtensions(ctx); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index a296ea23fa..394fd757d4 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -58,29 +58,6 @@ typedef struct r600_context context_t; #include "main/mm.h" -/************ DMA BUFFERS **************/ - -/* The blit width for texture uploads - */ -#define R600_BLIT_WIDTH_BYTES 1024 -#define R600_MAX_TEXTURE_UNITS 8 - -struct r600_texture_state { - int tc_count; /* number of incoming texture coordinates from VAP */ -}; - -/* Perhaps more if we store programs in vmem? */ -/* drm_r600_cmd_header_t->vpu->count is unsigned char */ -#define VSF_MAX_FRAGMENT_LENGTH (255*4) - -/* Can be tested with colormat currently. */ -#define VSF_MAX_FRAGMENT_TEMPS (14) - -#define STATE_R600_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) -#define STATE_R600_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) - -extern int hw_tcl_on; - #define COLOR_IS_RGBA #define TAG(x) r600##x #include "tnl_dd/t_dd_vertex.h" @@ -126,32 +103,30 @@ struct r600_hw_state { struct radeon_state_atom tx_brdr_clr; }; -typedef struct StreamDesc +typedef struct StreamDesc { GLint size; //number of data element GLenum type; //data element type GLsizei stride; - struct radeon_bo *bo; - GLint bo_offset; + struct radeon_bo *bo; + GLint bo_offset; - GLuint dwords; + GLuint dwords; GLuint dst_loc; GLuint _signed; GLboolean normalize; - GLboolean is_named_bo; - GLubyte element; + GLboolean is_named_bo; + GLubyte element; } StreamDesc; -typedef struct r700_index_buffer +typedef struct r700_index_buffer { - struct radeon_bo *bo; - int bo_offset; + struct radeon_bo *bo; + int bo_offset; - GLboolean is_32bit; - GLuint count; - - GLboolean bHostIb; + GLboolean is_32bit; + GLuint count; } r700_index_buffer; /** @@ -169,10 +144,7 @@ struct r600_context { /* Vertex buffers */ - GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; - GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; - - GLint nNumActiveAos; + GLint nNumActiveAos; StreamDesc stream_desc[VERT_ATTRIB_MAX]; struct r700_index_buffer ind_buf; }; @@ -205,7 +177,6 @@ extern GLboolean r700SyncSurf(context_t *context, uint32_t write_domain, uint32_t sync_type); -extern void r700SetupStreams(GLcontext * ctx); extern void r700Start3D(context_t *context); extern void r600InitAtoms(context_t *context); extern void r700InitDraw(GLcontext *ctx); @@ -215,7 +186,4 @@ extern void r700InitDraw(GLcontext *ctx); #define RADEON_D_PLAYBACK_RAW 2 #define RADEON_D_T 3 -#define r600PackFloat32 radeonPackFloat32 -#define r600PackFloat24 radeonPackFloat24 - #endif /* __R600_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index a083f9afc0..c2f2be1d4c 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -763,7 +763,9 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->radeon.glCtx, texname); radeonTexObjPtr t = radeon_tex_obj(tObj); - uint32_t pitch_val, size; + int firstlevel = t->mt ? t->mt->firstLevel : 0; + const struct gl_texture_image *firstImage; + uint32_t pitch_val, size, row_align, bpp; if (!tObj) return; @@ -773,7 +775,13 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, if (!offset) return; - size = pitch;//h * w * (depth / 8); + bpp = depth / 8; + if (bpp == 3) + bpp = 4; + + firstImage = t->base.Image[0][firstlevel]; + row_align = rmesa->radeon.texture_row_align - 1; + size = ((firstImage->Width * bpp + row_align) & ~row_align) * firstImage->Height; if (t->bo) { radeon_bo_unref(t->bo); t->bo = NULL; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 903b6968be..917318c02a 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -336,7 +336,8 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) switch (pAsm->D.dst.opcode) { - case SQ_OP2_INST_ADD: + case SQ_OP2_INST_ADD: + case SQ_OP2_INST_KILLGT: case SQ_OP2_INST_MUL: case SQ_OP2_INST_MAX: case SQ_OP2_INST_MIN: @@ -354,9 +355,9 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) return 2; case SQ_OP2_INST_MOV: + case SQ_OP2_INST_MOVA_FLOOR: case SQ_OP2_INST_FRACT: case SQ_OP2_INST_FLOOR: - case SQ_OP2_INST_KILLGT: case SQ_OP2_INST_EXP_IEEE: case SQ_OP2_INST_LOG_CLAMPED: case SQ_OP2_INST_LOG_IEEE: @@ -1307,8 +1308,10 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) case PROGRAM_INPUT: switch (pILInst->SrcReg[0].Index) { + case FRAG_ATTRIB_WPOS: case FRAG_ATTRIB_COL0: case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_FOGC: case FRAG_ATTRIB_TEX0: case FRAG_ATTRIB_TEX1: case FRAG_ATTRIB_TEX2: @@ -1321,7 +1324,16 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; + break; + case FRAG_ATTRIB_FACE: + fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); + break; + case FRAG_ATTRIB_PNTC: + fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); + break; + case FRAG_ATTRIB_VAR0: + fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n"); + break; } break; } @@ -2180,7 +2192,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; + alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X; if( (is_single_scalar_operation == GL_TRUE) || (GL_TRUE == bSplitInst) ) @@ -2514,6 +2526,35 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm) return GL_TRUE; } +GLboolean assemble_ARL(r700_AssemblerBase *pAsm) +{ /* TODO: ar values dont' persist between clauses */ + if( GL_FALSE == checkop1(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean assemble_BAD(char *opcode_str) { radeon_error("Not yet implemented instruction (%s)\n", opcode_str); @@ -2635,7 +2676,7 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm) } else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH) { - onecomp_PVSSRC(&(pAsm->S[1].src), 3); + onecomp_PVSSRC(&(pAsm->S[0].src), 3); } if ( GL_FALSE == next_ins(pAsm) ) @@ -2744,15 +2785,15 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) GLboolean assemble_KIL(r700_AssemblerBase *pAsm) { + /* TODO: doc says KILL has to be last(end) ALU clause */ + checkop1(pAsm); pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT; - - if ( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; @@ -2765,20 +2806,11 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm) setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - - if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File) + if ( GL_FALSE == assemble_src(pAsm, 0, 1) ) { - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number; - } - else - { //PROGRAM_OUTPUT - pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index]; + return GL_FALSE; } - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - noswizzle_PVSSRC(&(pAsm->S[1].src)); - if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; @@ -3035,6 +3067,7 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = tmp; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); @@ -3936,8 +3969,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, break; case OPCODE_ARL: - radeon_error("Not yet implemented instruction OPCODE_ARL \n"); - //if ( GL_FALSE == assemble_BAD("ARL") ) + if ( GL_FALSE == assemble_ARL(pR700AsmCode) ) return GL_FALSE; break; case OPCODE_ARR: @@ -4282,6 +4314,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten) { unsigned int unBit; + GLuint export_count = 0; if(pR700AsmCode->depth_export_register_number >= 0) { @@ -4303,6 +4336,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, { return GL_FALSE; } + export_count++; } unBit = 1 << FRAG_RESULT_DEPTH; if(OutputsWritten & unBit) @@ -4316,8 +4350,15 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, { return GL_FALSE; } + export_count++; } - + /* Need to export something, otherwise we'll hang + * results are undefined anyway */ + if(export_count == 0) + { + Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE); + } + if(pR700AsmCode->cf_last_export_ptr != NULL) { pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index 0d4283e4ba..8cbca066e9 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -470,6 +470,7 @@ GLboolean next_ins(r700_AssemblerBase *pAsm); GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode); GLboolean assemble_ABS(r700_AssemblerBase *pAsm); GLboolean assemble_ADD(r700_AssemblerBase *pAsm); +GLboolean assemble_ARL(r700_AssemblerBase *pAsm); GLboolean assemble_BAD(char *opcode_str); GLboolean assemble_CMP(r700_AssemblerBase *pAsm); GLboolean assemble_COS(r700_AssemblerBase *pAsm); diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 3b7f6fffe0..ace3d24f06 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -141,77 +141,10 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom } } +extern int getTypeSize(GLenum type); static void r700SetupVTXConstants(GLcontext * ctx, - unsigned int nStreamID, void * pAos, - unsigned int size, /* number of elements in vector */ - unsigned int stride, - unsigned int count) /* number of vectors in stream */ -{ - context_t *context = R700_CONTEXT(ctx); - struct radeon_aos * paos = (struct radeon_aos *)pAos; - BATCH_LOCALS(&context->radeon); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - - unsigned int uSQ_VTX_CONSTANT_WORD0_0; - unsigned int uSQ_VTX_CONSTANT_WORD1_0; - unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0; - unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0; - unsigned int uSQ_VTX_CONSTANT_WORD6_0 = 0; - - if (!paos->bo) - return; - - if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV610) || - (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV620) || - (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS780) || - (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RS880) || - (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV710)) - r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, TC_ACTION_ENA_bit); - else - r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); - - uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; - uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1; - - SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */ - SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, - SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask); - SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL), - SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, - SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ - SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, - SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); - SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); - - SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); - SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, - SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask); - - BEGIN_BATCH_NO_AUTOSTATE(9 + 2); - - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); - R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); - R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); - R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); - R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0); - R600_OUT_BATCH(0); - R600_OUT_BATCH(0); - R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD6_0); - R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0, - paos->bo, - uSQ_VTX_CONSTANT_WORD0_0, - RADEON_GEM_DOMAIN_GTT, 0, 0); - END_BATCH(); - COMMIT_BATCH(); - -} - -extern int getTypeSize(GLenum type); -static void r700SetupVTXConstants2(GLcontext * ctx, - void * pAos, - StreamDesc * pStreamDesc) + StreamDesc * pStreamDesc) { context_t *context = R700_CONTEXT(ctx); struct radeon_aos * paos = (struct radeon_aos *)pAos; @@ -295,31 +228,6 @@ static void r700SetupVTXConstants2(GLcontext * ctx, } -void r700SetupStreams(GLcontext *ctx) -{ - context_t *context = R700_CONTEXT(ctx); - struct r700_vertex_program *vp = context->selected_vp; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - unsigned int i, j = 0; - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - - R600_STATECHANGE(context, vtx); - - for(i=0; i<VERT_ATTRIB_MAX; i++) { - if(vp->mesa_program->Base.InputsRead & (1 << i)) { - rcommon_emit_vector(ctx, - &context->radeon.tcl.aos[j], - vb->AttribPtr[i]->data, - vb->AttribPtr[i]->size, - vb->AttribPtr[i]->stride, - vb->Count); - j++; - } - } - context->radeon.tcl.aos_count = j; -} - static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); @@ -343,25 +251,12 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) COMMIT_BATCH(); for(i=0; i<VERT_ATTRIB_MAX; i++) { - if(vp->mesa_program->Base.InputsRead & (1 << i)) - { - if(1 == context->selected_vp->uiVersion) - { - /* currently aos are packed */ - r700SetupVTXConstants(ctx, - i, - (void*)(&context->radeon.tcl.aos[j]), - (unsigned int)context->radeon.tcl.aos[j].components, - (unsigned int)context->radeon.tcl.aos[j].stride * 4, - (unsigned int)context->radeon.tcl.aos[j].count); - } - else - { /* context->selected_vp->uiVersion == 2 : aos not always packed */ - r700SetupVTXConstants2(ctx, - (void*)(&context->radeon.tcl.aos[j]), - &(context->stream_desc[j])); - } - j++; + if(vp->mesa_program->Base.InputsRead & (1 << i)) + { + r700SetupVTXConstants(ctx, + (void*)(&context->radeon.tcl.aos[j]), + &(context->stream_desc[j])); + j++; } } } @@ -1205,6 +1100,32 @@ static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom) COMMIT_BATCH(); } +static void r700SendQueryBegin(GLcontext *ctx, struct radeon_state_atom *atom) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = radeon->query.current; + BATCH_LOCALS(radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + /* clear the buffer */ + radeon_bo_map(query->bo, GL_FALSE); + memset(query->bo->ptr, 0, 4 * 2 * sizeof(uint64_t)); /* 4 DBs, 2 qwords each */ + radeon_bo_unmap(query->bo); + + radeon_cs_space_check_with_bo(radeon->cmdbuf.cs, + query->bo, + 0, RADEON_GEM_DOMAIN_GTT); + + BEGIN_BATCH_NO_AUTOSTATE(4 + 2); + R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2)); + R600_OUT_BATCH(ZPASS_DONE); + R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */ + R600_OUT_BATCH(0x00000000); + R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + query->emitted_begin = GL_TRUE; +} + static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) { return atom->cmd_size; @@ -1313,6 +1234,20 @@ static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom) return count; } +static int check_queryobj(GLcontext *ctx, struct radeon_state_atom *atom) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = radeon->query.current; + int count; + + if (!query || query->emitted_begin) + count = 0; + else + count = atom->cmd_size; + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + return count; +} + #define ALLOC_STATE( ATOM, CHK, SZ, EMIT ) \ do { \ context->atoms.ATOM.cmd_size = (SZ); \ @@ -1326,6 +1261,19 @@ do { \ insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \ } while (0) +static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ) +{ + radeon->query.queryobj.cmd_size = (SZ); + radeon->query.queryobj.cmd = NULL; + radeon->query.queryobj.name = "queryobj"; + radeon->query.queryobj.idx = 0; + radeon->query.queryobj.check = check_queryobj; + radeon->query.queryobj.dirty = GL_FALSE; + radeon->query.queryobj.emit = r700SendQueryBegin; + radeon->hw.max_state_size += (SZ); + insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj); +} + void r600InitAtoms(context_t *context) { radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context); @@ -1365,6 +1313,7 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState); ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState); ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState); + r600_init_query_stateobj(&context->radeon, 6 * 2); context->radeon.hw.is_dirty = GL_TRUE; context->radeon.hw.all_dirty = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 78ce3ae436..0f549ead9c 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -135,15 +135,19 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, { GLuint i, j; GLint * puiTEMPwrites; + GLint * puiTEMPreads; struct prog_instruction * pILInst; InstDeps *pInstDeps; struct prog_instruction * texcoord_DepInst; GLint nDepInstID; puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries); + puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries); + for(i=0; i<mesa_fp->Base.NumTemporaries; i++) { puiTEMPwrites[i] = -1; + puiTEMPreads[i] = -1; } pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions); @@ -167,6 +171,11 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, { //Set dep. pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index]; + //Set first read + if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 ) + { + puiTEMPreads[pILInst->SrcReg[j].Index] = i; + } } else { @@ -177,8 +186,6 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, fp->r700AsmCode.pInstDeps = pInstDeps; - FREE(puiTEMPwrites); - //Find dep for tex inst for(i=0; i<mesa_fp->Base.NumInstructions; i++) { @@ -203,9 +210,25 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, { //... other deps? } } + // make sure that we dont overwrite src used earlier + nDepInstID = puiTEMPreads[pILInst->DstReg.Index]; + if(nDepInstID < i) + { + pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index]; + texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]); + if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) ) + { + pInstDeps[nDepInstID].nDstDep = i; + } + + } + } } + FREE(puiTEMPwrites); + FREE(puiTEMPreads); + return GL_TRUE; } @@ -251,7 +274,15 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, number_of_colors_exported--; } - fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled; + /* illegal to set this to 0 */ + if(number_of_colors_exported || z_enabled) + { + fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled; + } + else + { + fp->r700Shader.exportMode = (1 << 1); + } fp->translated = GL_TRUE; @@ -341,6 +372,11 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); } + else + { + CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); + CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); + } ui = (unNumOfReg < ui) ? ui : unNumOfReg; diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c index 5290ef31be..0d476fcd86 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.c +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -40,6 +40,24 @@ #include "r700_vertprog.h" +static void freeVertProgCache(GLcontext *ctx, struct r700_vertex_program_cont *cache) +{ + struct r700_vertex_program *tmp, *vp = cache->progs; + + while (vp) { + tmp = vp->next; + /* Release DMA region */ + r600DeleteShader(ctx, vp->shaderbo); + /* Clean up */ + Clean_Up_Assembler(&(vp->r700AsmCode)); + Clean_Up_Shader(&(vp->r700Shader)); + + _mesa_reference_vertprog(ctx, &vp->mesa_program, NULL); + _mesa_free(vp); + vp = tmp; + } +} + static struct gl_program *r700NewProgram(GLcontext * ctx, GLenum target, GLuint id) @@ -84,8 +102,7 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { - struct r700_vertex_program_cont * vpc; - struct r700_vertex_program *vp, *tmp; + struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog; struct r700_fragment_program * fp; radeon_print(RADEON_SHADER, RADEON_VERBOSE, @@ -95,20 +112,7 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { case GL_VERTEX_STATE_PROGRAM_NV: case GL_VERTEX_PROGRAM_ARB: - vpc = (struct r700_vertex_program_cont*)prog; - vp = vpc->progs; - while (vp) { - tmp = vp->next; - /* Release DMA region */ - - r600DeleteShader(ctx, vp->shaderbo); - - /* Clean up */ - Clean_Up_Assembler(&(vp->r700AsmCode)); - Clean_Up_Shader(&(vp->r700Shader)); - _mesa_free(vp); - vp = tmp; - } + freeVertProgCache(ctx, vpc); break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: @@ -131,7 +135,24 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) static void r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { - + struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog; + struct r700_fragment_program * fp = (struct r700_fragment_program*)prog; + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + freeVertProgCache(ctx, vpc); + vpc->progs = NULL; + break; + case GL_FRAGMENT_PROGRAM_ARB: + r600DeleteShader(ctx, fp->shaderbo); + Clean_Up_Assembler(&(fp->r700AsmCode)); + Clean_Up_Shader(&(fp->r700Shader)); + fp->translated = GL_FALSE; + fp->loaded = GL_FALSE; + fp->shaderbo = NULL; + break; + } + } static GLboolean r700IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 5627984cf9..9cf984f966 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -251,19 +251,19 @@ static int r700NumVerts(int num_verts, int prim) static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) { - context_t *context = R700_CONTEXT(ctx); - BATCH_LOCALS(&context->radeon); - int type, i, total_emit; - int num_indices; - uint32_t vgt_draw_initiator = 0; - uint32_t vgt_index_type = 0; - uint32_t vgt_primitive_type = 0; - uint32_t vgt_num_indices = 0; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - + context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, i, total_emit; + int num_indices; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + TNLcontext *tnl = TNL_CONTEXT(ctx); + struct vertex_buffer *vb = &tnl->vb; GLboolean bUseDrawIndex; - if( (NULL != context->ind_buf.bo) && (GL_TRUE != context->ind_buf.bHostIb) ) + + if(NULL != context->ind_buf.bo) { bUseDrawIndex = GL_TRUE; } @@ -272,35 +272,35 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim bUseDrawIndex = GL_FALSE; } - type = r700PrimitiveType(prim); - num_indices = r700NumVerts(end - start, prim); + type = r700PrimitiveType(prim); + num_indices = r700NumVerts(end - start, prim); - radeon_print(RADEON_RENDER, RADEON_TRACE, - "%s type %x num_indices %d\n", - __func__, type, num_indices); + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); - if (type < 0 || num_indices <= 0) - return; + if (type < 0 || num_indices <= 0) + return; if(GL_TRUE == bUseDrawIndex) { total_emit = 3 /* VGT_PRIMITIVE_TYPE */ - + 2 /* VGT_INDEX_TYPE */ - + 2 /* NUM_INSTANCES */ - + 5+2; /* DRAW_INDEX */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + 5 + 2; /* DRAW_INDEX */ } else { total_emit = 3 /* VGT_PRIMITIVE_TYPE */ - + 2 /* VGT_INDEX_TYPE */ - + 2 /* NUM_INSTANCES */ - + num_indices + 3; /* DRAW_INDEX_IMMD */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + num_indices + 3; /* DRAW_INDEX_IMMD */ } BEGIN_BATCH_NO_AUTOSTATE(total_emit); - // prim + // prim SETfield(vgt_primitive_type, type, - VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); R600_OUT_BATCH(vgt_primitive_type); @@ -319,11 +319,11 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); R600_OUT_BATCH(vgt_index_type); - // num instances - R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); - R600_OUT_BATCH(1); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); - // draw packet + // draw packet vgt_num_indices = num_indices; if(GL_TRUE == bUseDrawIndex) @@ -354,44 +354,17 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); R600_OUT_BATCH(vgt_num_indices); R600_OUT_BATCH(vgt_draw_initiator); - } - if(NULL == context->ind_buf.bo) - { - for (i = start; i < (start + num_indices); i++) { + for (i = start; i < (start + num_indices); i++) + { if(vb->Elts) { R600_OUT_BATCH(vb->Elts[i]); } else + { R600_OUT_BATCH(i); - } - } - else - { - if(GL_TRUE == context->ind_buf.bHostIb) - { - if(GL_TRUE != context->ind_buf.is_32bit) - { - GLushort * pIndex = (GLushort*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - pIndex += start; - for (i = 0; i < num_indices; i++) - { - R600_OUT_BATCH(*pIndex); - pIndex++; - } - } - else - { - GLuint * pIndex = (GLuint*)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - pIndex += start; - - for (i = 0; i < num_indices; i++) - { - R600_OUT_BATCH(*pIndex); - pIndex++; - } - } + } } } @@ -402,173 +375,40 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim /* start 3d, idle, cb/db flush */ #define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 -static GLuint r700PredictRenderSize(GLcontext* ctx, GLuint nr_prims) +static GLuint r700PredictRenderSize(GLcontext* ctx, + const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + GLuint nr_prims) { context_t *context = R700_CONTEXT(ctx); - struct r700_vertex_program *vp = context->selected_vp; GLboolean flushed; GLuint dwords, i; GLuint state_size; - /* pre calculate aos count so state prediction works */ - context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead); dwords = PRE_EMIT_STATE_BUFSZ; - if (nr_prims) + if (ib) dwords += nr_prims * 14; else { - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - - for (i = 0; i < vb->PrimitiveCount; i++) - dwords += vb->Primitive[i].count + 10; + for (i = 0; i < nr_prims; ++i) + { + dwords += prim[i].count + 10; + } } + state_size = radeonCountStateEmitSize(&context->radeon); flushed = rcommonEnsureCmdBufSpace(&context->radeon, - dwords + state_size, __FUNCTION__); - + dwords + state_size, + __FUNCTION__); if (flushed) - dwords += radeonCountStateEmitSize(&context->radeon); + dwords += radeonCountStateEmitSize(&context->radeon); else - dwords += state_size; + dwords += state_size; - radeon_print(RADEON_RENDER, RADEON_VERBOSE, - "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords); return dwords; -} - -static GLboolean r700RunRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) -{ - context_t *context = R700_CONTEXT(ctx); - radeonContextPtr radeon = &context->radeon; - unsigned int i, id = 0; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - struct radeon_renderbuffer *rrb; - - radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n", - __func__, context->radeon.cmdbuf.cs->cdw); - - /* always emit CB base to prevent - * lock ups on some chips. - */ - R600_STATECHANGE(context, cb_target); - /* mark vtx as dirty since it changes per-draw */ - R600_STATECHANGE(context, vtx); - - r700SetScissor(context); - r700SetupVertexProgram(ctx); - r700SetupFragmentProgram(ctx); - r600UpdateTextureState(ctx); - - GLuint emit_end = r700PredictRenderSize(ctx, 0) - + context->radeon.cmdbuf.cs->cdw; - r700SetupStreams(ctx); - - radeonEmitState(radeon); - - radeon_debug_add_indent(); - /* richard test code */ - for (i = 0; i < vb->PrimitiveCount; i++) { - GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); - GLuint start = vb->Primitive[i].start; - GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - r700RunRenderPrimitive(ctx, start, end, prim); - } - radeon_debug_remove_indent(); - - /* Flush render op cached for last several quads. */ - r700WaitForIdleClean(context); - - rrb = radeon_get_colorbuffer(&context->radeon); - if (rrb && rrb->bo) - r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, - CB_ACTION_ENA_bit | (1 << (id + 6))); - - rrb = radeon_get_depthbuffer(&context->radeon); - if (rrb && rrb->bo) - r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, - DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); - - radeonReleaseArrays(ctx, ~0); - radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n", - __func__, context->radeon.cmdbuf.cs->cdw); - - if ( emit_end < context->radeon.cmdbuf.cs->cdw ) - WARN_ONCE("Rendering was %d commands larger than predicted size." - " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); - - return GL_FALSE; -} - -static GLboolean r700RunNonTCLRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) /* -------------------- */ -{ - GLboolean bRet = GL_TRUE; - - return bRet; } -static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ - struct tnl_pipeline_stage *stage) -{ - GLboolean bRet = GL_FALSE; - - /* TODO : sw fallback */ - - /* Need shader bo's setup before bo check */ - r700UpdateShaders(ctx); - /** - - * Ensure all enabled and complete textures are uploaded along with any buffers being used. - */ - if(!r600ValidateBuffers(ctx)) - { - return GL_TRUE; - } - - bRet = r700RunRender(ctx, stage); - - return bRet; - //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline - //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success. -} - -const struct tnl_pipeline_stage _r700_render_stage = { - "r700 Hardware Rasterization", - NULL, - NULL, - NULL, - NULL, - r700RunNonTCLRender -}; - -const struct tnl_pipeline_stage _r700_tcl_stage = { - "r700 Hardware Transform, Clipping and Lighting", - NULL, - NULL, - NULL, - NULL, - r700RunTCLRender -}; - -const struct tnl_pipeline_stage *r700_pipeline[] = -{ - &_r700_tcl_stage, - &_tnl_vertex_transform_stage, - &_tnl_normal_transform_stage, - &_tnl_lighting_stage, - &_tnl_fog_coordinate_stage, - &_tnl_texgen_stage, - &_tnl_texture_transform_stage, - &_tnl_vertex_program_stage, - - &_r700_render_stage, - &_tnl_render_stage, - 0, -}; - #define CONVERT( TYPE, MACRO ) do { \ GLuint i, j, sz; \ sz = input->Size; \ @@ -710,7 +550,7 @@ static void r700AlignDataToDword(GLcontext *ctx, attr->stride = dst_stride; } -static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *input[], int count) +static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count) { context_t *context = R700_CONTEXT(ctx); GLuint stride; @@ -778,19 +618,15 @@ static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *inpu { case 1: radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); - context->stream_desc[index].stride = 4; break; case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); - context->stream_desc[index].stride = 8; break; case 3: radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); - context->stream_desc[index].stride = 12; break; case 4: radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); - context->stream_desc[index].stride = 16; break; default: assert(0); @@ -813,7 +649,6 @@ static void r700SetupStreams2(GLcontext *ctx, const struct gl_client_array *inpu } } - context->radeon.tcl.aos_count = context->nNumActiveAos; ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, first_elem(&context->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0); @@ -826,30 +661,21 @@ static void r700FreeData(GLcontext *ctx) * called during context destroy */ context_t *context = R700_CONTEXT(ctx); - + int i; - for (i = 0; i < context->nNumActiveAos; i++) + for (i = 0; i < context->nNumActiveAos; i++) { - if (!context->stream_desc[i].is_named_bo) + if (!context->stream_desc[i].is_named_bo) { radeon_bo_unref(context->stream_desc[i].bo); } context->radeon.tcl.aos[i].bo = NULL; } - - if (context->ind_buf.bo != NULL) + + if (context->ind_buf.bo != NULL) { - if(context->ind_buf.bHostIb != GL_TRUE) - { radeon_bo_unref(context->ind_buf.bo); - } - else - { - FREE(context->ind_buf.bo->ptr); - FREE(context->ind_buf.bo); - context->ind_buf.bo = NULL; - } } } @@ -861,7 +687,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer int i; GLboolean mapped_named_bo = GL_FALSE; - if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); mapped_named_bo = GL_TRUE; @@ -869,66 +695,46 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer } src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); - if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) { GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); GLubyte *in = (GLubyte *)src_ptr; - if(context->ind_buf.bHostIb != GL_TRUE) - { - radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, - &context->ind_buf.bo_offset, size, 4); + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); - assert(context->ind_buf.bo->ptr != NULL); - out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - } - else - { - context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); - context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); - context->ind_buf.bo_offset = 0; - out = (GLuint *)context->ind_buf.bo->ptr; - } + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; } - if (i < mesa_ind_buf->count) + if (i < mesa_ind_buf->count) { *out++ = in[i]; } #if MESA_BIG_ENDIAN - } - else + } + else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ GLushort *in = (GLushort *)src_ptr; GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); - if(context->ind_buf.bHostIb != GL_TRUE) - { - radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, - &context->ind_buf.bo_offset, size, 4); + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); - assert(context->ind_buf.bo->ptr != NULL); - out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - } - else - { - context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); - context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); - context->ind_buf.bo_offset = 0; - out = (GLuint *)context->ind_buf.bo->ptr; - } + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) { *out++ = in[i] | in[i + 1] << 16; } - if (i < mesa_ind_buf->count) + if (i < mesa_ind_buf->count) { *out++ = in[i]; } @@ -938,7 +744,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer context->ind_buf.is_32bit = GL_FALSE; context->ind_buf.count = mesa_ind_buf->count; - if (mapped_named_bo) + if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); } @@ -953,20 +759,18 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer return; } - context->ind_buf.bHostIb = GL_FALSE; - #if MESA_BIG_ENDIAN - if (mesa_ind_buf->type == GL_UNSIGNED_INT) + if (mesa_ind_buf->type == GL_UNSIGNED_INT) { #else - if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) { #endif const GLvoid *src_ptr; GLvoid *dst_ptr; GLboolean mapped_named_bo = GL_FALSE; - if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) { ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); assert(mesa_ind_buf->obj->Pointer != NULL); @@ -977,44 +781,34 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); - if(context->ind_buf.bHostIb != GL_TRUE) - { - radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, - &context->ind_buf.bo_offset, size, 4); - assert(context->ind_buf.bo->ptr != NULL); - dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); - } - else - { - context->ind_buf.bo = MALLOC_STRUCT(radeon_bo); - context->ind_buf.bo->ptr = ALIGN_MALLOC(size, 4); - context->ind_buf.bo_offset = 0; - dst_ptr = context->ind_buf.bo->ptr; - } + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + assert(context->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); _mesa_memcpy(dst_ptr, src_ptr, size); context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); context->ind_buf.count = mesa_ind_buf->count; - if (mapped_named_bo) + if (mapped_named_bo) { ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); } - } - else + } + else { r700FixupIndexBuffer(ctx, mesa_ind_buf); } } static GLboolean r700TryDrawPrims(GLcontext *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint min_index, - GLuint max_index ) + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) { context_t *context = R700_CONTEXT(ctx); radeonContextPtr radeon = &context->radeon; @@ -1022,15 +816,12 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, struct radeon_renderbuffer *rrb; if (ctx->NewState) - { _mesa_update_state( ctx ); - } _tnl_UpdateFixedFunctionProgram(ctx); r700SetVertexFormat(ctx, arrays, max_index + 1); - r700SetupIndexBuffer(ctx, ib); /* shaders need to be updated before buffers are validated */ - r700UpdateShaders2(ctx); + r700UpdateShaders(ctx); if (!r600ValidateBuffers(ctx)) return GL_FALSE; @@ -1046,10 +837,11 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, r700SetupFragmentProgram(ctx); r600UpdateTextureState(ctx); - GLuint emit_end = r700PredictRenderSize(ctx, nr_prims) + GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims) + context->radeon.cmdbuf.cs->cdw; - r700SetupStreams2(ctx, arrays, max_index + 1); + r700SetupIndexBuffer(ctx, ib); + r700SetupStreams(ctx, arrays, max_index + 1); radeonEmitState(radeon); @@ -1087,18 +879,18 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, return GL_TRUE; } -static void r700DrawPrimsRe(GLcontext *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, - GLuint max_index) +static void r700DrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) { - GLboolean retval = GL_FALSE; + GLboolean retval = GL_FALSE; - /* This check should get folded into just the places that + /* This check should get folded into just the places that * min/max index are really needed. */ if (!index_bounds_valid) { @@ -1106,7 +898,7 @@ static void r700DrawPrimsRe(GLcontext *ctx, } if (min_index) { - vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrimsRe ); + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims ); return; } @@ -1118,38 +910,12 @@ static void r700DrawPrimsRe(GLcontext *ctx, _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); } -static void r700DrawPrims(GLcontext *ctx, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, - GLuint max_index) -{ - context_t *context = R700_CONTEXT(ctx); - - /* For non indexed drawing, using tnl pipe. */ - if(!ib) - { - context->ind_buf.bo = NULL; - - _tnl_vbo_draw_prims(ctx, arrays, prim, nr_prims, ib, - index_bounds_valid, min_index, max_index); - return; - } - - r700DrawPrimsRe(ctx, arrays, prim, nr_prims, ib, index_bounds_valid, min_index, max_index); -} - void r700InitDraw(GLcontext *ctx) { struct vbo_context *vbo = vbo_context(ctx); - + /* to be enabled */ - /* vbo->draw_prims = r700DrawPrims; - */ } diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h index 997cb05aaf..c6a058617e 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.h +++ b/src/mesa/drivers/dri/r600/r700_shader.h @@ -128,6 +128,7 @@ typedef struct R700_Shader //Internal void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst); +void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst); void ResolveLinks(R700_Shader *pShader); void Assemble(R700_Shader *pShader); diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index 65f83b8315..2b42bfa3f9 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -60,12 +60,9 @@ static void r700UpdatePolygonMode(GLcontext * ctx); static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state); static void r700SetStencilState(GLcontext * ctx, GLboolean state); -void r700UpdateShaders (GLcontext * ctx) //---------------------------------- +void r700UpdateShaders(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); - GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; - GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; - int i; /* should only happenen once, just after context is created */ /* TODO: shouldn't we fallback to sw here? */ @@ -76,40 +73,7 @@ void r700UpdateShaders (GLcontext * ctx) //---------------------------------- r700SelectFragmentShader(ctx); - if (context->radeon.NewGLState) { - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - /* mat states from state var not array for sw */ - dummy_attrib[i].stride = 0; - temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &(dummy_attrib[i]); - } - - _tnl_UpdateFixedFunctionProgram(ctx); - - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = temp_attrib[i]; - } - } - - r700SelectVertexShader(ctx, 1); - r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); - context->radeon.NewGLState = 0; -} - -void r700UpdateShaders2(GLcontext * ctx) -{ - context_t *context = R700_CONTEXT(ctx); - - /* should only happenen once, just after context is created */ - /* TODO: shouldn't we fallback to sw here? */ - if (!ctx->FragmentProgram._Current) { - _mesa_fprintf(stderr, "No ctx->FragmentProgram._Current!!\n"); - return; - } - - r700SelectFragmentShader(ctx); - - r700SelectVertexShader(ctx, 2); + r700SelectVertexShader(ctx); r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); context->radeon.NewGLState = 0; } @@ -492,10 +456,10 @@ static void r700SetBlendState(GLcontext * ctx) eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE), + blend_factor(ctx->Color.BlendSrcA, GL_TRUE), ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendDstRGB, GL_FALSE), + blend_factor(ctx->Color.BlendDstA, GL_FALSE), ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask); switch (ctx->Color.BlendEquationA) { @@ -770,9 +734,9 @@ static void r700ColorMask(GLcontext * ctx, (b ? 4 : 0) | (a ? 8 : 0)); - if (mask != r700->CB_SHADER_MASK.u32All) { + if (mask != r700->CB_TARGET_MASK.u32All) { R600_STATECHANGE(context, cb); - SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask); + SETfield(r700->CB_TARGET_MASK.u32All, mask, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask); } } @@ -1294,11 +1258,15 @@ void r700SetScissor(context_t *context) //--------------- return; } if (context->radeon.state.scissor.enabled) { - /* r600 has exclusive scissors */ x1 = context->radeon.state.scissor.rect.x1; y1 = context->radeon.state.scissor.rect.y1; - x2 = context->radeon.state.scissor.rect.x2 + 1; - y2 = context->radeon.state.scissor.rect.y2 + 1; + x2 = context->radeon.state.scissor.rect.x2; + y2 = context->radeon.state.scissor.rect.y2; + /* r600 has exclusive BR scissors */ + if (context->radeon.radeonScreen->kernel_mm) { + x2++; + y2++; + } } else { if (context->radeon.radeonScreen->driScreen->dri2.enabled) { x1 = 0; @@ -1706,6 +1674,7 @@ void r700InitState(GLcontext * ctx) //------------------- SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); + SETbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit); r700->DB_ALPHA_TO_MASK.u32All = 0; SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask); @@ -1779,7 +1748,7 @@ void r700InitState(GLcontext * ctx) //------------------- r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF; /* screen/window/view */ - SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask); + SETfield(r700->CB_SHADER_MASK.u32All, 0xF, (4 * id), OUTPUT0_ENABLE_mask); context->radeon.hw.all_dirty = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index e7a209be9d..ffc6068bd8 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -203,22 +203,11 @@ void Map_Vertex_Program(GLcontext *ctx, pAsm->number_used_registers += num_inputs; // Create VFETCH instructions for inputs - if(1 == vp->uiVersion) - { - if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) - { - radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); - return; - } - } - else - { - if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) ) - { - radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n"); - return; - } - } + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) ) + { + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n"); + return; + } // Map Outputs pAsm->number_of_exports = Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers); @@ -228,7 +217,7 @@ void Map_Vertex_Program(GLcontext *ctx, pAsm->number_used_registers += pAsm->number_of_exports; pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports); - + for(ui=0; ui<pAsm->number_of_exports; ui++) { pAsm->pucOutMask[ui] = 0x0; @@ -245,7 +234,7 @@ void Map_Vertex_Program(GLcontext *ctx, { /* fix func t_vp uses NumTemporaries */ pAsm->number_used_registers += mesa_vp->Base.NumTemporaries; } - + pAsm->uFirstHelpReg = pAsm->number_used_registers; } @@ -300,18 +289,13 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, } struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp, - GLint nVer) + struct gl_vertex_program *mesa_vp) { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program *vp; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - unsigned int unBit; unsigned int i; vp = _mesa_calloc(sizeof(*vp)); - vp->uiVersion = nVer; vp->mesa_program = (struct gl_vertex_program *)_mesa_clone_program(ctx, &mesa_vp->Base); if (mesa_vp->IsPositionInvariant) @@ -319,17 +303,13 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, _mesa_insert_mvp_code(ctx, vp->mesa_program); } - for(i=0; i<VERT_ATTRIB_MAX; i++) + for(i=0; i<context->nNumActiveAos; i++) { - unBit = 1 << i; - if(vp->mesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */ - { - vp->aos_desc[i].size = vb->AttribPtr[i]->size; - vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/ - vp->aos_desc[i].type = GL_FLOAT; - } + vp->aos_desc[i].size = context->stream_desc[i].size; + vp->aos_desc[i].stride = context->stream_desc[i].stride; + vp->aos_desc[i].type = context->stream_desc[i].type; } - + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { vp->r700AsmCode.bR6xx = 1; @@ -342,14 +322,14 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program)) { return NULL; - } + } if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions, - &(vp->mesa_program->Base.Instructions[0]), + &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) { return NULL; - } + } if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) ) { @@ -366,14 +346,11 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, return vp; } -void r700SelectVertexShader(GLcontext *ctx, GLint nVersion) +void r700SelectVertexShader(GLcontext *ctx) { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program_cont *vpc; struct r700_vertex_program *vp; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - unsigned int unBit; unsigned int i; GLboolean match; GLbitfield InputsRead; @@ -384,29 +361,27 @@ void r700SelectVertexShader(GLcontext *ctx, GLint nVersion) if (vpc->mesa_program.IsPositionInvariant) { InputsRead |= VERT_BIT_POS; - } - + } + for (vp = vpc->progs; vp; vp = vp->next) { - match = GL_TRUE; - for(i=0; i<VERT_ATTRIB_MAX; i++) + match = GL_TRUE; + for(i=0; i<context->nNumActiveAos; i++) { - unBit = 1 << i; - if(InputsRead & unBit) + if (vp->aos_desc[i].size != context->stream_desc[i].size) { - if (vp->aos_desc[i].size != vb->AttribPtr[i]->size) - match = GL_FALSE; - break; + match = GL_FALSE; + break; } } - if (match) + if (match) { context->selected_vp = vp; return; } } - vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program), nVersion); + vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program)); if(!vp) { radeon_error("Failed to translate vertex shader. \n"); @@ -540,6 +515,11 @@ void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], unsigned int unBit = mesa_vp->Base.InputsRead; context->nNumActiveAos = 0; + if (mesa_vp->IsPositionInvariant) + { + unBit |= VERT_BIT_POS; + } + while(unBit) { if(unBit & 1) @@ -550,6 +530,7 @@ void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], unBit >>= 1; ++unLoc; } + context->radeon.tcl.aos_count = context->nNumActiveAos; } void * r700GetActiveVpShaderBo(GLcontext * ctx) diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h index f9a3e395ee..00824c29d3 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.h +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -52,8 +52,7 @@ struct r700_vertex_program GLboolean translated; GLboolean loaded; - GLint uiVersion; - + void * shaderbo; ArrayDesc aos_desc[VERT_ATTRIB_MAX]; @@ -87,11 +86,10 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp, - GLint nVer); + struct gl_vertex_program *mesa_vp); /* Interface */ -extern void r700SelectVertexShader(GLcontext *ctx, GLint nVersion); +extern void r700SelectVertexShader(GLcontext *ctx); extern void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count); extern GLboolean r700SetupVertexProgram(GLcontext * ctx); |