diff options
author | Michal Krol <michal@vmware.com> | 2009-12-12 16:48:32 +0100 |
---|---|---|
committer | Michal Krol <michal@vmware.com> | 2009-12-12 16:48:32 +0100 |
commit | a3eb0f718e19653a2ad8e49396c904183be456f3 (patch) | |
tree | 0092574c469ea586a6cab8b8ebb7ac62b8221a2a /src/mesa/drivers/dri/r600 | |
parent | 491f384c3958067e6c4c994041f5d8d413b806bc (diff) | |
parent | 784cca9fa527de771754d76545970f78094b9adf (diff) |
Merge branch 'master' into glsl-pp-rework-2
Conflicts:
progs/perf/drawoverhead.c
progs/perf/teximage.c
progs/perf/vbo.c
progs/perf/vertexrate.c
src/mesa/shader/slang/library/slang_common_builtin_gc.h
Diffstat (limited to 'src/mesa/drivers/dri/r600')
23 files changed, 4791 insertions, 994 deletions
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 36bf773c05..9b7c42042e 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -29,6 +29,7 @@ COMMON_SOURCES = \ RADEON_COMMON_SOURCES = \ radeon_bo_legacy.c \ radeon_common_context.c \ + radeon_buffer_objects.c \ radeon_common.c \ radeon_cs_legacy.c \ radeon_dma.c \ @@ -75,4 +76,3 @@ DRI_LIB_DEPS += $(RADEON_LDFLAGS) include ../Makefile.template -symlinks: diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index 3cfe03a45f..d27a3245a3 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -254,7 +254,7 @@ static int r600_cs_process_relocs(struct radeon_cs *cs, relocs = (struct r600_cs_reloc_legacy *)cs->relocs; restart: for (i = 0; i < cs->crelocs; i++) { - uint32_t soffset, eoffset, asicoffset; + uint32_t soffset, eoffset; r = radeon_bo_legacy_validate(relocs[i].base.bo, &soffset, &eoffset); @@ -262,24 +262,12 @@ restart: goto restart; } if (r) { - fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", + fprintf(stderr, "invalid bo(%p) [0x%08X, 0x%08X]\n", relocs[i].base.bo, soffset, eoffset); return r; } - asicoffset = soffset; for (j = 0; j < relocs[i].cindices; j++) { - if (asicoffset >= eoffset) { - /* radeon_bo_debug(relocs[i].base.bo, 12); */ - fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n", - relocs[i].base.bo, soffset, eoffset); - fprintf(stderr, "above end: %p 0x%08X 0x%08X\n", - relocs[i].base.bo, - cs->packets[relocs[i].indices[j]], - eoffset); - exit(0); - return -EINVAL; - } /* pkt3 nop header in ib chunk */ cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000; /* reloc index in ib chunk */ @@ -287,7 +275,7 @@ restart: } /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */ - reloc_chunk[offset_dw] = asicoffset; + reloc_chunk[offset_dw] = soffset; reloc_chunk[offset_dw + 3] = 0; offset_dw += 4; diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index f8fd9c13d7..25314eff56 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -59,10 +59,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "radeon_debug.h" #include "r600_context.h" #include "radeon_common_context.h" +#include "radeon_buffer_objects.h" #include "radeon_span.h" #include "r600_cmdbuf.h" #include "r600_emit.h" #include "radeon_bocs_wrapper.h" +#include "radeon_queryobj.h" #include "r700_state.h" #include "r700_ioctl.h" @@ -72,11 +74,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "utils.h" #include "xmlpool.h" /* for symbolic values of enum-type options */ -/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */ -int future_hw_tcl_on = 1; -int hw_tcl_on = 1; +//#define R600_ENABLE_GLSL_TEST 1 #define need_GL_VERSION_2_0 +#define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_vertex_program #define need_GL_EXT_blend_equation_separate @@ -91,14 +92,14 @@ int hw_tcl_on = 1; #define need_GL_ATI_separate_stencil #define need_GL_NV_vertex_program -#include "extension_helper.h" +#include "main/remap_helper.h" -extern const struct tnl_pipeline_stage *r700_pipeline[]; - -const struct dri_extension card_extensions[] = { +static const struct dri_extension card_extensions[] = { /* *INDENT-OFF* */ + {"GL_ARB_depth_clamp", NULL}, {"GL_ARB_depth_texture", NULL}, {"GL_ARB_fragment_program", NULL}, + {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions}, {"GL_ARB_multitexture", NULL}, {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, {"GL_ARB_shadow", NULL}, @@ -110,6 +111,7 @@ const struct dri_extension card_extensions[] = { {"GL_ARB_texture_env_crossbar", NULL}, {"GL_ARB_texture_env_dot3", NULL}, {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_texture_non_power_of_two", NULL}, {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, @@ -130,6 +132,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_texture_lod_bias", NULL}, {"GL_EXT_texture_mirror_clamp", NULL}, {"GL_EXT_texture_rectangle", NULL}, + {"GL_EXT_vertex_array_bgra", NULL}, {"GL_EXT_texture_sRGB", NULL}, {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions}, {"GL_ATI_texture_env_combine3", NULL}, @@ -145,7 +148,7 @@ const struct dri_extension card_extensions[] = { }; -const struct dri_extension mm_extensions[] = { +static const struct dri_extension mm_extensions[] = { { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, { NULL, NULL } }; @@ -154,21 +157,28 @@ const struct dri_extension mm_extensions[] = { * The GL 2.0 functions are needed to make display lists work with * functions added by GL_ATI_separate_stencil. */ -const struct dri_extension gl_20_extension[] = { +static const struct dri_extension gl_20_extension[] = { +#ifdef R600_ENABLE_GLSL_TEST + {"GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, +#else {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, +#endif /* R600_ENABLE_GLSL_TEST */ }; - -static void r600RunPipeline(GLcontext * ctx) -{ - _mesa_lock_context_textures(ctx); - - if (ctx->NewState) - _mesa_update_state_locked(ctx); - - _tnl_run_pipeline(ctx); - _mesa_unlock_context_textures(ctx); -} +static const struct tnl_pipeline_stage *r600_pipeline[] = { + /* Catch any t&l fallbacks + */ + &_tnl_vertex_transform_stage, + &_tnl_normal_transform_stage, + &_tnl_lighting_stage, + &_tnl_fog_coordinate_stage, + &_tnl_texgen_stage, + &_tnl_texture_transform_stage, + &_tnl_point_attenuation_stage, + &_tnl_vertex_program_stage, + &_tnl_render_stage, + 0, +}; static void r600_get_lock(radeonContextPtr rmesa) { @@ -179,7 +189,7 @@ static void r600_get_lock(radeonContextPtr rmesa) if (!rmesa->radeonScreen->kernel_mm) radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom); } -} +} static void r600_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa) { @@ -201,6 +211,24 @@ static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode) context->radeon.Fallback &= ~bit; } +static void r600_emit_query_finish(radeonContextPtr radeon) +{ + context_t *context = (context_t*) radeon; + BATCH_LOCALS(&context->radeon); + + struct radeon_query_object *query = radeon->query.current; + + BEGIN_BATCH_NO_AUTOSTATE(4 + 2); + R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2)); + R600_OUT_BATCH(ZPASS_DONE); + R600_OUT_BATCH(query->curr_offset + 8); /* hw writes qwords */ + R600_OUT_BATCH(0x00000000); + R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE); + query->emitted_begin = GL_FALSE; +} + static void r600_init_vtbl(radeonContextPtr radeon) { radeon->vtbl.get_lock = r600_get_lock; @@ -209,6 +237,121 @@ static void r600_init_vtbl(radeonContextPtr radeon) radeon->vtbl.swtcl_flush = NULL; radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms; radeon->vtbl.fallback = r600_fallback; + radeon->vtbl.emit_query_finish = r600_emit_query_finish; +} + +static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen) +{ + context_t *r600 = R700_CONTEXT(ctx); + + ctx->Const.MaxTextureImageUnits = + driQueryOptioni(&r600->radeon.optionCache, "texture_image_units"); + ctx->Const.MaxTextureCoordUnits = + driQueryOptioni(&r600->radeon.optionCache, "texture_coord_units"); + ctx->Const.MaxTextureUnits = + MIN2(ctx->Const.MaxTextureImageUnits, + ctx->Const.MaxTextureCoordUnits); + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + ctx->Const.MaxTextureLodBias = 16.0; + + ctx->Const.MaxTextureLevels = 13; /* hw support 14 */ + ctx->Const.MaxTextureRectSize = 4096; /* hw support 8192 */ + + ctx->Const.MinPointSize = 0x0001 / 8.0; + ctx->Const.MinPointSizeAA = 0x0001 / 8.0; + ctx->Const.MaxPointSize = 0xffff / 8.0; + ctx->Const.MaxPointSizeAA = 0xffff / 8.0; + + ctx->Const.MinLineWidth = 0x0001 / 8.0; + ctx->Const.MinLineWidthAA = 0x0001 / 8.0; + ctx->Const.MaxLineWidth = 0xffff / 8.0; + ctx->Const.MaxLineWidthAA = 0xffff / 8.0; + + ctx->Const.MaxDrawBuffers = 1; /* hw supports 8 */ + + /* 256 for reg-based consts, inline consts also supported */ + ctx->Const.VertexProgram.MaxInstructions = 8192; /* in theory no limit */ + ctx->Const.VertexProgram.MaxNativeInstructions = 8192; + ctx->Const.VertexProgram.MaxNativeAttribs = 160; + ctx->Const.VertexProgram.MaxTemps = 128; + ctx->Const.VertexProgram.MaxNativeTemps = 128; + ctx->Const.VertexProgram.MaxNativeParameters = 256; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; /* ??? */ + + ctx->Const.FragmentProgram.MaxNativeTemps = 128; + ctx->Const.FragmentProgram.MaxNativeAttribs = 32; + ctx->Const.FragmentProgram.MaxNativeParameters = 256; + ctx->Const.FragmentProgram.MaxNativeAluInstructions = 8192; + /* 8 per clause on r6xx, 16 on rv670/r7xx */ + if ((screen->chip_family == CHIP_FAMILY_RV670) || + (screen->chip_family >= CHIP_FAMILY_RV770)) + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 16; + else + ctx->Const.FragmentProgram.MaxNativeTexInstructions = 8; + ctx->Const.FragmentProgram.MaxNativeInstructions = 8192; + ctx->Const.FragmentProgram.MaxNativeTexIndirections = 8; /* ??? */ + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ +} + +static void r600ParseOptions(context_t *r600, radeonScreenPtr screen) +{ + /* Parse configuration files. + * Do this here so that initialMaxAnisotropy is set before we create + * the default textures. + */ + driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache, + screen->driScreen->myNum, "r600"); + + r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache, + "def_max_anisotropy"); + +} + +static void r600InitGLExtensions(GLcontext *ctx) +{ + context_t *r600 = R700_CONTEXT(ctx); + + driInitExtensions(ctx, card_extensions, GL_TRUE); + if (r600->radeon.radeonScreen->kernel_mm) + driInitExtensions(ctx, mm_extensions, GL_FALSE); + +#ifdef R600_ENABLE_GLSL_TEST + driInitExtensions(ctx, gl_20_extension, GL_TRUE); + //_mesa_enable_2_0_extensions(ctx); + //1.5 + ctx->Extensions.ARB_occlusion_query = GL_TRUE; + ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE; + ctx->Extensions.EXT_shadow_funcs = GL_TRUE; + //2.0 + ctx->Extensions.ARB_draw_buffers = GL_TRUE; + ctx->Extensions.ARB_point_sprite = GL_TRUE; + ctx->Extensions.ARB_shader_objects = GL_TRUE; + ctx->Extensions.ARB_vertex_shader = GL_TRUE; + ctx->Extensions.ARB_fragment_shader = GL_TRUE; + ctx->Extensions.EXT_blend_equation_separate = GL_TRUE; + ctx->Extensions.ATI_separate_stencil = GL_TRUE; + + /* glsl compiler has problem if this is not GL_TRUE */ + ctx->Shader.EmitCondCodes = GL_TRUE; +#endif /* R600_ENABLE_GLSL_TEST */ + + if (driQueryOptionb + (&r600->radeon.optionCache, "disable_stencil_two_side")) + _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); + + if (r600->radeon.glCtx->Mesa_DXTn + && !driQueryOptionb(&r600->radeon.optionCache, "disable_s3tc")) { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + _mesa_enable_extension(ctx, "GL_S3_s3tc"); + } else + if (driQueryOptionb(&r600->radeon.optionCache, "force_s3tc_enable")) + { + _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); + } + + /* XXX: RV740 only seems to report results from half of its DBs */ + if (r600->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV740) + _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); } /* Create the device specific rendering context. @@ -234,19 +377,10 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, return GL_FALSE; } - if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) - hw_tcl_on = future_hw_tcl_on = 0; + r600ParseOptions(r600, screen); + r600->radeon.radeonScreen = screen; r600_init_vtbl(&r600->radeon); - /* Parse configuration files. - * Do this here so that initialMaxAnisotropy is set before we create - * the default textures. - */ - driParseConfigFiles(&r600->radeon.optionCache, &screen->optionCache, - screen->driScreen->myNum, "r600"); - - r600->radeon.initialMaxAnisotropy = driQueryOptionf(&r600->radeon.optionCache, - "def_max_anisotropy"); /* Init default driver functions then plug in our R600-specific functions * (the texture functions are especially important) @@ -256,7 +390,9 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, r700InitStateFuncs(&functions); r600InitTextureFuncs(&functions); r700InitShaderFuncs(&functions); + radeonInitQueryObjFunctions(&functions); r700InitIoctlFuncs(&functions); + radeonInitBufferObjectFuncs(&functions); if (!radeonInitContext(&r600->radeon, &functions, glVisual, driContextPriv, @@ -266,44 +402,14 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, return GL_FALSE; } - /* Init r600 context data */ - /* Set the maximum texture size small enough that we can guarentee that - * all texture units can bind a maximal texture and have them both in - * texturable memory at once. - */ - ctx = r600->radeon.glCtx; - ctx->Const.MaxTextureImageUnits = - driQueryOptioni(&r600->radeon.optionCache, "texture_image_units"); - ctx->Const.MaxTextureCoordUnits = - driQueryOptioni(&r600->radeon.optionCache, "texture_coord_units"); - ctx->Const.MaxTextureUnits = - MIN2(ctx->Const.MaxTextureImageUnits, - ctx->Const.MaxTextureCoordUnits); - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - ctx->Const.MaxTextureLodBias = 16.0; - - ctx->Const.MaxTextureLevels = 13; - ctx->Const.MaxTextureRectSize = 4096; - - ctx->Const.MinPointSize = 0x0001 / 8.0; - ctx->Const.MinPointSizeAA = 0x0001 / 8.0; - ctx->Const.MaxPointSize = 0xffff / 8.0; - ctx->Const.MaxPointSizeAA = 0xffff / 8.0; - - ctx->Const.MinLineWidth = 0x0001 / 8.0; - ctx->Const.MinLineWidthAA = 0x0001 / 8.0; - ctx->Const.MaxLineWidth = 0xffff / 8.0; - ctx->Const.MaxLineWidthAA = 0xffff / 8.0; + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - /* Needs further modifications */ -#if 0 - ctx->Const.MaxArrayLockSize = - ( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4); -#endif + r600InitConstValues(ctx, screen); - ctx->Const.MaxDrawBuffers = 1; + _mesa_set_mvp_with_dp4( ctx, GL_TRUE ); /* Initialize the software rasterizer and helper modules. */ @@ -312,16 +418,12 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_CreateContext(ctx); _swsetup_CreateContext(ctx); _swsetup_Wakeup(ctx); - _ae_create_context(ctx); /* Install the customized pipeline: */ _tnl_destroy_pipeline(ctx); - _tnl_install_pipeline(ctx, r700_pipeline); - - /* Try and keep materials and vertices separate: - */ -/* _tnl_isolate_materials(ctx, GL_TRUE); */ + _tnl_install_pipeline(ctx, r600_pipeline); + TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; /* Configure swrast and TNL to match hardware characteristics: */ @@ -330,62 +432,16 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _tnl_allow_pixel_fog(ctx, GL_FALSE); _tnl_allow_vertex_fog(ctx, GL_TRUE); - /* currently bogus data */ - ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeInstructions = - VSF_MAX_FRAGMENT_LENGTH / 4; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */ - ctx->Const.VertexProgram.MaxTemps = 32; - ctx->Const.VertexProgram.MaxNativeTemps = - /*VSF_MAX_FRAGMENT_TEMPS */ 32; - ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */ - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; - - ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS; - ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */ - ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS; - ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST; - ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeInstructions = - PFS_MAX_ALU_INST + PFS_MAX_TEX_INST; - ctx->Const.FragmentProgram.MaxNativeTexIndirections = - PFS_MAX_TEX_INDIRECT; - ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */ - ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - radeon_init_debug(); - driInitExtensions(ctx, card_extensions, GL_TRUE); - if (r600->radeon.radeonScreen->kernel_mm) - driInitExtensions(ctx, mm_extensions, GL_FALSE); - - if (driQueryOptionb - (&r600->radeon.optionCache, "disable_stencil_two_side")) - _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side"); - - if (r600->radeon.glCtx->Mesa_DXTn - && !driQueryOptionb(&r600->radeon.optionCache, "disable_s3tc")) { - _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); - _mesa_enable_extension(ctx, "GL_S3_s3tc"); - } else - if (driQueryOptionb(&r600->radeon.optionCache, "force_s3tc_enable")) - { - _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); - } + r700InitDraw(ctx); radeon_fbo_init(&r600->radeon); radeonInitSpanFuncs( ctx ); - r600InitCmdBuf(r600); - r700InitState(r600->radeon.glCtx); - TNL_CONTEXT(ctx)->Driver.RunPipeline = r600RunPipeline; - - if (driQueryOptionb(&r600->radeon.optionCache, "no_rast")) { - radeon_warning("disabling 3D acceleration\n"); - } + r600InitGLExtensions(ctx); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index c59df7505a..394fd757d4 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -58,57 +58,15 @@ typedef struct r600_context context_t; #include "main/mm.h" -/************ DMA BUFFERS **************/ - -/* The blit width for texture uploads - */ -#define R600_BLIT_WIDTH_BYTES 1024 -#define R600_MAX_TEXTURE_UNITS 8 - -struct r600_texture_state { - int tc_count; /* number of incoming texture coordinates from VAP */ -}; - -/* Perhaps more if we store programs in vmem? */ -/* drm_r600_cmd_header_t->vpu->count is unsigned char */ -#define VSF_MAX_FRAGMENT_LENGTH (255*4) - -/* Can be tested with colormat currently. */ -#define VSF_MAX_FRAGMENT_TEMPS (14) - -#define STATE_R600_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0) -#define STATE_R600_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1) - -extern int hw_tcl_on; - #define COLOR_IS_RGBA #define TAG(x) r600##x #include "tnl_dd/t_dd_vertex.h" #undef TAG -#define PFS_MAX_ALU_INST 64 -#define PFS_MAX_TEX_INST 64 -#define PFS_MAX_TEX_INDIRECT 4 -#define PFS_NUM_TEMP_REGS 32 -#define PFS_NUM_CONST_REGS 16 - -#define R600_MAX_AOS_ARRAYS 16 - -#define REG_COORDS 0 -#define REG_COLOR0 1 -#define REG_TEX0 2 - #define R600_FALLBACK_NONE 0 #define R600_FALLBACK_TCL 1 #define R600_FALLBACK_RAST 2 -enum -{ - NO_SHIFT = 0, - LEFT_SHIFT = 1, - RIGHT_SHIFT = 2, -}; - struct r600_hw_state { struct radeon_state_atom sq; struct radeon_state_atom db; @@ -145,6 +103,32 @@ struct r600_hw_state { struct radeon_state_atom tx_brdr_clr; }; +typedef struct StreamDesc +{ + GLint size; //number of data element + GLenum type; //data element type + GLsizei stride; + + struct radeon_bo *bo; + GLint bo_offset; + + GLuint dwords; + GLuint dst_loc; + GLuint _signed; + GLboolean normalize; + GLboolean is_named_bo; + GLubyte element; +} StreamDesc; + +typedef struct r700_index_buffer +{ + struct radeon_bo *bo; + int bo_offset; + + GLboolean is_32bit; + GLuint count; +} r700_index_buffer; + /** * \brief R600 context structure. */ @@ -160,9 +144,9 @@ struct r600_context { /* Vertex buffers */ - GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; - GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; - + GLint nNumActiveAos; + StreamDesc stream_desc[VERT_ATTRIB_MAX]; + struct r700_index_buffer ind_buf; }; #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) @@ -193,16 +177,13 @@ extern GLboolean r700SyncSurf(context_t *context, uint32_t write_domain, uint32_t sync_type); -extern void r700SetupStreams(GLcontext * ctx); extern void r700Start3D(context_t *context); extern void r600InitAtoms(context_t *context); +extern void r700InitDraw(GLcontext *ctx); #define RADEON_D_CAPTURE 0 #define RADEON_D_PLAYBACK 1 #define RADEON_D_PLAYBACK_RAW 2 #define RADEON_D_T 3 -#define r600PackFloat32 radeonPackFloat32 -#define r600PackFloat24 radeonPackFloat24 - #endif /* __R600_CONTEXT_H__ */ diff --git a/src/mesa/drivers/dri/r600/r600_reg_r6xx.h b/src/mesa/drivers/dri/r600/r600_reg_r6xx.h index f7702c46de..74af7b4fed 100644 --- a/src/mesa/drivers/dri/r600/r600_reg_r6xx.h +++ b/src/mesa/drivers/dri/r600/r600_reg_r6xx.h @@ -415,11 +415,11 @@ enum { ALPHA_TO_MASK_ENABLE = 1 << 0, ALPHA_TO_MASK_OFFSET0_mask = 0x03 << 8, ALPHA_TO_MASK_OFFSET0_shift = 8, - ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET1_mask = 0x03 << 10, ALPHA_TO_MASK_OFFSET1_shift = 10, - ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET2_mask = 0x03 << 12, ALPHA_TO_MASK_OFFSET2_shift = 12, - ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 8, + ALPHA_TO_MASK_OFFSET3_mask = 0x03 << 14, ALPHA_TO_MASK_OFFSET3_shift = 14, // SQ_VTX_CONSTANT_WORD2_0 = 0x00038008, diff --git a/src/mesa/drivers/dri/r600/r600_reg_r7xx.h b/src/mesa/drivers/dri/r600/r600_reg_r7xx.h index e5c01c861a..eb169bd885 100644 --- a/src/mesa/drivers/dri/r600/r600_reg_r7xx.h +++ b/src/mesa/drivers/dri/r600/r600_reg_r7xx.h @@ -143,6 +143,8 @@ enum { // SQ_TEX_SAMPLER_MISC_0 = 0x0003d03c, R7xx_TRUNCATE_COORD_bit = 1 << 9, R7xx_DISABLE_CUBE_WRAP_bit = 1 << 10, +// DB_RENDER_CONTROL = 0x00028d0c, + PERFECT_ZPASS_COUNTS_bit = 1 << 15, } ; diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index d105b90cd1..9d83a64e22 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -40,7 +40,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/image.h" #include "main/mipmap.h" #include "main/simple_list.h" -#include "main/texformat.h" #include "main/texstore.h" #include "main/teximage.h" #include "main/texobj.h" @@ -286,6 +285,7 @@ static void r600TexParameter(GLcontext * ctx, GLenum target, GLenum pname, const GLfloat * params) { radeonTexObj* t = radeon_tex_obj(texObj); + GLenum baseFormat; radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_VERBOSE, "%s( %s )\n", __FUNCTION__, @@ -312,23 +312,15 @@ static void r600TexParameter(GLcontext * ctx, GLenum target, case GL_TEXTURE_MAX_LEVEL: case GL_TEXTURE_MIN_LOD: case GL_TEXTURE_MAX_LOD: - /* This isn't the most efficient solution but there doesn't appear to - * be a nice alternative. Since there's no LOD clamping, - * we just have to rely on loading the right subset of mipmap levels - * to simulate a clamped LOD. - */ - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = 0; - t->validated = GL_FALSE; - } + t->validated = GL_FALSE; break; case GL_DEPTH_TEXTURE_MODE: if (!texObj->Image[0][texObj->BaseLevel]) return; - if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat - == GL_DEPTH_COMPONENT) { + baseFormat = texObj->Image[0][texObj->BaseLevel]->_BaseFormat; + if (baseFormat == GL_DEPTH_COMPONENT || + baseFormat == GL_DEPTH_STENCIL) { r600SetDepthTexMode(texObj); break; } else { @@ -368,10 +360,8 @@ static void r600DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) t->bo = NULL; } - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = 0; - } + radeon_miptree_unreference(&t->mt); + _mesa_delete_texture_object(ctx, texObj); } diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 7d7e77d355..2a4a6e6ee1 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -39,7 +39,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/imports.h" #include "main/context.h" #include "main/macros.h" -#include "main/texformat.h" #include "main/teximage.h" #include "main/texobj.h" #include "main/enums.h" @@ -78,7 +77,7 @@ void r600UpdateTextureState(GLcontext * ctx) } } -static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_format) +static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa_format) { radeonTexObj *t = radeon_tex_obj(tObj); @@ -87,9 +86,19 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + switch (mesa_format) /* This is mesa format. */ { case MESA_FORMAT_RGBA8888: + case MESA_FORMAT_SIGNED_RGBA8888: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); @@ -101,8 +110,19 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) { + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } break; case MESA_FORMAT_RGBA8888_REV: + case MESA_FORMAT_SIGNED_RGBA8888_REV: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); @@ -114,6 +134,16 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); + if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) { + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_X_shift, FORMAT_COMP_X_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED, + FORMAT_COMP_W_shift, FORMAT_COMP_W_mask); + } break; case MESA_FORMAT_ARGB8888: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, @@ -480,13 +510,21 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; case MESA_FORMAT_Z16: + case MESA_FORMAT_X8_Z24: + case MESA_FORMAT_S8_Z24: case MESA_FORMAT_Z24_S8: case MESA_FORMAT_Z32: + case MESA_FORMAT_S8: switch (mesa_format) { case MESA_FORMAT_Z16: SETfield(t->SQ_TEX_RESOURCE1, FMT_16, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); break; + case MESA_FORMAT_X8_Z24: + case MESA_FORMAT_S8_Z24: + SETfield(t->SQ_TEX_RESOURCE1, FMT_8_24, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + break; case MESA_FORMAT_Z24_S8: SETfield(t->SQ_TEX_RESOURCE1, FMT_24_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); @@ -495,6 +533,12 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, GLuint mesa_fo SETfield(t->SQ_TEX_RESOURCE1, FMT_32, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); break; + case MESA_FORMAT_S8: + SETfield(t->SQ_TEX_RESOURCE1, FMT_8, + SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); + break; + default: + break; }; switch (tObj->DepthMode) { case GL_LUMINANCE: /* X, X, X, ONE */ @@ -591,7 +635,7 @@ void r600SetDepthTexMode(struct gl_texture_object *tObj) t = radeon_tex_obj(tObj); - r600GetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat); + r600GetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat); } @@ -605,7 +649,6 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex { radeonTexObj *t = radeon_tex_obj(texObj); const struct gl_texture_image *firstImage; - int firstlevel = t->mt ? t->mt->firstLevel : 0; GLuint uTexelPitch, row_align; if (rmesa->radeon.radeonScreen->driScreen->dri2.enabled && @@ -613,10 +656,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex t->bo) return; - firstImage = t->base.Image[0][firstlevel]; + firstImage = t->base.Image[0][t->minLod]; if (!t->image_override) { - if (!r600GetTexFormat(texObj, firstImage->TexFormat->MesaFormat)) { + if (!r600GetTexFormat(texObj, firstImage->TexFormat)) { radeon_error("unexpected texture format in %s\n", __FUNCTION__); return; @@ -648,7 +691,8 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex } row_align = rmesa->radeon.texture_row_align - 1; - uTexelPitch = ((firstImage->Width * t->mt->bpp + row_align) & ~row_align) / t->mt->bpp; + uTexelPitch = (_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align; + uTexelPitch = uTexelPitch / _mesa_get_format_bytes(firstImage->TexFormat); uTexelPitch = (uTexelPitch + R700_TEXEL_PITCH_ALIGNMENT_MASK) & ~R700_TEXEL_PITCH_ALIGNMENT_MASK; @@ -662,10 +706,10 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex SETfield(t->SQ_TEX_RESOURCE1, firstImage->Height - 1, TEX_HEIGHT_shift, TEX_HEIGHT_mask); - if ((t->mt->lastLevel - t->mt->firstLevel) > 0) { - t->SQ_TEX_RESOURCE3 = t->mt->levels[0].size / 256; - SETfield(t->SQ_TEX_RESOURCE4, t->mt->firstLevel, BASE_LEVEL_shift, BASE_LEVEL_mask); - SETfield(t->SQ_TEX_RESOURCE5, t->mt->lastLevel, LAST_LEVEL_shift, LAST_LEVEL_mask); + if ((t->maxLod - t->minLod) > 0) { + t->SQ_TEX_RESOURCE3 = t->mt->levels[t->minLod].size / 256; + SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask); + SETfield(t->SQ_TEX_RESOURCE5, t->maxLod - t->minLod, LAST_LEVEL_shift, LAST_LEVEL_mask); } } @@ -764,7 +808,8 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, struct gl_texture_object *tObj = _mesa_lookup_texture(rmesa->radeon.glCtx, texname); radeonTexObjPtr t = radeon_tex_obj(tObj); - uint32_t pitch_val, size; + const struct gl_texture_image *firstImage; + uint32_t pitch_val, size, row_align; if (!tObj) return; @@ -774,7 +819,9 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname, if (!offset) return; - size = pitch;//h * w * (depth / 8); + firstImage = t->base.Image[0][t->minLod]; + row_align = rmesa->radeon.texture_row_align - 1; + size = ((_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align) * firstImage->Height; if (t->bo) { radeon_bo_unref(t->bo); t->bo = NULL; @@ -870,18 +917,7 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo return; } - radeon_update_renderbuffers(pDRICtx, dPriv); - /* back & depth buffer are useless free them right away */ - rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } - rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer; - if (rb && rb->bo) { - radeon_bo_unref(rb->bo); - rb->bo = NULL; - } + radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE); rb = rfb->color_rb[0]; if (rb->bo == NULL) { /* Failed to BO for the buffer */ @@ -897,20 +933,14 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo radeon_bo_unref(rImage->bo); rImage->bo = NULL; } - if (t->mt) { - radeon_miptree_unreference(t->mt); - t->mt = NULL; - } - if (rImage->mt) { - radeon_miptree_unreference(rImage->mt); - rImage->mt = NULL; - } + + radeon_miptree_unreference(&t->mt); + radeon_miptree_unreference(&rImage->mt); + _mesa_init_teximage_fields(radeon->glCtx, target, texImage, rb->base.Width, rb->base.Height, 1, 0, rb->cpp); texImage->RowStride = rb->pitch / rb->cpp; - texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx, - internalFormat, - type, format, 0); + rImage->bo = rb->bo; radeon_bo_ref(rImage->bo); t->bo = rb->bo; diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index efeccb25f1..e84f524525 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -32,12 +32,49 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "shader/prog_parameter.h" #include "radeon_debug.h" #include "r600_context.h" #include "r700_assembler.h" +#define USE_CF_FOR_CONTINUE_BREAK 1 +#define USE_CF_FOR_POP_AFTER 1 + +struct prog_instruction noise1_insts[12] = { + {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0} +}; +float noise1_const[2][4] = { + {0.300000f, 0.900000f, 0.500000f, 0.300000f} +}; + +COMPILED_SUB noise1_presub = { + &(noise1_insts[0]), + 12, + 2, + 1, + 0, + &(noise1_const[0]), + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + {0,0,0}, + 0 +}; + BITS addrmode_PVSDST(PVSDST * pPVSDST) { return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1); @@ -213,7 +250,7 @@ GLboolean is_reduction_opcode(PVSDWORD* dest) { if (dest->dst.op3 == 0) { - if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE) ) + if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) ) { return GL_TRUE; } @@ -327,21 +364,27 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) return(format); } -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) { - if(pAsm->D.dst.op3) + if(nIsOp3 > 0) { return 3; } - switch (pAsm->D.dst.opcode) + switch (opcode) { - case SQ_OP2_INST_ADD: + case SQ_OP2_INST_ADD: + case SQ_OP2_INST_KILLE: + case SQ_OP2_INST_KILLGT: + case SQ_OP2_INST_KILLGE: + case SQ_OP2_INST_KILLNE: case SQ_OP2_INST_MUL: case SQ_OP2_INST_MAX: case SQ_OP2_INST_MIN: //case SQ_OP2_INST_MAX_DX10: //case SQ_OP2_INST_MIN_DX10: + case SQ_OP2_INST_SETE: + case SQ_OP2_INST_SETNE: case SQ_OP2_INST_SETGT: case SQ_OP2_INST_SETGE: case SQ_OP2_INST_PRED_SETE: @@ -350,12 +393,14 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_PRED_SETNE: case SQ_OP2_INST_DOT4: case SQ_OP2_INST_DOT4_IEEE: + case SQ_OP2_INST_CUBE: return 2; case SQ_OP2_INST_MOV: + case SQ_OP2_INST_MOVA_FLOOR: case SQ_OP2_INST_FRACT: case SQ_OP2_INST_FLOOR: - case SQ_OP2_INST_KILLGT: + case SQ_OP2_INST_TRUNC: case SQ_OP2_INST_EXP_IEEE: case SQ_OP2_INST_LOG_CLAMPED: case SQ_OP2_INST_LOG_IEEE: @@ -367,7 +412,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) return 1; default: radeon_error( - "Need instruction operand number for %x.\n", pAsm->D.dst.opcode); + "Need instruction operand number for %x.\n", opcode); }; return 3; @@ -381,95 +426,120 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700 pAsm->pR700Shader = pShader; pAsm->currentShaderType = spt; - pAsm->cf_last_export_ptr = NULL; + pAsm->cf_last_export_ptr = NULL; - pAsm->cf_current_export_clause_ptr = NULL; - pAsm->cf_current_alu_clause_ptr = NULL; - pAsm->cf_current_tex_clause_ptr = NULL; - pAsm->cf_current_vtx_clause_ptr = NULL; - pAsm->cf_current_cf_clause_ptr = NULL; + pAsm->cf_current_export_clause_ptr = NULL; + pAsm->cf_current_alu_clause_ptr = NULL; + pAsm->cf_current_tex_clause_ptr = NULL; + pAsm->cf_current_vtx_clause_ptr = NULL; + pAsm->cf_current_cf_clause_ptr = NULL; - // No clause has been created yet - pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + // No clause has been created yet + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; - pAsm->number_of_colorandz_exports = 0; - pAsm->number_of_exports = 0; - pAsm->number_of_export_opcodes = 0; + pAsm->number_of_colorandz_exports = 0; + pAsm->number_of_exports = 0; + pAsm->number_of_export_opcodes = 0; + pAsm->alu_x_opcode = 0; - pAsm->D.bits = 0; - pAsm->S[0].bits = 0; - pAsm->S[1].bits = 0; - pAsm->S[2].bits = 0; + pAsm->D2.bits = 0; + + pAsm->D.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; - pAsm->uLastPosUpdate = 0; + pAsm->uLastPosUpdate = 0; - *(BITS *) &pAsm->fp_stOutFmt0 = 0; + *(BITS *) &pAsm->fp_stOutFmt0 = 0; - pAsm->uIIns = 0; - pAsm->uOIns = 0; - pAsm->number_used_registers = 0; - pAsm->uUsedConsts = 256; + pAsm->uIIns = 0; + pAsm->uOIns = 0; + pAsm->number_used_registers = 0; + pAsm->uUsedConsts = 256; - // Fragment programs - pAsm->uBoolConsts = 0; - pAsm->uIntConsts = 0; - pAsm->uInsts = 0; - pAsm->uConsts = 0; + // Fragment programs + pAsm->uBoolConsts = 0; + pAsm->uIntConsts = 0; + pAsm->uInsts = 0; + pAsm->uConsts = 0; - pAsm->FCSP = 0; - pAsm->fc_stack[0].type = FC_NONE; + pAsm->FCSP = 0; + pAsm->fc_stack[0].type = FC_NONE; - pAsm->branch_depth = 0; - pAsm->max_branch_depth = 0; + pAsm->aArgSubst[0] = + pAsm->aArgSubst[1] = + pAsm->aArgSubst[2] = + pAsm->aArgSubst[3] = (-1); - pAsm->aArgSubst[0] = - pAsm->aArgSubst[1] = - pAsm->aArgSubst[2] = - pAsm->aArgSubst[3] = (-1); + pAsm->uOutputs = 0; - pAsm->uOutputs = 0; + for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++) + { + pAsm->color_export_register_number[i] = (-1); + } - for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++) - { - pAsm->color_export_register_number[i] = (-1); - } + pAsm->depth_export_register_number = (-1); + pAsm->stencil_export_register_number = (-1); + pAsm->coverage_to_mask_export_register_number = (-1); + pAsm->mask_export_register_number = (-1); - pAsm->depth_export_register_number = (-1); - pAsm->stencil_export_register_number = (-1); - pAsm->coverage_to_mask_export_register_number = (-1); - pAsm->mask_export_register_number = (-1); + pAsm->starting_export_register_number = 0; + pAsm->starting_vfetch_register_number = 0; + pAsm->starting_temp_register_number = 0; + pAsm->uFirstHelpReg = 0; - pAsm->starting_export_register_number = 0; - pAsm->starting_vfetch_register_number = 0; - pAsm->starting_temp_register_number = 0; - pAsm->uFirstHelpReg = 0; + pAsm->input_position_is_used = GL_FALSE; + pAsm->input_normal_is_used = GL_FALSE; + for (i=0; i<NUMBER_OF_INPUT_COLORS; i++) + { + pAsm->input_color_is_used[ i ] = GL_FALSE; + } - pAsm->input_position_is_used = GL_FALSE; - pAsm->input_normal_is_used = GL_FALSE; + for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++) + { + pAsm->input_texture_unit_is_used[ i ] = GL_FALSE; + } + for (i=0; i<VERT_ATTRIB_MAX; i++) + { + pAsm->vfetch_instruction_ptr_array[ i ] = NULL; + } - for (i=0; i<NUMBER_OF_INPUT_COLORS; i++) - { - pAsm->input_color_is_used[ i ] = GL_FALSE; - } + pAsm->number_of_inputs = 0; - for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++) - { - pAsm->input_texture_unit_is_used[ i ] = GL_FALSE; - } + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; - for (i=0; i<VERT_ATTRIB_MAX; i++) - { - pAsm->vfetch_instruction_ptr_array[ i ] = NULL; - } + pAsm->subs = NULL; + pAsm->unSubArraySize = 0; + pAsm->unSubArrayPointer = 0; + pAsm->callers = NULL; + pAsm->unCallerArraySize = 0; + pAsm->unCallerArrayPointer = 0; + + pAsm->CALLSP = 0; + pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0; + pAsm->CALLSTACK[0].plstCFInstructions_local + = &(pAsm->pR700Shader->lstCFInstructions); + + pAsm->CALLSTACK[0].max = 0; + pAsm->CALLSTACK[0].current = 0; + + SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local); - pAsm->number_of_inputs = 0; + pAsm->unCFflags = 0; - return 0; + pAsm->presubs = NULL; + pAsm->unPresubArraySize = 0; + pAsm->unNumPresub = 0; + pAsm->unCurNumILInsts = 0; + + return 0; } GLboolean IsTex(gl_inst_opcode Opcode) @@ -587,6 +657,31 @@ int check_current_clause(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean add_cf_instruction(r700_AssemblerBase* pAsm) +{ + if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE)) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr = + (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause); + + if (pAsm->cf_current_cf_clause_ptr != NULL) + { + Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr ); + } + else + { + radeon_error("Could not allocate a new VFetch CF instruction.\n"); + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, R700VertexInstruction* vertex_instruction_ptr) { @@ -682,7 +777,7 @@ GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, // If this clause constains any TEX instruction that is dependent on a previous instruction, // set the barrier bit - if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) + if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE ) { pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1; } @@ -786,6 +881,133 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, return GL_TRUE; } +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod) +{ + GLuint client_size_inbyte; + GLuint data_format; + GLuint mega_fetch_count; + GLuint is_mega_fetch_flag; + + R700VertexGenericFetch* vfetch_instruction_ptr; + R700VertexGenericFetch* assembled_vfetch_instruction_ptr + = pAsm->vfetch_instruction_ptr_array[element]; + + if (assembled_vfetch_instruction_ptr == NULL) + { + vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch); + if (vfetch_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700VertexGenericFetch(vfetch_instruction_ptr); + } + else + { + vfetch_instruction_ptr = assembled_vfetch_instruction_ptr; + } + + data_format = GetSurfaceFormat(type, size, &client_size_inbyte); + + if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here + { + //TODO : mini fetch + } + else + { + mega_fetch_count = MEGA_FETCH_BYTES - 1; + is_mega_fetch_flag = 0x1; + pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte; + } + + vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH; + vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA; + vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + vfetch_instruction_ptr->m_Word0.f.buffer_id = element; + vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0; + vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; + vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; + + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + + vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; + vfetch_instruction_ptr->m_Word1.f.data_format = data_format; + vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE; + + if(1 == _signed) + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED; + } + else + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED; + } + + if(GL_TRUE == normalize) + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM; + } + else + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT; + } + + // Destination register + vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; + vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE; + + vfetch_instruction_ptr->m_Word2.f.offset = 0; + vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0; + + vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag; + + if (assembled_vfetch_instruction_ptr == NULL) + { + if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) + { + return GL_FALSE; + } + + if (pAsm->vfetch_instruction_ptr_array[element] != NULL) + { + return GL_FALSE; + } + else + { + pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr; + } + } + + return GL_TRUE; +} + +GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm) +{ + GLint i; + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + pAsm->cf_current_vtx_clause_ptr = NULL; + + for (i=0; i<VERT_ATTRIB_MAX; i++) + { + pAsm->vfetch_instruction_ptr_array[ i ] = NULL; + } + + cleanup_vfetch_shaderinst(pAsm->pR700Shader); + + return GL_TRUE; +} + GLuint gethelpr(r700_AssemblerBase* pAsm) { GLuint r = pAsm->uHelpReg; @@ -855,7 +1077,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm) checkop_init(pAsm); - if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) @@ -866,7 +1089,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm) { bSrcConst[0] = GL_FALSE; } - if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) @@ -899,7 +1123,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) checkop_init(pAsm); - if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) @@ -910,7 +1135,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) { bSrcConst[0] = GL_FALSE; } - if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) @@ -921,7 +1147,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm) { bSrcConst[1] = GL_FALSE; } - if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || + if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) || (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) || (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) ) @@ -1021,6 +1248,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: if (1 == pILInst->SrcReg[src].RelAddr) { setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0); @@ -1034,7 +1262,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; break; case PROGRAM_INPUT: - setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); pAsm->S[fld].src.rtype = SRC_REG_INPUT; switch (pAsm->currentShaderType) { @@ -1047,7 +1275,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, } break; default: - radeon_error("Invalid source argument type\n"); + radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File); return GL_FALSE; } } @@ -1103,6 +1331,15 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm) pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + if(pILInst->SaturateMode == SATURATE_ZERO_ONE) + { + pAsm->D2.dst2.SaturateMode = 1; + } + else + { + pAsm->D2.dst2.SaturateMode = 0; + } + return GL_TRUE; } @@ -1152,42 +1389,67 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) GLboolean bValidTexCoord = GL_FALSE; + if(pAsm->aArgSubst[1] >= 0) + { + bValidTexCoord = GL_TRUE; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->aArgSubst[1]; + } + else + { switch (pILInst->SrcReg[0].File) { - case PROGRAM_CONSTANT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_STATE_VAR: - bValidTexCoord = GL_TRUE; - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = pAsm->aArgSubst[1]; - break; - case PROGRAM_TEMPORARY: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + - pAsm->starting_temp_register_number; - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - break; - case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) - { - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = - pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; - break; - } - break; + case PROGRAM_UNIFORM: + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + break; + case PROGRAM_TEMPORARY: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + + pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + break; + case PROGRAM_INPUT: + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_WPOS: + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_FOGC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + case FRAG_ATTRIB_FACE: + fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); + break; + case FRAG_ATTRIB_PNTC: + fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); + break; + } + + if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || + (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) + { + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + } + + break; + } } if(GL_TRUE == bValidTexCoord) @@ -1368,6 +1630,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, { src_sel = pSource->reg + CFILE_REGISTER_OFFSET; } + else if (pSource->rtype == SRC_REC_LITERAL) + { + src_sel = SQ_ALU_SRC_LITERAL; + } else { radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n", @@ -1457,7 +1723,8 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, return GL_FALSE; } - if ( pAsm->cf_current_alu_clause_ptr == NULL || + if ( pAsm->alu_x_opcode != 0 || + pAsm->cf_current_alu_clause_ptr == NULL || ( (pAsm->cf_current_alu_clause_ptr != NULL) && (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) ) ) ) @@ -1487,9 +1754,17 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0; pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0; - //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1; pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0; - pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU; + + if(pAsm->alu_x_opcode != 0) + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode; + pAsm->alu_x_opcode = 0; + } + else + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU; + } pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; @@ -1497,7 +1772,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, } else { - pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2); } // If this clause constains any instruction that is forward dependent on a TEX instruction, @@ -1774,7 +2049,7 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; src<number_of_operands; src++) { @@ -1863,7 +2138,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLuint swizzle_key; - GLuint number_of_operands = r700GetNumOperands(pAsm); + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); for (src=0; src<number_of_operands; src++) { @@ -1896,7 +2171,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, if( is_gpr(sel) ) { if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) ) - { + { return GL_FALSE; } @@ -1908,7 +2183,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, else { if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) ) - { + { return GL_FALSE; } } @@ -1920,7 +2195,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, if( is_cfile(sel) ) { if( GL_FALSE == reserve_cfile(pAsm, sel, chan) ) - { + { return GL_FALSE; } } @@ -1932,6 +2207,10 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { + R700ALUInstruction * alu_instruction_ptr; + R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; + R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; + GLuint number_of_scalar_operations; GLboolean is_single_scalar_operation; GLuint scalar_channel_index; @@ -1940,10 +2219,10 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) int current_source_index; GLuint contiguous_slots_needed; - GLuint uNumSrc = r700GetNumOperands(pAsm); - GLuint channel_swizzle, j; - GLuint chan_counter[4] = {0, 0, 0, 0}; - PVSSRC * pSource[3]; + GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + //GLuint channel_swizzle, j; + //GLuint chan_counter[4] = {0, 0, 0, 0}; + //PVSSRC * pSource[3]; GLboolean bSplitInst = GL_FALSE; if (1 == pAsm->D.dst.math) @@ -1955,7 +2234,9 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { is_single_scalar_operation = GL_FALSE; number_of_scalar_operations = 4; - + +/* current assembler doesn't do more than 1 register per source */ +#if 0 /* check read port, only very preliminary algorithm, not count in src0/1 same comp case and prev slot repeat case; also not count relative addressing. TODO: improve performance. */ @@ -1990,27 +2271,49 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { bSplitInst = GL_TRUE; } +#endif } contiguous_slots_needed = 0; - if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) + if(!is_single_scalar_operation) { contiguous_slots_needed = 4; } + contiguous_slots_needed += pAsm->D2.dst2.literal_slots; + initialize(pAsm); for (scalar_channel_index=0; scalar_channel_index < number_of_scalar_operations; scalar_channel_index++) { - R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); - if (alu_instruction_ptr == NULL) - { - return GL_FALSE; - } - Init_R700ALUInstruction(alu_instruction_ptr); + if(scalar_channel_index == (number_of_scalar_operations-1)) + { + switch(pAsm->D2.dst2.literal_slots) + { + case 0: + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + break; + case 1: + alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); + Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; + break; + case 2: + alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); + Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; + break; + }; + } + else + { + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + } //src 0 current_source_index = 0; @@ -2020,11 +2323,11 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } - if (pAsm->D.dst.math == 0) + if (uNumSrc > 1) { // Process source 1 current_source_index = 1; @@ -2034,13 +2337,13 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) current_source_index, pcurrent_source, scalar_channel_index) ) - { + { return GL_FALSE; } } //other bits - alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; + alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode; if( (is_single_scalar_operation == GL_TRUE) || (GL_TRUE == bSplitInst) ) @@ -2052,9 +2355,17 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; } - alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0; + if(1 == pAsm->D.dst.predicated) + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + } // dst if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || @@ -2063,7 +2374,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; } else - { + { radeon_error("Only temp destination registers supported for ALU dest regs.\n"); return GL_FALSE; } @@ -2093,7 +2404,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; - alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; + alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; if (pAsm->D.dst.op3) { @@ -2120,8 +2431,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; @@ -2149,8 +2460,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) { alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; - alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; - alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs; //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; @@ -2177,7 +2488,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) - { + { return GL_FALSE; } @@ -2188,19 +2499,19 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) if (is_single_scalar_operation) { if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) - { + { return GL_FALSE; } } else { if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) - { - return 1; + { + return GL_FALSE; } } - contiguous_slots_needed = 0; + contiguous_slots_needed -= 1; } return GL_TRUE; @@ -2210,9 +2521,7 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - if( GL_TRUE == IsTex(pILInst->Opcode) && - /* handle const moves to temp register */ - !(pAsm->D.dst.opcode == SQ_OP2_INST_MOV) ) + if( GL_TRUE == pAsm->is_tex ) { if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) @@ -2253,10 +2562,14 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) //reset for next inst. pAsm->D.bits = 0; + pAsm->D2.bits = 0; pAsm->S[0].bits = 0; pAsm->S[1].bits = 0; pAsm->S[2].bits = 0; - + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; + pAsm->D2.bits = 0; + pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0; return GL_TRUE; } @@ -2375,6 +2688,35 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm) return GL_TRUE; } +GLboolean assemble_ARL(r700_AssemblerBase *pAsm) +{ /* TODO: ar values dont' persist between clauses */ + if( GL_FALSE == checkop1(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean assemble_BAD(char *opcode_str) { radeon_error("Not yet implemented instruction (%s)\n", opcode_str); @@ -2460,9 +2802,44 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_COS(r700_AssemblerBase *pAsm) +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { - return assemble_math_function(pAsm, SQ_OP2_INST_COS); + int tmp; + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + assemble_src(pAsm, 0, -1); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; + next_ins(pAsm); + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + assemble_dst(pAsm); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + + //TODO - replicate if more channels set in WriteMask + return GL_TRUE; + } GLboolean assemble_DOT(r700_AssemblerBase *pAsm) @@ -2496,7 +2873,7 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm) } else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH) { - onecomp_PVSSRC(&(pAsm->S[1].src), 3); + onecomp_PVSSRC(&(pAsm->S[0].src), 3); } if ( GL_FALSE == next_ins(pAsm) ) @@ -2549,6 +2926,133 @@ GLboolean assemble_EX2(r700_AssemblerBase *pAsm) { return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE); } + +GLboolean assemble_EXP(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // FLOOR tmp.x, a.x + // EX2 dst.x tmp.x + + if (pAsm->pILInst->DstReg.WriteMask & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // FRACT dst.y a.x + + if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // EX2 dst.z, a.x + + if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // MOV dst.w 1.0 + + if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} GLboolean assemble_FLR(r700_AssemblerBase *pAsm) { @@ -2603,17 +3107,19 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_KIL(r700_AssemblerBase *pAsm) -{ - checkop1(pAsm); +GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT; - - if ( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + if(pILInst->Opcode == OPCODE_KIL) + checkop1(pAsm); + + pAsm->D.dst.opcode = opcode; + //pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; @@ -2622,30 +3128,34 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm) setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; pAsm->S[0].src.reg = 0; - setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; - - if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File) + if(pILInst->Opcode == OPCODE_KIL_NV) { - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1); + neg_PVSSRC(&(pAsm->S[1].src)); } else - { //PROGRAM_OUTPUT - pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index]; + { + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + } - - setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); - noswizzle_PVSSRC(&(pAsm->S[1].src)); - + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } + /* Doc says KILL has to be last(end) ALU clause */ pAsm->pR700Shader->killIsUsed = GL_TRUE; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; return GL_TRUE; } @@ -2709,6 +3219,7 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm) { return GL_FALSE; } + if( GL_FALSE == assemble_src(pAsm, 2, -1) ) { return GL_FALSE; @@ -2739,6 +3250,217 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm) return GL_TRUE; } +GLboolean assemble_LOG(r700_AssemblerBase *pAsm) +{ + BITS tmp1, tmp2, tmp3; + + checkop1(pAsm); + + tmp1 = gethelpr(pAsm); + tmp2 = gethelpr(pAsm); + tmp3 = gethelpr(pAsm); + + // FIXME: The hardware can do fabs() directly on input + // elements, but the compiler doesn't have the + // capability to use that. + + // MAX tmp1.x, a.x, -a.x (fabs(a.x)) + + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // Entire algo: + // + // LG2 tmp2.x, tmp1.x + // FLOOR tmp3.x, tmp2.x + // MOV dst.x, tmp3.x + // ADD tmp3.x, tmp2.x, -tmp3.x + // EX2 dst.y, tmp3.x + // MOV dst.z, tmp2.x + // MOV dst.w, 1.0 + + // LG2 tmp2.x, tmp1.x + // FLOOR tmp3.x, tmp2.x + + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp3; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.x, tmp3.x + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // ADD tmp3.x, tmp2.x, -tmp3.x + // EX2 dst.y, tmp3.x + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp3; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = DST_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + neg_PVSSRC(&(pAsm->S[1].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.z, tmp2.x + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.w 1.0 + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm) { int tmp, ii; @@ -2880,6 +3602,11 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + /* dst.y = max(src.x, 0.0) */ pAsm->D.dst.opcode = SQ_OP2_INST_MAX; pAsm->D.dst.rtype = dstType; @@ -2891,11 +3618,7 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_X; - pAsm->S[0].src.swizzley = SQ_SEL_X; - pAsm->S[0].src.swizzlez = SQ_SEL_X; - pAsm->S[0].src.swizzlew = SQ_SEL_X; + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = tmp; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); @@ -2909,34 +3632,47 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* before: dst.w = log(src.y) - * after : dst.x = log(src.y) - * why change dest register is that dst.w has been initialized as 1 before - */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y); + + /* dst.z = log(src.y) */ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; pAsm->D.dst.reg = dstReg; - pAsm->D.dst.writex = 1; + pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; - pAsm->D.dst.writez = 0; + pAsm->D.dst.writez = 1; pAsm->D.dst.writew = 0; pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_Y; - pAsm->S[0].src.swizzley = SQ_SEL_Y; - pAsm->S[0].src.swizzlez = SQ_SEL_Y; - pAsm->S[0].src.swizzlew = SQ_SEL_Y; if( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } - /* before: tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */ - /* after : tmp.x = amd MUL_LIT(src.w, dst.x, src.x ) */ + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 2) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + + swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); + + /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; + pAsm->D.dst.math = 1; pAsm->D.dst.op3 = 1; pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; @@ -2948,29 +3684,19 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_W; - pAsm->S[0].src.swizzley = SQ_SEL_W; - pAsm->S[0].src.swizzlez = SQ_SEL_W; - pAsm->S[0].src.swizzlew = SQ_SEL_W; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; pAsm->S[1].src.reg = dstReg; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); noneg_PVSSRC(&(pAsm->S[1].src)); - pAsm->S[1].src.swizzlex = SQ_SEL_X; - pAsm->S[1].src.swizzley = SQ_SEL_X; - pAsm->S[1].src.swizzlez = SQ_SEL_X; - pAsm->S[1].src.swizzlew = SQ_SEL_X; + pAsm->S[1].src.swizzlex = SQ_SEL_Z; + pAsm->S[1].src.swizzley = SQ_SEL_Z; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_Z; pAsm->S[2].src.rtype = srcType; pAsm->S[2].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); - noneg_PVSSRC(&(pAsm->S[2].src)); - pAsm->S[2].src.swizzlex = SQ_SEL_X; - pAsm->S[2].src.swizzley = SQ_SEL_X; - pAsm->S[2].src.swizzlez = SQ_SEL_X; - pAsm->S[2].src.swizzlew = SQ_SEL_X; if( GL_FALSE == next_ins(pAsm) ) { @@ -3229,77 +3955,137 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE); } -GLboolean assemble_SIN(r700_AssemblerBase *pAsm) -{ - return assemble_math_function(pAsm, SQ_OP2_INST_SIN); -} - GLboolean assemble_SCS(r700_AssemblerBase *pAsm) { BITS tmp; - checkop1(pAsm); + checkop1(pAsm); - tmp = gethelpr(pAsm); + tmp = gethelpr(pAsm); + /* tmp.x = src /2*PI */ + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; - // COS tmp.x, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_COS; - pAsm->D.dst.math = 1; + assemble_src(pAsm, 0, -1); - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + next_ins(pAsm); - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + // COS dst.x, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.math = 1; - // SIN tmp.y, a.x - pAsm->D.dst.opcode = SQ_OP2_INST_SIN; - pAsm->D.dst.math = 1; + assemble_dst(pAsm); + /* mask y */ + pAsm->D.dst.writey = 0; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writey = 1; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) - { - return GL_FALSE; - } + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } - if( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } + // SIN dst.y, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + pAsm->D.dst.math = 1; - // MOV dst.mask, tmp - pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + assemble_dst(pAsm); + /* mask x */ + pAsm->D.dst.writex = 0; - if( GL_FALSE == assemble_dst(pAsm) ) - { - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - pAsm->S[0].src.rtype = DST_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } - noswizzle_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlez = SQ_SEL_0; - pAsm->S[0].src.swizzlew = SQ_SEL_0; + return GL_TRUE; +} - if ( GL_FALSE == next_ins(pAsm) ) - { - return GL_FALSE; - } +GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = opcode; + //pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + pAsm->D.dst.predicated = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->uHelpReg; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number; + pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7; + noneg_PVSSRC(&(pAsm->S[0].src)); + + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = pAsm->uHelpReg; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_0; + pAsm->S[1].src.swizzley = SQ_SEL_0; + pAsm->S[1].src.swizzlez = SQ_SEL_0; + pAsm->S[1].src.swizzlew = SQ_SEL_0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } @@ -3376,9 +4162,13 @@ GLboolean assemble_STP(r700_AssemblerBase *pAsm) GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { GLboolean src_const; + GLboolean need_barrier = GL_FALSE; + checkop1(pAsm); + switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) { + case PROGRAM_UNIFORM: case PROGRAM_CONSTANT: case PROGRAM_LOCAL_PARAM: case PROGRAM_ENV_PARAM: @@ -3396,29 +4186,170 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { if ( GL_FALSE == mov_temp(pAsm, 0) ) return GL_FALSE; + need_barrier = GL_TRUE; } - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) + if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { - case OPCODE_TEX: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; - break; - case OPCODE_TXB: - radeon_error("do not support TXB yet\n"); + GLuint tmp = gethelpr(pAsm); + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { return GL_FALSE; - break; - case OPCODE_TXP: - /* TODO : tex proj version : divid first 3 components by 4th */ - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; - break; - default: - radeon_error("Internal error: bad texture op (not TEX)\n"); + } + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + if( GL_FALSE == next_ins(pAsm) ) + { return GL_FALSE; - break; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->aArgSubst[1] = tmp; + need_barrier = GL_TRUE; } + if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX ) + { + GLuint tmp1 = gethelpr(pAsm); + GLuint tmp2 = gethelpr(pAsm); + + /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ + pAsm->D.dst.opcode = SQ_OP2_INST_CUBE; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* tmp1.z = RCP_e(|tmp1.z|) */ + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writez = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + pAsm->S[0].src.swizzlex = SQ_SEL_Z; + pAsm->S[0].src.abs = 1; + + next_ins(pAsm); + + /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x + * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x + * muladd has no writemask, have to use another temp + */ + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); + setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); + /* immediate c 1.5 */ + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1.5F; + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + pAsm->S[2].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X); + + next_ins(pAsm); + + /* tmp1.xy = temp2.xy */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + pAsm->aArgSubst[1] = tmp1; + need_barrier = GL_TRUE; + + } + + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXB) + { + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; + } + else + { + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + } + + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + { + pAsm->need_tex_barrier = GL_TRUE; + } // Set src1 to tex unit id - pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; + pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; //No sw info from mesa compiler, so hard code here. @@ -3437,10 +4368,25 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) { - return GL_FALSE; + /* hopefully did swizzles before */ + noswizzle_PVSSRC(&(pAsm->S[0].src)); + } + + if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX) + { + /* SAMPLE dst, tmp.yxwy, CUBE */ + pAsm->S[0].src.swizzlex = SQ_SEL_Y; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_W; + pAsm->S[0].src.swizzlew = SQ_SEL_Y; } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } return GL_TRUE; } @@ -3560,27 +4506,909 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_IF(r700_AssemblerBase *pAsm) +static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason) +{ + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].current--; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should -= 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].current -= 1; + break; + }; +} + +static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly) +{ + if(GL_TRUE == bCheckMaxOnly) + { + switch (uReason) + { + case FC_PUSH_VPM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 1; + } + break; + case FC_PUSH_WQM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 4; + } + break; + } + return; + } + + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].current++; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should += 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].current += 1; + break; + }; + + if(pAsm->CALLSTACK[pAsm->CALLSP].current + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current; + } +} + +GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset; + + return GL_TRUE; +} + +GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + return GL_TRUE; +} + +GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) { + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + if(GL_TRUE != bHasElse) + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + } + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_IF; + pAsm->fc_stack[pAsm->FCSP].mid = NULL; + pAsm->fc_stack[pAsm->FCSP].midLen= 0; + pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; + +#ifndef USE_CF_FOR_POP_AFTER + if(GL_TRUE != bHasElse) + { + pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; + } +#endif /* USE_CF_FOR_POP_AFTER */ + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE); + + return GL_TRUE; +} + +GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; /// + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid, + 0, + sizeof(R700ControlFlowGenericClause *) ); + pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr; + //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1; + +#ifndef USE_CF_FOR_POP_AFTER + pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; +#endif /* USE_CF_FOR_POP_AFTER */ + + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1; + return GL_TRUE; } GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) { +#ifdef USE_CF_FOR_POP_AFTER + pops(pAsm, 1); +#endif /* USE_CF_FOR_POP_AFTER */ + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + if(NULL == pAsm->fc_stack[pAsm->FCSP].mid) + { + /* no else in between */ + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode; + } + else + { + pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode; + } + + if(NULL != pAsm->fc_stack[pAsm->FCSP].mid) + { + FREE(pAsm->fc_stack[pAsm->FCSP].mid); + } + + if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF) + { + radeon_error("if/endif in shader code are not paired. \n"); + return GL_FALSE; + } + + pAsm->FCSP--; + + decreaseCurrent(pAsm, FC_PUSH_VPM); + return GL_TRUE; } -GLboolean AssembleInstr(GLuint uiNumberInsts, +GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP; + pAsm->fc_stack[pAsm->FCSP].mid = NULL; + pAsm->fc_stack[pAsm->FCSP].unNumMid = 0; + pAsm->fc_stack[pAsm->FCSP].midLen = 0; + pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; + + checkStackDepth(pAsm, FC_LOOP, GL_FALSE); + + return GL_TRUE; +} + +GLboolean assemble_BRK(r700_AssemblerBase *pAsm) +{ +#ifdef USE_CF_FOR_CONTINUE_BREAK + + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + unsigned int unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + break; + } + } + if(0 == FC_LOOP) + { + radeon_error("Break is not inside loop/endloop pair.\n"); + return GL_FALSE; + } + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + +#endif //USE_CF_FOR_CONTINUE_BREAK + return GL_TRUE; +} + +GLboolean assemble_CONT(r700_AssemblerBase *pAsm) +{ +#ifdef USE_CF_FOR_CONTINUE_BREAK + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + unsigned int unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + break; + } + } + if(0 == FC_LOOP) + { + radeon_error("Continue is not inside loop/endloop pair.\n"); + return GL_FALSE; + } + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + +#endif /* USE_CF_FOR_CONTINUE_BREAK */ + + return GL_TRUE; +} + +GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm) +{ + GLuint i; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1; + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + +#ifdef USE_CF_FOR_CONTINUE_BREAK + for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++) + { + pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex; + } + if(NULL != pAsm->fc_stack[pAsm->FCSP].mid) + { + FREE(pAsm->fc_stack[pAsm->FCSP].mid); + } +#endif + + if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP) + { + radeon_error("loop/endloop in shader code are not paired. \n"); + return GL_FALSE; + } + + GLuint unFCSP; + GLuint unIF = 0; + if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0) + { + for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + breakLoopOnFlag(pAsm, unFCSP); + break; + } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } + } + if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry) + { +#ifdef USE_CF_FOR_POP_AFTER + returnOnFlag(pAsm, unIF); +#else + returnOnFlag(pAsm, 0); +#endif /* USE_CF_FOR_POP_AFTER */ + pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET; + } + } + + pAsm->FCSP--; + + decreaseCurrent(pAsm, FC_LOOP); + + return GL_TRUE; +} + +void add_return_inst(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; +} + +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift) +{ + /* Put in sub */ + if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize ) + { + pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs, + sizeof(SUB_OFFSET) * pAsm->unSubArraySize, + sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) ); + if(NULL == pAsm->subs) + { + return GL_FALSE; + } + pAsm->unSubArraySize += 10; + } + + pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; + + pAsm->CALLSP++; + pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer; + pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP; + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local + = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local); + pAsm->CALLSTACK[pAsm->CALLSP].max = 0; + pAsm->CALLSTACK[pAsm->CALLSP].current = 0; + SetActiveCFlist(pAsm->pR700Shader, + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); + + pAsm->unSubArrayPointer++; + + /* start sub */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_REP; + + checkStackDepth(pAsm, FC_REP, GL_FALSE); + + return GL_TRUE; +} + +GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm) +{ + if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP) + { + radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n"); + return GL_FALSE; + } + + /* copy max to sub structure */ + pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax + = pAsm->CALLSTACK[pAsm->CALLSP].max; + + decreaseCurrent(pAsm, FC_REP); + + pAsm->CALLSP--; + SetActiveCFlist(pAsm->pR700Shader, + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + pAsm->FCSP--; + + return GL_TRUE; +} + +GLboolean assemble_RET(r700_AssemblerBase *pAsm) +{ + GLuint unIF = 0; + + if(pAsm->CALLSP > 0) + { /* in sub */ + GLuint unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + setRetInLoopFlag(pAsm, SQ_SEL_1); + breakLoopOnFlag(pAsm, unFCSP); + pAsm->unCFflags |= LOOPRET_FLAGS; + + return GL_TRUE; + } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } + } + } + +#ifdef USE_CF_FOR_POP_AFTER + if(unIF > 0) + { + pops(pAsm, unIF); + } +#endif /* USE_CF_FOR_POP_AFTER */ + + add_return_inst(pAsm); + + return GL_TRUE; +} + +GLboolean assemble_CAL(r700_AssemblerBase *pAsm, + GLint nILindex, + GLuint uiIL_Shift, + GLuint uiNumberInsts, + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc) +{ + GLint uiIL_Offset; + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + /* Put in caller */ + if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize ) + { + pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers, + sizeof(CALLER_POINTER) * pAsm->unCallerArraySize, + sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) ); + if(NULL == pAsm->callers) + { + return GL_FALSE; + } + pAsm->unCallerArraySize += 10; + } + + uiIL_Offset = nILindex + uiIL_Shift; + pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset; + pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + + pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL; + pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL; + + pAsm->unCallerArrayPointer++; + + int j; + GLuint max; + GLuint unSubID; + GLboolean bRet; + for(j=0; j<pAsm->unSubArrayPointer; j++) + { + if(uiIL_Offset == pAsm->subs[j].subIL_Offset) + { /* compiled before */ + + max = pAsm->subs[j].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j; + return GL_TRUE; + } + } + + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer; + unSubID = pAsm->unSubArrayPointer; + + bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm); + + if(GL_TRUE == bRet) + { + max = pAsm->subs[unSubID].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + + pAsm->subs[unSubID].pPresubDesc = pPresubDesc; + } + + return bRet; +} + +GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) +{ + GLfloat fLiteral[2] = {0.1, 0.0}; + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + pAsm->D.dst.op3 = 0; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->flag_reg_index; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->D2.dst2.literal_slots = 1; + pAsm->D2.dst2.SaturateMode = SATURATE_OFF; + pAsm->D.dst.predicated = 0; + /* in reloc where dislink flag init inst, only one slot alu inst is handled. */ + pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */ + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ +#if 0 + pAsm->S[0].src.rtype = SRC_REC_LITERAL; + //pAsm->S[0].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Z; + pAsm->S[0].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) ) + { + return GL_FALSE; + } +#else + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = flagValue; + pAsm->S[0].src.swizzley = flagValue; + pAsm->S[0].src.swizzlez = flagValue; + pAsm->S[0].src.swizzlew = flagValue; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } +#endif + + return GL_TRUE; +} + +GLboolean testFlag(r700_AssemblerBase *pAsm) +{ + GLfloat fLiteral[2] = {0.1, 0.0}; + + //Test flag + GLuint tmp = gethelpr(pAsm); + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE; + pAsm->D.dst.math = 1; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->D2.dst2.literal_slots = 1; + pAsm->D2.dst2.SaturateMode = SATURATE_OFF; + pAsm->D.dst.predicated = 1; + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ + + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->flag_reg_index; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Z; + pAsm->S[0].src.swizzlew = SQ_SEL_W; +#if 0 + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + //pAsm->S[1].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_X; + pAsm->S[1].src.swizzley = SQ_SEL_Y; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) ) + { + return GL_FALSE; + } +#else + pAsm->S[1].src.rtype = DST_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_1; + pAsm->S[1].src.swizzley = SQ_SEL_1; + pAsm->S[1].src.swizzlez = SQ_SEL_1; + pAsm->S[1].src.swizzlew = SQ_SEL_1; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } +#endif + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + + return GL_TRUE; +} + +GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF) +{ + testFlag(pAsm); + jumpToOffest(pAsm, 1, 4); + setRetInLoopFlag(pAsm, SQ_SEL_0); + pops(pAsm, unIF + 1); + add_return_inst(pAsm); + + return GL_TRUE; +} + +GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) +{ + testFlag(pAsm); + + //break + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + pops(pAsm, 1); + + return GL_TRUE; +} + +GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, + GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode) { GLuint i; pR700AsmCode->pILInst = pILInst; - for(i=0; i<uiNumberInsts; i++) + for(i=uiFirstInst; i<uiNumberInsts; i++) { pR700AsmCode->uiCurInst = i; +#ifndef USE_CF_FOR_CONTINUE_BREAK + if(OPCODE_BRK == pILInst[i+1].Opcode) + { + switch(pILInst[i].Opcode) + { + case OPCODE_SLE: + pILInst[i].Opcode = OPCODE_SGT; + break; + case OPCODE_SLT: + pILInst[i].Opcode = OPCODE_SGE; + break; + case OPCODE_SGE: + pILInst[i].Opcode = OPCODE_SLT; + break; + case OPCODE_SGT: + pILInst[i].Opcode = OPCODE_SLE; + break; + case OPCODE_SEQ: + pILInst[i].Opcode = OPCODE_SNE; + break; + case OPCODE_SNE: + pILInst[i].Opcode = OPCODE_SEQ; + break; + default: + break; + } + } +#endif + if(pILInst[i].CondUpdate == 1) + { + /* remember dest register used for cond evaluation */ + /* XXX also handle PROGRAM_OUTPUT registers here? */ + pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index; + } + switch (pILInst[i].Opcode) { case OPCODE_ABS: @@ -3594,8 +5422,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, break; case OPCODE_ARL: - radeon_error("Not yet implemented instruction OPCODE_ARL \n"); - //if ( GL_FALSE == assemble_BAD("ARL") ) + if ( GL_FALSE == assemble_ARL(pR700AsmCode) ) return GL_FALSE; break; case OPCODE_ARR: @@ -3609,7 +5436,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_COS: - if ( GL_FALSE == assemble_COS(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) return GL_FALSE; break; @@ -3630,16 +5457,16 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_EXP: - radeon_error("Not yet implemented instruction OPCODE_EXP \n"); - //if ( GL_FALSE == assemble_BAD("EXP") ) + if ( GL_FALSE == assemble_EXP(pR700AsmCode) ) return GL_FALSE; - break; // approx of EX2 + break; case OPCODE_FLR: if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) return GL_FALSE; break; - //case OP_FLR_INT: + //case OP_FLR_INT: ; + // if ( GL_FALSE == assemble_FLR_INT() ) // return GL_FALSE; // break; @@ -3650,7 +5477,8 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, break; case OPCODE_KIL: - if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) + case OPCODE_KIL_NV: + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) return GL_FALSE; break; case OPCODE_LG2: @@ -3666,10 +5494,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_LOG: - radeon_error("Not yet implemented instruction OPCODE_LOG \n"); - //if ( GL_FALSE == assemble_BAD("LOG") ) + if ( GL_FALSE == assemble_LOG(pR700AsmCode) ) return GL_FALSE; - break; // approx of LG2 + break; case OPCODE_MAD: if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) @@ -3691,6 +5518,26 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, case OPCODE_MUL: if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) return GL_FALSE; + break; + + case OPCODE_NOISE1: + { + callPreSub(pR700AsmCode, + GLSL_NOISE1, + &noise1_presub, + pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number, + 1); + radeon_error("noise1: not yet supported shader instruction\n"); + }; + break; + case OPCODE_NOISE2: + radeon_error("noise2: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE3: + radeon_error("noise3: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE4: + radeon_error("noise4: not yet supported shader instruction\n"); break; case OPCODE_POW: @@ -3706,22 +5553,78 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_SIN: - if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) return GL_FALSE; break; case OPCODE_SCS: if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) return GL_FALSE; - break; + break; + + case OPCODE_SEQ: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) + { + return GL_FALSE; + } + break; + + case OPCODE_SGT: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) + { + return GL_FALSE; + } + break; case OPCODE_SGE: if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) + { return GL_FALSE; - break; + } + break; + + /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/ case OPCODE_SLT: - if ( GL_FALSE == assemble_SLT(pR700AsmCode) ) + { + struct prog_src_register SrcRegSave[2]; + SrcRegSave[0] = pILInst[i].SrcReg[0]; + SrcRegSave[1] = pILInst[i].SrcReg[1]; + pILInst[i].SrcReg[0] = SrcRegSave[1]; + pILInst[i].SrcReg[1] = SrcRegSave[0]; + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + } + break; + + case OPCODE_SLE: + { + struct prog_src_register SrcRegSave[2]; + SrcRegSave[0] = pILInst[i].SrcReg[0]; + SrcRegSave[1] = pILInst[i].SrcReg[1]; + pILInst[i].SrcReg[0] = SrcRegSave[1]; + pILInst[i].SrcReg[1] = SrcRegSave[0]; + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + } + break; + + case OPCODE_SNE: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) + { return GL_FALSE; - break; + } + break; //case OP_STP: // if ( GL_FALSE == assemble_STP(pR700AsmCode) ) @@ -3755,30 +5658,104 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; + case OPCODE_TRUNC: + if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) ) + return GL_FALSE; + break; + case OPCODE_XPD: if ( GL_FALSE == assemble_XPD(pR700AsmCode) ) return GL_FALSE; break; case OPCODE_IF : - if ( GL_FALSE == assemble_IF(pR700AsmCode) ) - return GL_FALSE; + { + GLboolean bHasElse = GL_FALSE; + + if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE) + { + bHasElse = GL_TRUE; + } + + if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) ) + { + return GL_FALSE; + } + } break; + case OPCODE_ELSE : - radeon_error("Not yet implemented instruction OPCODE_ELSE \n"); - //if ( GL_FALSE == assemble_BAD("ELSE") ) + if ( GL_FALSE == assemble_ELSE(pR700AsmCode) ) return GL_FALSE; break; + case OPCODE_ENDIF: if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) ) return GL_FALSE; break; + case OPCODE_BGNLOOP: + if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_BRK: + if( GL_FALSE == assemble_BRK(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_CONT: + if( GL_FALSE == assemble_CONT(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_ENDLOOP: + if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_BGNSUB: + if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) ) + { + return GL_FALSE; + } + break; + + case OPCODE_RET: + if( GL_FALSE == assemble_RET(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_CAL: + if( GL_FALSE == assemble_CAL(pR700AsmCode, + pILInst[i].BranchTarget, + uiIL_Shift, + uiNumberInsts, + pILInst, + NULL) ) + { + return GL_FALSE; + } + break; + //case OPCODE_EXPORT: // if ( GL_FALSE == assemble_EXPORT() ) // return GL_FALSE; // break; + case OPCODE_ENDSUB: + return assemble_ENDSUB(pR700AsmCode); + case OPCODE_END: //pR700AsmCode->uiCurInst = i; //This is to remaind that if in later exoort there is depth/stencil @@ -3795,6 +5772,417 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_TRUE; } +GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) +{ + setRetInLoopFlag(pAsm, SQ_SEL_0); + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + return GL_TRUE; +} + +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg) +{ + GLuint i; + GLuint unCFoffset; + TypedShaderList * plstCFmain; + TypedShaderList * plstCFsub; + + R700ShaderInstruction * pInst; + R700ControlFlowGenericClause * pCFInst; + + R700ControlFlowALUClause * pCF_ALU; + R700ALUInstruction * pALU; + GLuint unConstOffset = 0; + GLuint unRegOffset; + GLuint unMinRegIndex; + + plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; + + /* remove flags init if they are not used */ + if((pAsm->unCFflags & HAS_LOOPRET) == 0) + { + R700ControlFlowALUClause * pCF_ALU; + pInst = plstCFmain->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + if(0 == pCF_ALU->m_Word1.f.count) + { + pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP; + } + else + { + R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction; + + pALU->m_pLinkedALUClause = NULL; + pALU = (R700ALUInstruction *)(pALU->pNextInst); + pALU->m_pLinkedALUClause = pCF_ALU; + pCF_ALU->m_pLinkedALUInstruction = pALU; + + pCF_ALU->m_Word1.f.count--; + } + break; + } + pInst = pInst->pNextInst; + }; + } + + if(pAsm->CALLSTACK[0].max > 0) + { + pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2; + } + + if(0 == pAsm->unSubArrayPointer) + { + return GL_TRUE; + } + + unCFoffset = plstCFmain->uNumOfNode; + + if(NULL != pILProg->Parameters) + { + unConstOffset = pILProg->Parameters->NumParameters; + } + + /* Reloc subs */ + for(i=0; i<pAsm->unSubArrayPointer; i++) + { + pAsm->subs[i].unCFoffset = unCFoffset; + plstCFsub = &(pAsm->subs[i].lstCFInstructions_local); + + pInst = plstCFsub->pHead; + + /* reloc instructions */ + while(pInst) + { + if(SIT_CF_GENERIC == pInst->m_ShaderInstType) + { + pCFInst = (R700ControlFlowGenericClause *)pInst; + + switch (pCFInst->m_Word1.f.cf_inst) + { + case SQ_CF_INST_POP: + case SQ_CF_INST_JUMP: + case SQ_CF_INST_ELSE: + case SQ_CF_INST_LOOP_END: + case SQ_CF_INST_LOOP_START: + case SQ_CF_INST_LOOP_START_NO_AL: + case SQ_CF_INST_LOOP_CONTINUE: + case SQ_CF_INST_LOOP_BREAK: + pCFInst->m_Word0.f.addr += unCFoffset; + break; + default: + break; + } + } + + pInst->m_uIndex += unCFoffset; + + pInst = pInst->pNextInst; + }; + + if(NULL != pAsm->subs[i].pPresubDesc) + { + GLuint uNumSrc; + + unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg; + unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart; + + pInst = plstCFsub->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + + if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src0_sel += unConstOffset; + } + + if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F) + >= SQ_OP3_INST_MUL_LIT ) + { /* op3 : 3 srcs */ + if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word1_OP3.f.src2_sel += unConstOffset; + } + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + else + { + if(pAsm->bR6xx) + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0); + } + else + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0); + } + if(2 == uNumSrc) + { /* 2 srcs */ + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + } + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + pInst = pInst->pNextInst; + }; + } + + /* Put sub into main */ + plstCFmain->pTail->pNextInst = plstCFsub->pHead; + plstCFmain->pTail = plstCFsub->pTail; + plstCFmain->uNumOfNode += plstCFsub->uNumOfNode; + + unCFoffset += plstCFsub->uNumOfNode; + } + + /* reloc callers */ + for(i=0; i<pAsm->unCallerArrayPointer; i++) + { + pAsm->callers[i].cf_ptr->m_Word0.f.addr + = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; + + if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc) + { + unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg; + + if(NULL != pAsm->callers[i].prelude_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + if(NULL != pAsm->callers[i].finale_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + } + } + + return GL_TRUE; +} + +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc) +{ + /* save assemble context */ + GLuint starting_temp_register_number_save; + GLuint number_used_registers_save; + GLuint uFirstHelpReg_save; + GLuint uHelpReg_save; + GLuint uiCurInst_save; + struct prog_instruction *pILInst_save; + PRESUB_DESC * pPresubDesc; + GLboolean bRet; + int i; + + R700ControlFlowGenericClause* prelude_cf_ptr = NULL; + + /* copy srcs to presub inputs */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + for(i=0; i<uNumValidSrc; i++) + { + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i]; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, i, 0) ) + { + return GL_FALSE; + } + + next_ins(pAsm); + } + if(uNumValidSrc > 0) + { + prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + /* browse thro existing presubs. */ + for(i=0; i<pAsm->unNumPresub; i++) + { + if(pAsm->presubs[i].sptSigniture == scriptSigniture) + { + break; + } + } + + if(i == pAsm->unNumPresub) + { /* not loaded yet */ + /* save assemble context */ + number_used_registers_save = pAsm->number_used_registers; + uFirstHelpReg_save = pAsm->uFirstHelpReg; + uHelpReg_save = pAsm->uHelpReg; + starting_temp_register_number_save = pAsm->starting_temp_register_number; + pILInst_save = pAsm->pILInst; + uiCurInst_save = pAsm->uiCurInst; + + /* alloc in presub */ + if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize ) + { + pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs, + sizeof(PRESUB_DESC) * pAsm->unPresubArraySize, + sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) ); + if(NULL == pAsm->presubs) + { + radeon_error("No memeory to allocate built in shader function description structures. \n"); + return GL_FALSE; + } + pAsm->unPresubArraySize += 4; + } + + pPresubDesc = &(pAsm->presubs[i]); + pPresubDesc->sptSigniture = scriptSigniture; + + /* constants offsets need to be final resolved at reloc. */ + if(0 == pAsm->unNumPresub) + { + pPresubDesc->unConstantsStart = 0; + } + else + { + pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart + + pAsm->presubs[i-1].pCompiledSub->NumParameters; + } + + pPresubDesc->pCompiledSub = pCompiledSub; + + pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts; + pPresubDesc->maxStartReg = uFirstHelpReg_save; + pAsm->unCurNumILInsts += pCompiledSub->NumInstructions; + + pAsm->unNumPresub++; + + /* setup new assemble context */ + pAsm->starting_temp_register_number = 0; + pAsm->number_used_registers = pCompiledSub->NumTemporaries; + pAsm->uFirstHelpReg = pAsm->number_used_registers; + pAsm->uHelpReg = pAsm->uFirstHelpReg; + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + + + pPresubDesc->number_used_registers = pAsm->number_used_registers; + + /* restore assemble context */ + pAsm->number_used_registers = number_used_registers_save; + pAsm->uFirstHelpReg = uFirstHelpReg_save; + pAsm->uHelpReg = uHelpReg_save; + pAsm->starting_temp_register_number = starting_temp_register_number_save; + pAsm->pILInst = pILInst_save; + pAsm->uiCurInst = uiCurInst_save; + } + else + { /* was loaded */ + pPresubDesc = &(pAsm->presubs[i]); + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + } + + if(GL_FALSE == bRet) + { + radeon_error("Shader presub assemble failed. \n"); + } + else + { + /* copy presub output to real dst */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pCompiledSub->dstRegIndex; + pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX; + pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY; + pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ; + pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW; + + next_ins(pAsm); + + pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr; + pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers ) + { + pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg; + } + if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg) + { + pPresubDesc->maxStartReg = pAsm->uFirstHelpReg; + } + + return bRet; +} + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -3940,6 +6328,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten) { unsigned int unBit; + GLuint export_count = 0; if(pR700AsmCode->depth_export_register_number >= 0) { @@ -3961,6 +6350,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, { return GL_FALSE; } + export_count++; } unBit = 1 << FRAG_RESULT_DEPTH; if(OutputsWritten & unBit) @@ -3974,8 +6364,15 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, { return GL_FALSE; } + export_count++; } - + /* Need to export something, otherwise we'll hang + * results are undefined anyway */ + if(export_count == 0) + { + Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE); + } + if(pR700AsmCode->cf_last_export_ptr != NULL) { pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; @@ -4080,6 +6477,25 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, export_starting_index++; } } + + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[i], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + } // At least one param should be exported if (export_count) @@ -4114,6 +6530,21 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode) { FREE(pR700AsmCode->pucOutMask); FREE(pR700AsmCode->pInstDeps); + + if(NULL != pR700AsmCode->subs) + { + FREE(pR700AsmCode->subs); + } + if(NULL != pR700AsmCode->callers) + { + FREE(pR700AsmCode->callers); + } + + if(NULL != pR700AsmCode->presubs) + { + FREE(pR700AsmCode->presubs); + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index f9c4d849c6..6ef945dfda 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -34,6 +34,45 @@ #include "r700_shaderinst.h" #include "r700_shader.h" +typedef enum LOADABLE_SCRIPT_SIGNITURE +{ + GLSL_NOISE1 = 0x10000001, + GLSL_NOISE2 = 0x10000002, + GLSL_NOISE3 = 0x10000003, + GLSL_NOISE4 = 0x10000004 +}LOADABLE_SCRIPT_SIGNITURE; + +typedef struct COMPILED_SUB +{ + struct prog_instruction *Instructions; + GLuint NumInstructions; + GLuint NumTemporaries; + GLuint NumParameters; + GLuint MinRegIndex; + GLfloat (*ParameterValues)[4]; + GLbyte outputSwizzleX; + GLbyte outputSwizzleY; + GLbyte outputSwizzleZ; + GLbyte outputSwizzleW; + GLshort srcRegIndex[3]; + GLushort dstRegIndex; +}COMPILED_SUB; + +typedef struct PRESUB_DESCtag +{ + LOADABLE_SCRIPT_SIGNITURE sptSigniture; + GLint subIL_Shift; + struct prog_src_register InReg[3]; + struct prog_dst_register OutReg; + + GLushort maxStartReg; + GLushort number_used_registers; + + GLuint unConstantsStart; + + COMPILED_SUB * pCompiledSub; +} PRESUB_DESC; + typedef enum SHADER_PIPE_TYPE { SPT_VP = 0, @@ -72,7 +111,8 @@ typedef enum SrcRegisterType SRC_REG_INPUT = 1, SRC_REG_CONSTANT = 2, SRC_REG_ALT_TEMPORARY = 3, - NUMBER_OF_SRC_REG_TYPE = 4 + SRC_REC_LITERAL = 4, + NUMBER_OF_SRC_REG_TYPE = 5 } SrcRegisterType; typedef enum DstRegisterType @@ -111,16 +151,24 @@ typedef struct PVSDSTtag BITS addrmode1:1; //32 } PVSDST; +typedef struct PVSINSTtag +{ + BITS literal_slots :2; + BITS SaturateMode :2; + BITS index_mode :3; +} PVSINST; + typedef struct PVSSRCtag { - BITS rtype:4; + BITS rtype:3; BITS addrmode0:1; - BITS reg:10; //15 (8) + BITS reg:10; //14 (8) BITS swizzlex:3; BITS swizzley:3; BITS swizzlez:3; - BITS swizzlew:3; //27 + BITS swizzlew:3; //26 + BITS abs:1; BITS negx:1; BITS negy:1; BITS negz:1; @@ -148,6 +196,7 @@ typedef union PVSDWORDtag { BITS bits; PVSDST dst; + PVSINST dst2; PVSSRC src; PVSMATH math; float f; @@ -251,6 +300,8 @@ enum FC_IF = 1, FC_LOOP = 2, FC_REP = 3, + FC_PUSH_VPM = 4, + FC_PUSH_WQM = 5, COND_NONE = 0, COND_BOOL = 1, @@ -263,22 +314,56 @@ enum typedef struct FC_LEVEL { - unsigned int first; ///< first fc instruction on level (if, rep, loop) - unsigned int* mid; ///< middle instructions - else or all breaks on this level - unsigned int midLen; - unsigned int type; - unsigned int cond; - unsigned int inv; - unsigned int bpush; ///< 1 if first instruction does branch stack push - int id; ///< id of bool or int variable + R700ControlFlowGenericClause * first; + R700ControlFlowGenericClause ** mid; + unsigned int unNumMid; + unsigned int midLen; + unsigned int type; + unsigned int cond; + unsigned int inv; + int id; ///< id of bool or int variable } FC_LEVEL; typedef struct VTX_FETCH_METHOD { - GLboolean bEnableMini; - GLuint mega_fetch_remainder; + GLboolean bEnableMini; + GLuint mega_fetch_remainder; } VTX_FETCH_METHOD; +typedef struct SUB_OFFSET +{ + GLint subIL_Offset; + GLuint unCFoffset; + GLuint unStackDepthMax; + PRESUB_DESC * pPresubDesc; + TypedShaderList lstCFInstructions_local; +} SUB_OFFSET; + +typedef struct CALLER_POINTER +{ + GLint subIL_Offset; + GLint subDescIndex; + R700ControlFlowGenericClause* cf_ptr; + + R700ControlFlowGenericClause* prelude_cf_ptr; + R700ControlFlowGenericClause* finale_cf_ptr; +} CALLER_POINTER; + +#define SQ_MAX_CALL_DEPTH 0x00000020 + +typedef struct CALL_LEVEL +{ + unsigned int FCSP_BeforeEntry; + GLint subDescIndex; + GLushort current; + GLushort max; + TypedShaderList * plstCFInstructions_local; +} CALL_LEVEL; + +#define HAS_CURRENT_LOOPRET 0x1L +#define HAS_LOOPRET 0x2L +#define LOOPRET_FLAGS HAS_LOOPRET | HAS_CURRENT_LOOPRET + typedef struct r700_AssemblerBase { R700ControlFlowSXClause* cf_last_export_ptr; @@ -294,14 +379,19 @@ typedef struct r700_AssemblerBase // No clause has been created yet CF_CLAUSE_TYPE cf_current_clause_type; + BITS alu_x_opcode; + GLuint number_of_exports; GLuint number_of_colorandz_exports; GLuint number_of_export_opcodes; PVSDWORD D; + PVSDWORD D2; PVSDWORD S[3]; + PVSDWORD C[4]; unsigned int uLastPosUpdate; + unsigned int last_cond_register; OUT_FRAGMENT_FMT_0 fp_stOutFmt0; @@ -310,6 +400,8 @@ typedef struct r700_AssemblerBase unsigned int number_used_registers; unsigned int uUsedConsts; + unsigned int flag_reg_index; + // Fragment programs unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX]; unsigned int uiFP_OutputMap[FRAG_RESULT_MAX]; @@ -330,9 +422,6 @@ typedef struct r700_AssemblerBase unsigned int FCSP; FC_LEVEL fc_stack[32]; - unsigned int branch_depth; - unsigned int max_branch_depth; - //----------------------------------------------------------------------------------- // ArgSubst used in Assemble_Source() function //----------------------------------------------------------------------------------- @@ -373,7 +462,29 @@ typedef struct r700_AssemblerBase SHADER_PIPE_TYPE currentShaderType; struct prog_instruction * pILInst; GLuint uiCurInst; + GLubyte SamplerUnits[MAX_SAMPLERS]; GLboolean bR6xx; + /* helper to decide which type of instruction to assemble */ + GLboolean is_tex; + /* we inserted helper intructions and need barrier on next TEX ins */ + GLboolean need_tex_barrier; + + SUB_OFFSET * subs; + GLuint unSubArraySize; + GLuint unSubArrayPointer; + CALLER_POINTER * callers; + GLuint unCallerArraySize; + GLuint unCallerArrayPointer; + unsigned int CALLSP; + CALL_LEVEL CALLSTACK[SQ_MAX_CALL_DEPTH]; + + GLuint unCFflags; + + PRESUB_DESC * presubs; + GLuint unPresubArraySize; + GLuint unNumPresub; + GLuint unCurNumILInsts; + } r700_AssemblerBase; //Internal use @@ -395,7 +506,7 @@ BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) ; GLboolean is_reduction_opcode(PVSDWORD * dest); GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size); -unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm); +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3); GLboolean IsTex(gl_inst_opcode Opcode); GLboolean IsAlu(gl_inst_opcode Opcode); @@ -411,6 +522,15 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, GLuint number_of_elements, GLenum dataElementType, VTX_FETCH_METHOD* pFetchMethod); +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + VTX_FETCH_METHOD * pFetchMethod); +GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm); GLuint gethelpr(r700_AssemblerBase* pAsm); void resethelpr(r700_AssemblerBase* pAsm); void checkop_init(r700_AssemblerBase* pAsm); @@ -433,6 +553,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr, GLuint contiguous_slots_needed); + +GLboolean add_cf_instruction(r700_AssemblerBase* pAsm); +void add_return_inst(r700_AssemblerBase *pAsm); + void get_src_properties(R700ALUInstruction* alu_instruction_ptr, int source_index, BITS* psrc_sel, @@ -454,21 +578,31 @@ GLboolean check_vector(r700_AssemblerBase* pAsm, R700ALUInstruction* alu_instruction_ptr); GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm); GLboolean next_ins(r700_AssemblerBase *pAsm); + +GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops); +GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset); +GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue); +GLboolean testFlag(r700_AssemblerBase *pAsm); +GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP); +GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF); + GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode); GLboolean assemble_ABS(r700_AssemblerBase *pAsm); GLboolean assemble_ADD(r700_AssemblerBase *pAsm); +GLboolean assemble_ARL(r700_AssemblerBase *pAsm); GLboolean assemble_BAD(char *opcode_str); GLboolean assemble_CMP(r700_AssemblerBase *pAsm); -GLboolean assemble_COS(r700_AssemblerBase *pAsm); GLboolean assemble_DOT(r700_AssemblerBase *pAsm); GLboolean assemble_DST(r700_AssemblerBase *pAsm); GLboolean assemble_EX2(r700_AssemblerBase *pAsm); +GLboolean assemble_EXP(r700_AssemblerBase *pAsm); GLboolean assemble_FLR(r700_AssemblerBase *pAsm); GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm); GLboolean assemble_FRC(r700_AssemblerBase *pAsm); -GLboolean assemble_KIL(r700_AssemblerBase *pAsm); +GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode); GLboolean assemble_LG2(r700_AssemblerBase *pAsm); GLboolean assemble_LRP(r700_AssemblerBase *pAsm); +GLboolean assemble_LOG(r700_AssemblerBase *pAsm); GLboolean assemble_MAD(r700_AssemblerBase *pAsm); GLboolean assemble_LIT(r700_AssemblerBase *pAsm); GLboolean assemble_MAX(r700_AssemblerBase *pAsm); @@ -478,17 +612,37 @@ GLboolean assemble_MUL(r700_AssemblerBase *pAsm); GLboolean assemble_POW(r700_AssemblerBase *pAsm); GLboolean assemble_RCP(r700_AssemblerBase *pAsm); GLboolean assemble_RSQ(r700_AssemblerBase *pAsm); -GLboolean assemble_SIN(r700_AssemblerBase *pAsm); GLboolean assemble_SCS(r700_AssemblerBase *pAsm); GLboolean assemble_SGE(r700_AssemblerBase *pAsm); + +GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode); +GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode); +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode); + GLboolean assemble_SLT(r700_AssemblerBase *pAsm); GLboolean assemble_STP(r700_AssemblerBase *pAsm); GLboolean assemble_TEX(r700_AssemblerBase *pAsm); GLboolean assemble_XPD(r700_AssemblerBase *pAsm); GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm); -GLboolean assemble_IF(r700_AssemblerBase *pAsm); +GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse); +GLboolean assemble_ELSE(r700_AssemblerBase *pAsm); GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm); +GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm); +GLboolean assemble_BRK(r700_AssemblerBase *pAsm); +GLboolean assemble_COND(r700_AssemblerBase *pAsm); +GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm); + +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift); +GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm); +GLboolean assemble_RET(r700_AssemblerBase *pAsm); +GLboolean assemble_CAL(r700_AssemblerBase *pAsm, + GLint nILindex, + GLuint uiIL_Offest, + GLuint uiNumberInsts, + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc); + GLboolean Process_Export(r700_AssemblerBase* pAsm, GLuint type, GLuint export_starting_index, @@ -498,14 +652,25 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select); +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + /* struct prog_instruction ** pILInstParent, */ + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc); //Interface -GLboolean AssembleInstr(GLuint uiNumberInsts, +GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, + GLuint uiNumberInsts, struct prog_instruction *pILInst, r700_AssemblerBase *pR700AsmCode); GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten); +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg); +GLboolean InitShaderProgram(r700_AssemblerBase * pAsm); + int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader); GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode); diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 06d7e9c9ab..ee2a0a4c8a 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -54,11 +54,15 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { if (ctx->Texture.Unit[i]._ReallyEnabled) { radeonTexObj *t = r700->textures[i]; + uint32_t offset; if (t) { - if (!t->image_override) + if (!t->image_override) { bo = t->mt->bo; - else + offset = get_base_teximage_offset(t); + } else { bo = t->bo; + offset = 0; + } if (bo) { r700SyncSurf(context, bo, @@ -77,7 +81,7 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) R600_OUT_BATCH(r700->textures[i]->SQ_TEX_RESOURCE6); R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE2, bo, - 0, + offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0); R600_OUT_BATCH_RELOC(r700->textures[i]->SQ_TEX_RESOURCE3, bo, @@ -141,17 +145,15 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom } } +extern int getTypeSize(GLenum type); static void r700SetupVTXConstants(GLcontext * ctx, - unsigned int nStreamID, void * pAos, - unsigned int size, /* number of elements in vector */ - unsigned int stride, - unsigned int count) /* number of vectors in stream */ + StreamDesc * pStreamDesc) { context_t *context = R700_CONTEXT(ctx); struct radeon_aos * paos = (struct radeon_aos *)pAos; + unsigned int nVBsize; BATCH_LOCALS(&context->radeon); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); unsigned int uSQ_VTX_CONSTANT_WORD0_0; unsigned int uSQ_VTX_CONSTANT_WORD1_0; @@ -171,18 +173,40 @@ static void r700SetupVTXConstants(GLcontext * ctx, else r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit); + if(0 == pStreamDesc->stride) + { + nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type); + } + else + { + nVBsize = paos->count * pStreamDesc->stride; + } + uSQ_VTX_CONSTANT_WORD0_0 = paos->offset; - uSQ_VTX_CONSTANT_WORD1_0 = count * (size * 4) - 1; + uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1; SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */ - SETfield(uSQ_VTX_CONSTANT_WORD2_0, stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, + SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift, SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask); - SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(GL_FLOAT, size, NULL), + SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL), SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift, SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); /* TODO : trace back api for initial data type, not only GL_FLOAT */ - SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED, - SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); - SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + + if(GL_TRUE == pStreamDesc->normalize) + { + SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM, + SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + } + //else + //{ + // SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_INT, + // SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask); + //} + + if(1 == pStreamDesc->_signed) + { + SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit); + } SETfield(uSQ_VTX_CONSTANT_WORD3_0, 1, MEM_REQUEST_SIZE_shift, MEM_REQUEST_SIZE_mask); SETfield(uSQ_VTX_CONSTANT_WORD6_0, SQ_TEX_VTX_VALID_BUFFER, @@ -191,7 +215,7 @@ static void r700SetupVTXConstants(GLcontext * ctx, BEGIN_BATCH_NO_AUTOSTATE(9 + 2); R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 7)); - R600_OUT_BATCH((nStreamID + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); + R600_OUT_BATCH((pStreamDesc->element + SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0); R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0); @@ -208,31 +232,6 @@ static void r700SetupVTXConstants(GLcontext * ctx, } -void r700SetupStreams(GLcontext *ctx) -{ - context_t *context = R700_CONTEXT(ctx); - struct r700_vertex_program *vp = context->selected_vp; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - unsigned int i, j = 0; - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - - R600_STATECHANGE(context, vtx); - - for(i=0; i<VERT_ATTRIB_MAX; i++) { - if(vp->mesa_program->Base.InputsRead & (1 << i)) { - rcommon_emit_vector(ctx, - &context->radeon.tcl.aos[j], - vb->AttribPtr[i]->data, - vb->AttribPtr[i]->size, - vb->AttribPtr[i]->stride, - vb->Count); - j++; - } - } - context->radeon.tcl.aos_count = j; -} - static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) { context_t *context = R700_CONTEXT(ctx); @@ -256,15 +255,12 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) COMMIT_BATCH(); for(i=0; i<VERT_ATTRIB_MAX; i++) { - if(vp->mesa_program->Base.InputsRead & (1 << i)) { - /* currently aos are packed */ - r700SetupVTXConstants(ctx, - i, - (void*)(&context->radeon.tcl.aos[j]), - (unsigned int)context->radeon.tcl.aos[j].components, - (unsigned int)context->radeon.tcl.aos[j].stride * 4, - (unsigned int)context->radeon.tcl.aos[j].count); - j++; + if(vp->mesa_program->Base.InputsRead & (1 << i)) + { + r700SetupVTXConstants(ctx, + (void*)(&context->radeon.tcl.aos[j]), + &(context->stream_desc[j])); + j++; } } } @@ -366,7 +362,6 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a rrb = radeon_get_depthbuffer(&context->radeon); if (!rrb || !rrb->bo) { - fprintf(stderr, "no rrb\n"); return; } @@ -408,7 +403,6 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * rrb = radeon_get_colorbuffer(&context->radeon); if (!rrb || !rrb->bo) { - fprintf(stderr, "no rrb\n"); return; } @@ -452,68 +446,77 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom) { - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_bo * pbo; - BATCH_LOCALS(&context->radeon); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + struct radeon_bo * pbo; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); + pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); - if (!pbo) - return; + if (!pbo) + return; - r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3 + 2); - R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); - R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All); - R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, - pbo, - r700->ps.SQ_PGM_START_PS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1); + R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All); + R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All, + pbo, + r700->ps.SQ_PGM_START_PS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(9); - R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(9); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All); + END_BATCH(); - COMMIT_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL(SQ_LOOP_CONST_0, 0x01000FFF); + END_BATCH(); + + COMMIT_BATCH(); } static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom) { - context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); - struct radeon_bo * pbo; - BATCH_LOCALS(&context->radeon); - radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); + struct radeon_bo * pbo; + BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); + pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); - if (!pbo) - return; + if (!pbo) + return; - r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); + r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); - BEGIN_BATCH_NO_AUTOSTATE(3 + 2); - R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); - R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All); - R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, - pbo, - r700->vs.SQ_PGM_START_VS.u32All, - RADEON_GEM_DOMAIN_GTT, 0, 0); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1); + R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All); + R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All, + pbo, + r700->vs.SQ_PGM_START_VS.u32All, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(6); - R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All); - R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All); - END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(6); + R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All); + R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All); + END_BATCH(); - COMMIT_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3); + R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + 32*4), 0x0100000F); + //R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + (SQ_LOOP_CONST_vs<2)), 0x0100000F); + END_BATCH(); + + COMMIT_BATCH(); } static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom) @@ -794,8 +797,7 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom) BATCH_LOCALS(&context->radeon); radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); - BEGIN_BATCH_NO_AUTOSTATE(23); - R600_OUT_BATCH_REGVAL(DB_HTILE_DATA_BASE, r700->DB_HTILE_DATA_BASE.u32All); + BEGIN_BATCH_NO_AUTOSTATE(17); R600_OUT_BATCH_REGSEQ(DB_STENCIL_CLEAR, 2); R600_OUT_BATCH(r700->DB_STENCIL_CLEAR.u32All); @@ -808,7 +810,6 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom) R600_OUT_BATCH(r700->DB_RENDER_CONTROL.u32All); R600_OUT_BATCH(r700->DB_RENDER_OVERRIDE.u32All); - R600_OUT_BATCH_REGVAL(DB_HTILE_SURFACE, r700->DB_HTILE_SURFACE.u32All); R600_OUT_BATCH_REGVAL(DB_ALPHA_TO_MASK, r700->DB_ALPHA_TO_MASK.u32All); END_BATCH(); @@ -1108,6 +1109,32 @@ static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom) COMMIT_BATCH(); } +static void r700SendQueryBegin(GLcontext *ctx, struct radeon_state_atom *atom) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = radeon->query.current; + BATCH_LOCALS(radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + + /* clear the buffer */ + radeon_bo_map(query->bo, GL_FALSE); + memset(query->bo->ptr, 0, 4 * 2 * sizeof(uint64_t)); /* 4 DBs, 2 qwords each */ + radeon_bo_unmap(query->bo); + + radeon_cs_space_check_with_bo(radeon->cmdbuf.cs, + query->bo, + 0, RADEON_GEM_DOMAIN_GTT); + + BEGIN_BATCH_NO_AUTOSTATE(4 + 2); + R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2)); + R600_OUT_BATCH(ZPASS_DONE); + R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */ + R600_OUT_BATCH(0x00000000); + R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + query->emitted_begin = GL_TRUE; +} + static int check_always(GLcontext *ctx, struct radeon_state_atom *atom) { return atom->cmd_size; @@ -1136,7 +1163,11 @@ static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom) count += 3; if (context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) { - for (ui = 0; ui < R700_MAX_RENDER_TARGETS; ui++) { + /* targets are enabled in r700SetRenderTarget but state + size is calculated before that. Until MRT's are done + hardcode target0 as enabled. */ + count += 3; + for (ui = 1; ui < R700_MAX_RENDER_TARGETS; ui++) { if (r700->render_target[ui].enabled) count += 3; } @@ -1216,6 +1247,20 @@ static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom) return count; } +static int check_queryobj(GLcontext *ctx, struct radeon_state_atom *atom) +{ + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + struct radeon_query_object *query = radeon->query.current; + int count; + + if (!query || query->emitted_begin) + count = 0; + else + count = atom->cmd_size; + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); + return count; +} + #define ALLOC_STATE( ATOM, CHK, SZ, EMIT ) \ do { \ context->atoms.ATOM.cmd_size = (SZ); \ @@ -1229,6 +1274,19 @@ do { \ insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \ } while (0) +static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ) +{ + radeon->query.queryobj.cmd_size = (SZ); + radeon->query.queryobj.cmd = NULL; + radeon->query.queryobj.name = "queryobj"; + radeon->query.queryobj.idx = 0; + radeon->query.queryobj.check = check_queryobj; + radeon->query.queryobj.dirty = GL_FALSE; + radeon->query.queryobj.emit = r700SendQueryBegin; + radeon->hw.max_state_size += (SZ); + insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj); +} + void r600InitAtoms(context_t *context) { radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context); @@ -1239,7 +1297,7 @@ void r600InitAtoms(context_t *context) context->radeon.hw.atomlist.name = "atom-list"; ALLOC_STATE(sq, always, 34, r700SendSQConfig); - ALLOC_STATE(db, always, 23, r700SendDBState); + ALLOC_STATE(db, always, 17, r700SendDBState); ALLOC_STATE(stencil, always, 4, r700SendStencilState); ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState); ALLOC_STATE(sc, always, 15, r700SendSCState); @@ -1252,22 +1310,23 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(poly, always, 10, r700SendPolyState); ALLOC_STATE(cb, cb, 18, r700SendCBState); ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState); + ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState); ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState); ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState); - ALLOC_STATE(cb_target, always, 25, r700SendRenderTargetState); ALLOC_STATE(sx, always, 9, r700SendSXState); ALLOC_STATE(vgt, always, 41, r700SendVGTState); ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState); ALLOC_STATE(vpt, always, 16, r700SendViewportState); ALLOC_STATE(fs, always, 18, r700SendFSState); - ALLOC_STATE(vs, always, 18, r700SendVSState); - ALLOC_STATE(ps, always, 21, r700SendPSState); + ALLOC_STATE(vs, always, 21, r700SendVSState); + ALLOC_STATE(ps, always, 24, r700SendPSState); ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts); ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts); ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState); ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState); ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState); ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState); + r600_init_query_stateobj(&context->radeon, 6 * 2); context->radeon.hw.is_dirty = GL_TRUE; context->radeon.hw.all_dirty = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 78ce3ae436..d15f013710 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -44,12 +44,18 @@ //TODO : Validate FP input with VP output. void Map_Fragment_Program(r700_AssemblerBase *pAsm, - struct gl_fragment_program *mesa_fp) + struct gl_fragment_program *mesa_fp, + GLcontext *ctx) { unsigned int unBit; unsigned int i; GLuint ui; + /* match fp inputs with vp exports. */ + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; + pAsm->number_used_registers = 0; //Input mapping : mesa_fp->Base.InputsRead set the flag, set in @@ -61,32 +67,93 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_COL0; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_COL1; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++; } - unBit = 1 << FRAG_ATTRIB_FOGC; - if(mesa_fp->Base.InputsRead & unBit) - { - pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; - } + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; + } for(i=0; i<8; i++) { - unBit = 1 << (FRAG_ATTRIB_TEX0 + i); - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) { pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++; } } + +/* order has been taken care of */ +#if 1 + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(OutputsWritten & unBit) + { + pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++; + } + } +#else + if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 ) + { + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying; + struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying; + struct gl_program_parameter * pVsParam; + struct gl_program_parameter * pPsParam; + GLuint j, k; + GLuint unMaxVarying = 0; + + for(i=0; i<VsVarying->NumParameters; i++) + { + pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0; + } + + for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++) + { + unBit = 1 << i; + if(mesa_fp->Base.InputsRead & unBit) + { + j = i - FRAG_ATTRIB_VAR0; + pPsParam = PsVarying->Parameters + j; + + for(k=0; k<VsVarying->NumParameters; k++) + { + pVsParam = VsVarying->Parameters + k; + + if( strcmp(pPsParam->Name, pVsParam->Name) == 0) + { + pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k; + if(k > unMaxVarying) + { + unMaxVarying = k; + } + break; + } + } + } + } + + pAsm->number_used_registers += unMaxVarying + 1; + } +#endif + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++; + } /* Map temporary registers (GPRs) */ pAsm->starting_temp_register_number = pAsm->number_used_registers; @@ -127,6 +194,8 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->pucOutMask[ui] = 0x0; } + pAsm->flag_reg_index = pAsm->number_used_registers++; + pAsm->uFirstHelpReg = pAsm->number_used_registers; } @@ -135,15 +204,19 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, { GLuint i, j; GLint * puiTEMPwrites; + GLint * puiTEMPreads; struct prog_instruction * pILInst; InstDeps *pInstDeps; struct prog_instruction * texcoord_DepInst; GLint nDepInstID; puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries); + puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries); + for(i=0; i<mesa_fp->Base.NumTemporaries; i++) { puiTEMPwrites[i] = -1; + puiTEMPreads[i] = -1; } pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions); @@ -167,6 +240,11 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, { //Set dep. pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index]; + //Set first read + if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 ) + { + puiTEMPreads[pILInst->SrcReg[j].Index] = i; + } } else { @@ -177,8 +255,6 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, fp->r700AsmCode.pInstDeps = pInstDeps; - FREE(puiTEMPwrites); - //Find dep for tex inst for(i=0; i<mesa_fp->Base.NumInstructions; i++) { @@ -203,29 +279,58 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, { //... other deps? } } + // make sure that we dont overwrite src used earlier + nDepInstID = puiTEMPreads[pILInst->DstReg.Index]; + if(nDepInstID < i) + { + pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index]; + texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]); + if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) ) + { + pInstDeps[nDepInstID].nDstDep = i; + } + + } + } } + FREE(puiTEMPwrites); + FREE(puiTEMPreads); + return GL_TRUE; } GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, - struct gl_fragment_program *mesa_fp) + struct gl_fragment_program *mesa_fp, + GLcontext *ctx) { GLuint number_of_colors_exported; GLboolean z_enabled = GL_FALSE; GLuint unBit; + int i; //Init_Program Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) ); - Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp); + Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx); if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) ) { return GL_FALSE; } + + InitShaderProgram(&(fp->r700AsmCode)); - if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions, + for(i=0; i < MAX_SAMPLERS; i++) + { + fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i]; + } + + fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions; + + if( GL_FALSE == AssembleInstr(0, + 0, + mesa_fp->Base.NumInstructions, &(mesa_fp->Base.Instructions[0]), &(fp->r700AsmCode)) ) { @@ -237,6 +342,11 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, return GL_FALSE; } + if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) ) + { + return GL_FALSE; + } + fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0 : (fp->r700AsmCode.number_used_registers - 1); @@ -251,7 +361,15 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, number_of_colors_exported--; } - fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled; + /* illegal to set this to 0 */ + if(number_of_colors_exported || z_enabled) + { + fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled; + } + else + { + fp->r700Shader.exportMode = (1 << 1); + } fp->translated = GL_TRUE; @@ -269,7 +387,7 @@ void r700SelectFragmentShader(GLcontext *ctx) } if (GL_FALSE == fp->translated) - r700TranslateFragmentShader(fp, &(fp->mesa_program)); + r700TranslateFragmentShader(fp, &(fp->mesa_program), ctx); } void * r700GetActiveFpShaderBo(GLcontext * ctx) @@ -341,6 +459,25 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) SETbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); SETbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); } + else + { + CLEARbit(r700->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit); + CLEARbit(r700->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit); + } + + if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE)) + { + ui += 1; + SETfield(r700->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask); + SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + SETbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit); + SETfield(r700->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask); + } + else + { + CLEARbit(r700->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit); + } + ui = (unNumOfReg < ui) ? ui : unNumOfReg; @@ -357,27 +494,10 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) SETfield(r700->ps.SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode, EXPORT_MODE_shift, EXPORT_MODE_mask); - R600_STATECHANGE(context, db); - - if(fp->r700Shader.killIsUsed) - { - SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); - } - else - { - CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); - } - - if(fp->r700Shader.depthIsExported) - { - SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); - } - else - { - CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); - } - // emit ps input map + struct r700_vertex_program_cont *vpc = + (struct r700_vertex_program_cont *)ctx->VertexProgram._Current; + GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten; unBit = 1 << FRAG_ATTRIB_WPOS; if(mesa_fp->Base.InputsRead & unBit) { @@ -391,8 +511,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_COL0; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -404,8 +524,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_COL1; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -417,8 +537,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } - unBit = 1 << FRAG_ATTRIB_FOGC; - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -432,8 +552,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) for(i=0; i<8; i++) { - unBit = 1 << (FRAG_ATTRIB_TEX0 + i); - if(mesa_fp->Base.InputsRead & unBit) + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) { ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i]; SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); @@ -443,14 +563,47 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) } } - R600_STATECHANGE(context, cb); + unBit = 1 << FRAG_ATTRIB_FACE; + if(mesa_fp->Base.InputsRead & unBit) + { + ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(OutputsWritten & unBit) + { + ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0]; + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); + SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, + SEMANTIC_shift, SEMANTIC_mask); + if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) + SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + else + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + } + } + exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift); - r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1; + if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1)) + { + R600_STATECHANGE(context, cb); + r700->CB_SHADER_CONTROL.u32All = (1 << exportCount) - 1; + } /* sent out shader constants. */ paramList = fp->mesa_program.Base.Parameters; - if(NULL != paramList) { + if(NULL != paramList) + { _mesa_load_state_parameters(ctx, paramList); if (paramList->NumParameters > R700_MAX_DX9_CONSTS) @@ -463,14 +616,33 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; } } else r700->ps.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->ps.num_consts; + for(ui=0; ui<pAsm->unNumPresub; ui++) + { + pCompiledSub = pAsm->presubs[ui].pCompiledSub; + + r700->ps.num_consts += pCompiledSub->NumParameters; + + for(uj=0; uj<pCompiledSub->NumParameters; uj++) + { + r700->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h index cbb108d212..e562bfa478 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.h +++ b/src/mesa/drivers/dri/r600/r700_fragprog.h @@ -49,12 +49,14 @@ struct r700_fragment_program /* Internal */ void Map_Fragment_Program(r700_AssemblerBase *pAsm, - struct gl_fragment_program *mesa_fp); + struct gl_fragment_program *mesa_fp, + GLcontext *ctx); GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp, struct gl_fragment_program *mesa_fp); GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp, - struct gl_fragment_program *mesa_vp); + struct gl_fragment_program *mesa_vp, + GLcontext *ctx); /* Interface */ extern void r700SelectFragmentShader(GLcontext *ctx); diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c index 5290ef31be..0d476fcd86 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.c +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -40,6 +40,24 @@ #include "r700_vertprog.h" +static void freeVertProgCache(GLcontext *ctx, struct r700_vertex_program_cont *cache) +{ + struct r700_vertex_program *tmp, *vp = cache->progs; + + while (vp) { + tmp = vp->next; + /* Release DMA region */ + r600DeleteShader(ctx, vp->shaderbo); + /* Clean up */ + Clean_Up_Assembler(&(vp->r700AsmCode)); + Clean_Up_Shader(&(vp->r700Shader)); + + _mesa_reference_vertprog(ctx, &vp->mesa_program, NULL); + _mesa_free(vp); + vp = tmp; + } +} + static struct gl_program *r700NewProgram(GLcontext * ctx, GLenum target, GLuint id) @@ -84,8 +102,7 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { - struct r700_vertex_program_cont * vpc; - struct r700_vertex_program *vp, *tmp; + struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog; struct r700_fragment_program * fp; radeon_print(RADEON_SHADER, RADEON_VERBOSE, @@ -95,20 +112,7 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) { case GL_VERTEX_STATE_PROGRAM_NV: case GL_VERTEX_PROGRAM_ARB: - vpc = (struct r700_vertex_program_cont*)prog; - vp = vpc->progs; - while (vp) { - tmp = vp->next; - /* Release DMA region */ - - r600DeleteShader(ctx, vp->shaderbo); - - /* Clean up */ - Clean_Up_Assembler(&(vp->r700AsmCode)); - Clean_Up_Shader(&(vp->r700Shader)); - _mesa_free(vp); - vp = tmp; - } + freeVertProgCache(ctx, vpc); break; case GL_FRAGMENT_PROGRAM_NV: case GL_FRAGMENT_PROGRAM_ARB: @@ -131,7 +135,24 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) static void r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog) { - + struct r700_vertex_program_cont *vpc = (struct r700_vertex_program_cont *)prog; + struct r700_fragment_program * fp = (struct r700_fragment_program*)prog; + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + freeVertProgCache(ctx, vpc); + vpc->progs = NULL; + break; + case GL_FRAGMENT_PROGRAM_ARB: + r600DeleteShader(ctx, fp->shaderbo); + Clean_Up_Assembler(&(fp->r700AsmCode)); + Clean_Up_Shader(&(fp->r700Shader)); + fp->translated = GL_FALSE; + fp->loaded = GL_FALSE; + fp->shaderbo = NULL; + break; + } + } static GLboolean r700IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog) diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index b1c3648ca5..eab27cbd84 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -43,6 +43,7 @@ #include "tnl/t_context.h" #include "tnl/t_vertex.h" #include "tnl/t_pipeline.h" +#include "vbo/vbo_context.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -53,13 +54,12 @@ #include "r700_fragprog.h" #include "r700_state.h" +#include "radeon_buffer_objects.h" #include "radeon_common_context.h" void r700WaitForIdle(context_t *context); void r700WaitForIdleClean(context_t *context); -GLboolean r700SendTextureState(context_t *context); static unsigned int r700PrimitiveType(int prim); -void r600UpdateTextureState(GLcontext * ctx); GLboolean r700SyncSurf(context_t *context, struct radeon_bo *pbo, uint32_t read_domain, @@ -249,113 +249,654 @@ static int r700NumVerts(int num_verts, int prim) static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) { - context_t *context = R700_CONTEXT(ctx); - BATCH_LOCALS(&context->radeon); - int type, i, total_emit; - int num_indices; - uint32_t vgt_draw_initiator = 0; - uint32_t vgt_index_type = 0; - uint32_t vgt_primitive_type = 0; - uint32_t vgt_num_indices = 0; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - - type = r700PrimitiveType(prim); - num_indices = r700NumVerts(end - start, prim); - - radeon_print(RADEON_RENDER, RADEON_TRACE, - "%s type %x num_indices %d\n", - __func__, type, num_indices); - - if (type < 0 || num_indices <= 0) - return; + context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, total_emit; + int num_indices; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + + type = r700PrimitiveType(prim); + num_indices = r700NumVerts(end - start, prim); + + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); + + if (type < 0 || num_indices <= 0) + return; + + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); + + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + + if(GL_TRUE != context->ind_buf.is_32bit) + { + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + } + + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); + + total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + 5 + 2; /* DRAW_INDEX */ + + BEGIN_BATCH_NO_AUTOSTATE(total_emit); + // prim + R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1); + R600_OUT_BATCH(vgt_primitive_type); + // index type + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + // draw packet + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3)); + R600_OUT_BATCH(context->ind_buf.bo_offset); + R600_OUT_BATCH(0); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset, + context->ind_buf.bo, + context->ind_buf.bo_offset, + RADEON_GEM_DOMAIN_GTT, 0, 0); + END_BATCH(); + COMMIT_BATCH(); +} + +static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, int prim) +{ + context_t *context = R700_CONTEXT(ctx); + BATCH_LOCALS(&context->radeon); + int type, i; + uint32_t num_indices, total_emit = 0; + uint32_t vgt_draw_initiator = 0; + uint32_t vgt_index_type = 0; + uint32_t vgt_primitive_type = 0; + uint32_t vgt_num_indices = 0; + + type = r700PrimitiveType(prim); + num_indices = r700NumVerts(end - start, prim); + + radeon_print(RADEON_RENDER, RADEON_TRACE, + "%s type %x num_indices %d\n", + __func__, type, num_indices); + + if (type < 0 || num_indices <= 0) + return; + + SETfield(vgt_primitive_type, type, + VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); - total_emit = 3 /* VGT_PRIMITIVE_TYPE */ - + 2 /* VGT_INDEX_TYPE */ - + 2 /* NUM_INSTANCES */ - + num_indices + 3; /* DRAW_INDEX_IMMD */ - - BEGIN_BATCH_NO_AUTOSTATE(total_emit); - // prim - SETfield(vgt_primitive_type, type, - VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, 1)); - R600_OUT_BATCH(mmVGT_PRIMITIVE_TYPE - ASIC_CONFIG_BASE_INDEX); - R600_OUT_BATCH(vgt_primitive_type); - - // index type - SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); - R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); - R600_OUT_BATCH(vgt_index_type); - - // num instances - R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); - R600_OUT_BATCH(1); - - // draw packet - vgt_num_indices = num_indices; - SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); - SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); - - R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); + if (num_indices > 0xffff) + { + SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + } + else + { + SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask); + } + + vgt_num_indices = num_indices; + SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask); + + if (start == 0) + { + SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + } + else + { + if (num_indices > 0xffff) + { + total_emit += num_indices; + } + else + { + total_emit += (num_indices + 1) / 2; + } + SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask); + } + + total_emit += 3 /* VGT_PRIMITIVE_TYPE */ + + 2 /* VGT_INDEX_TYPE */ + + 2 /* NUM_INSTANCES */ + + 3; /* DRAW */ + + BEGIN_BATCH_NO_AUTOSTATE(total_emit); + // prim + R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1); + R600_OUT_BATCH(vgt_primitive_type); + // index type + R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0)); + R600_OUT_BATCH(vgt_index_type); + // num instances + R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); + R600_OUT_BATCH(1); + // draw packet + if(start == 0) + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1)); R600_OUT_BATCH(vgt_num_indices); R600_OUT_BATCH(vgt_draw_initiator); - - for (i = start; i < (start + num_indices); i++) { - if(vb->Elts) - R600_OUT_BATCH(vb->Elts[i]); + } + else + { + if (num_indices > 0xffff) + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + for (i = start; i < (start + num_indices); i++) + { + R600_OUT_BATCH(i); + } + } + else + { + R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1))); + R600_OUT_BATCH(vgt_num_indices); + R600_OUT_BATCH(vgt_draw_initiator); + for (i = start; i < (start + num_indices); i += 2) + { + if ((i + 1) == (start + num_indices)) + { + R600_OUT_BATCH(i); + } else - R600_OUT_BATCH(i); - } - END_BATCH(); - COMMIT_BATCH(); + { + R600_OUT_BATCH(((i + 1) << 16) | (i)); + } + } + } + } + END_BATCH(); + COMMIT_BATCH(); } /* start 3d, idle, cb/db flush */ #define PRE_EMIT_STATE_BUFSZ 10 + 5 + 14 -static GLuint r700PredictRenderSize(GLcontext* ctx) +static GLuint r700PredictRenderSize(GLcontext* ctx, + const struct _mesa_prim *prim, + const struct _mesa_index_buffer *ib, + GLuint nr_prims) { context_t *context = R700_CONTEXT(ctx); - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct r700_vertex_program *vp = context->selected_vp; - struct vertex_buffer *vb = &tnl->vb; GLboolean flushed; GLuint dwords, i; GLuint state_size; - /* pre calculate aos count so state prediction works */ - context->radeon.tcl.aos_count = _mesa_bitcount(vp->mesa_program->Base.InputsRead); dwords = PRE_EMIT_STATE_BUFSZ; - for (i = 0; i < vb->PrimitiveCount; i++) - dwords += vb->Primitive[i].count + 10; + if (ib) + dwords += nr_prims * 14; + else { + for (i = 0; i < nr_prims; ++i) + { + if (prim[i].start == 0) + dwords += 10; + else if (prim[i].count > 0xffff) + dwords += prim[i].count + 10; + else + dwords += ((prim[i].count + 1) / 2) + 10; + } + } + state_size = radeonCountStateEmitSize(&context->radeon); flushed = rcommonEnsureCmdBufSpace(&context->radeon, - dwords + state_size, __FUNCTION__); - + dwords + state_size, + __FUNCTION__); if (flushed) - dwords += radeonCountStateEmitSize(&context->radeon); + dwords += radeonCountStateEmitSize(&context->radeon); else - dwords += state_size; + dwords += state_size; - radeon_print(RADEON_RENDER, RADEON_VERBOSE, - "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords); return dwords; + +} + +#define CONVERT( TYPE, MACRO ) do { \ + GLuint i, j, sz; \ + sz = input->Size; \ + if (input->Normalized) { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = MACRO(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } else { \ + for (i = 0; i < count; i++) { \ + const TYPE *in = (TYPE *)src_ptr; \ + for (j = 0; j < sz; j++) { \ + *dst_ptr++ = (GLfloat)(*in); \ + in++; \ + } \ + src_ptr += stride; \ + } \ + } \ +} while (0) + +/** + * Convert attribute data type to float + * If the attribute uses named buffer object replace the bo with newly allocated bo + */ +static void r700ConvertAttrib(GLcontext *ctx, int count, + const struct gl_client_array *input, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const GLvoid *src_ptr; + GLboolean mapped_named_bo = GL_FALSE; + GLfloat *dst_ptr; + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; + + /* Convert value for first element only */ + if (input->StrideB == 0) + { + count = 1; + } + + if (input->BufferObj->Name) + { + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + } + else + { + src_ptr = input->Ptr; + } + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, + sizeof(GLfloat) * input->Size * count, 32); + + radeon_bo_map(attr->bo, 1); + + dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + + assert(src_ptr != NULL); + + switch (input->Type) + { + case GL_DOUBLE: + CONVERT(GLdouble, (GLfloat)); + break; + case GL_UNSIGNED_INT: + CONVERT(GLuint, UINT_TO_FLOAT); + break; + case GL_INT: + CONVERT(GLint, INT_TO_FLOAT); + break; + case GL_UNSIGNED_SHORT: + CONVERT(GLushort, USHORT_TO_FLOAT); + break; + case GL_SHORT: + CONVERT(GLshort, SHORT_TO_FLOAT); + break; + case GL_UNSIGNED_BYTE: + assert(input->Format != GL_BGRA); + CONVERT(GLubyte, UBYTE_TO_FLOAT); + break; + case GL_BYTE: + CONVERT(GLbyte, BYTE_TO_FLOAT); + break; + default: + assert(0); + break; + } + + radeon_bo_unmap(attr->bo); + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } +} + +static void r700AlignDataToDword(GLcontext *ctx, + const struct gl_client_array *input, + int count, + struct StreamDesc *attr) +{ + context_t *context = R700_CONTEXT(ctx); + const int dst_stride = (input->StrideB + 3) & ~3; + const int size = getTypeSize(input->Type) * input->Size * count; + GLboolean mapped_named_bo = GL_FALSE; + + radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32); + + radeon_bo_map(attr->bo, 1); + + if (!input->BufferObj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj); + mapped_named_bo = GL_TRUE; + } + + { + GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr); + GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset); + int i; + + for (i = 0; i < count; ++i) + { + _mesa_memcpy(dst_ptr, src_ptr, input->StrideB); + src_ptr += input->StrideB; + dst_ptr += dst_stride; + } + } + + radeon_bo_unmap(attr->bo); + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj); + } + + attr->stride = dst_stride; +} + +static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count) +{ + context_t *context = R700_CONTEXT(ctx); + GLuint stride; + int ret; + int i, index; + + R600_STATECHANGE(context, vtx); + + for(index = 0; index < context->nNumActiveAos; index++) + { + struct radeon_aos *aos = &context->radeon.tcl.aos[index]; + i = context->stream_desc[index].element; + + stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; + + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input[i]->Type) != 4 || +#endif + stride < 4) + { + r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); + } + else + { + if (input[i]->BufferObj->Name) + { + if (stride % 4 != 0) + { + assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); + r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); + context->stream_desc[index].is_named_bo = GL_FALSE; + } + else + { + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; + } + } + else + { + int size; + int local_count = count; + uint32_t *dst; + + if (input[i]->StrideB == 0) + { + size = getTypeSize(input[i]->Type) * input[i]->Size; + local_count = 1; + } + else + { + size = getTypeSize(input[i]->Type) * input[i]->Size * local_count; + } + + radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo, + &context->stream_desc[index].bo_offset, size, 32); + + radeon_bo_map(context->stream_desc[index].bo, 1); + assert(context->stream_desc[index].bo->ptr != NULL); + + + dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr, + context->stream_desc[index].bo_offset); + + switch (context->stream_desc[index].dwords) + { + case 1: + radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + case 2: + radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + case 3: + radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + case 4: + radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); + break; + default: + assert(0); + break; + } + radeon_bo_unmap(context->stream_desc[index].bo); + } + } + + aos->count = context->stream_desc[index].stride == 0 ? 1 : count; + aos->stride = context->stream_desc[index].stride / sizeof(float); + aos->components = context->stream_desc[index].dwords; + aos->bo = context->stream_desc[index].bo; + aos->offset = context->stream_desc[index].bo_offset; + + if(context->stream_desc[index].is_named_bo) + { + radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs, + context->stream_desc[index].bo, + RADEON_GEM_DOMAIN_GTT, 0); + } + } + + ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs, + first_elem(&context->radeon.dma.reserved)->bo, + RADEON_GEM_DOMAIN_GTT, 0); +} + +static void r700FreeData(GLcontext *ctx) +{ + /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo + * to prevent double unref in radeonReleaseArrays + * called during context destroy + */ + context_t *context = R700_CONTEXT(ctx); + + int i; + + for (i = 0; i < context->nNumActiveAos; i++) + { + if (!context->stream_desc[i].is_named_bo) + { + radeon_bo_unref(context->stream_desc[i].bo); + } + context->radeon.tcl.aos[i].bo = NULL; + } + + if (context->ind_buf.bo != NULL) + { + radeon_bo_unref(context->ind_buf.bo); + } +} + +static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + GLvoid *src_ptr; + GLuint *out; + int i; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + mapped_named_bo = GL_TRUE; + assert(mesa_ind_buf->obj->Pointer != NULL); + } + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) + { + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + GLubyte *in = (GLubyte *)src_ptr; + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + radeon_bo_map(context->ind_buf.bo, 1); + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } + + radeon_bo_unmap(context->ind_buf.bo); +#if MESA_BIG_ENDIAN + } + else + { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */ + GLushort *in = (GLushort *)src_ptr; + GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1); + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + + radeon_bo_map(context->ind_buf.bo, 1); + assert(context->ind_buf.bo->ptr != NULL); + out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) + { + *out++ = in[i] | in[i + 1] << 16; + } + + if (i < mesa_ind_buf->count) + { + *out++ = in[i]; + } + radeon_bo_unmap(context->ind_buf.bo); +#endif + } + + context->ind_buf.is_32bit = GL_FALSE; + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } } -static GLboolean r700RunRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) +static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf) +{ + context_t *context = R700_CONTEXT(ctx); + + if (!mesa_ind_buf) { + context->ind_buf.bo = NULL; + return; + } + +#if MESA_BIG_ENDIAN + if (mesa_ind_buf->type == GL_UNSIGNED_INT) + { +#else + if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) + { +#endif + const GLvoid *src_ptr; + GLvoid *dst_ptr; + GLboolean mapped_named_bo = GL_FALSE; + + if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) + { + ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj); + assert(mesa_ind_buf->obj->Pointer != NULL); + mapped_named_bo = GL_TRUE; + } + + src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr); + + const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type); + + radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo, + &context->ind_buf.bo_offset, size, 4); + radeon_bo_map(context->ind_buf.bo, 1); + assert(context->ind_buf.bo->ptr != NULL); + dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset); + + _mesa_memcpy(dst_ptr, src_ptr, size); + + radeon_bo_unmap(context->ind_buf.bo); + context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT); + context->ind_buf.count = mesa_ind_buf->count; + + if (mapped_named_bo) + { + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj); + } + } + else + { + r700FixupIndexBuffer(ctx, mesa_ind_buf); + } +} + +static GLboolean r700TryDrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) { context_t *context = R700_CONTEXT(ctx); radeonContextPtr radeon = &context->radeon; - unsigned int i, id = 0; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; + GLuint i, id = 0; struct radeon_renderbuffer *rrb; - radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n", - __func__, context->radeon.cmdbuf.cs->cdw); + if (ctx->NewState) + _mesa_update_state( ctx ); + + _tnl_UpdateFixedFunctionProgram(ctx); + r700SetVertexFormat(ctx, arrays, max_index + 1); + /* shaders need to be updated before buffers are validated */ + r700UpdateShaders(ctx); + if (!r600ValidateBuffers(ctx)) + return GL_FALSE; /* always emit CB base to prevent * lock ups on some chips. @@ -367,21 +908,29 @@ static GLboolean r700RunRender(GLcontext * ctx, r700SetScissor(context); r700SetupVertexProgram(ctx); r700SetupFragmentProgram(ctx); - r600UpdateTextureState(ctx); + r700UpdateShaderStates(ctx); - GLuint emit_end = r700PredictRenderSize(ctx) - + context->radeon.cmdbuf.cs->cdw; - r700SetupStreams(ctx); + GLuint emit_end = r700PredictRenderSize(ctx, prim, ib, nr_prims) + + context->radeon.cmdbuf.cs->cdw; + + r700SetupIndexBuffer(ctx, ib); + r700SetupStreams(ctx, arrays, max_index + 1); radeonEmitState(radeon); radeon_debug_add_indent(); - /* richard test code */ - for (i = 0; i < vb->PrimitiveCount; i++) { - GLuint prim = _tnl_translate_prim(&vb->Primitive[i]); - GLuint start = vb->Primitive[i].start; - GLuint end = vb->Primitive[i].start + vb->Primitive[i].count; - r700RunRenderPrimitive(ctx, start, end, prim); + for (i = 0; i < nr_prims; ++i) + { + if (context->ind_buf.bo) + r700RunRenderPrimitive(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode); + else + r700RunRenderPrimitiveImmediate(ctx, + prim[i].start, + prim[i].start + prim[i].count, + prim[i].mode); } radeon_debug_remove_indent(); @@ -398,83 +947,54 @@ static GLboolean r700RunRender(GLcontext * ctx, r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM, DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit); - radeonReleaseArrays(ctx, ~0); - - radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n", - __func__, context->radeon.cmdbuf.cs->cdw); + r700FreeData(ctx); - if ( emit_end < context->radeon.cmdbuf.cs->cdw ) - WARN_ONCE("Rendering was %d commands larger than predicted size." - " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); - - return GL_FALSE; -} + if (emit_end < context->radeon.cmdbuf.cs->cdw) + { + WARN_ONCE("Rendering was %d commands larger than predicted size." + " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end); + } -static GLboolean r700RunNonTCLRender(GLcontext * ctx, - struct tnl_pipeline_stage *stage) /* -------------------- */ -{ - GLboolean bRet = GL_TRUE; - - return bRet; + return GL_TRUE; } -static GLboolean r700RunTCLRender(GLcontext * ctx, /*----------------------*/ - struct tnl_pipeline_stage *stage) +static void r700DrawPrims(GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index) { - GLboolean bRet = GL_FALSE; + GLboolean retval = GL_FALSE; - /* TODO : sw fallback */ - - /* Need shader bo's setup before bo check */ - r700UpdateShaders(ctx); - /** + /* This check should get folded into just the places that + * min/max index are really needed. + */ + if (!index_bounds_valid) { + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + } - * Ensure all enabled and complete textures are uploaded along with any buffers being used. - */ - if(!r600ValidateBuffers(ctx)) - { - return GL_TRUE; - } + if (min_index) { + vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims ); + return; + } - bRet = r700RunRender(ctx, stage); + /* Make an attempt at drawing */ + retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - return bRet; - //GL_FALSE will stop to do other pipe stage in _tnl_run_pipeline - //The render here DOES finish the whole pipe, so GL_FALSE should be returned for success. + /* If failed run tnl pipeline - it should take care of fallbacks */ + if (!retval) + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); } -const struct tnl_pipeline_stage _r700_render_stage = { - "r700 Hardware Rasterization", - NULL, - NULL, - NULL, - NULL, - r700RunNonTCLRender -}; - -const struct tnl_pipeline_stage _r700_tcl_stage = { - "r700 Hardware Transform, Clipping and Lighting", - NULL, - NULL, - NULL, - NULL, - r700RunTCLRender -}; - -const struct tnl_pipeline_stage *r700_pipeline[] = +void r700InitDraw(GLcontext *ctx) { - &_r700_tcl_stage, - &_tnl_vertex_transform_stage, - &_tnl_normal_transform_stage, - &_tnl_lighting_stage, - &_tnl_fog_coordinate_stage, - &_tnl_texgen_stage, - &_tnl_texture_transform_stage, - &_tnl_vertex_program_stage, - - &_r700_render_stage, - &_tnl_render_stage, - 0, -}; + struct vbo_context *vbo = vbo_context(ctx); + + /* to be enabled */ + vbo->draw_prims = r700DrawPrims; +} diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c index b4fd51c137..2eed1acc2f 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.c +++ b/src/mesa/drivers/dri/r600/r700_shader.c @@ -60,6 +60,55 @@ void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * plstCFInstructions->uNumOfNode++; } +void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst) +{ + GLuint ulIndex = 0; + GLboolean bFound = GL_FALSE; + R700ShaderInstruction * pPrevInst = NULL; + R700ShaderInstruction * pCurInst = plstCFInstructions->pHead; + + /* Need go thro list to make sure pInst is there. */ + while(NULL != pCurInst) + { + if(pCurInst == pInst) + { + bFound = GL_TRUE; + break; + } + + pPrevInst = pCurInst; + pCurInst = pCurInst->pNextInst; + } + if(GL_TRUE == bFound) + { + plstCFInstructions->uNumOfNode--; + + pCurInst = pInst->pNextInst; + ulIndex = pInst->m_uIndex; + while(NULL != pCurInst) + { + pCurInst->m_uIndex = ulIndex; + ulIndex++; + pCurInst = pCurInst->pNextInst; + } + + if(plstCFInstructions->pHead == pInst) + { + plstCFInstructions->pHead = pInst->pNextInst; + } + if(plstCFInstructions->pTail == pInst) + { + plstCFInstructions->pTail = pPrevInst; + } + if(NULL != pPrevInst) + { + pPrevInst->pNextInst = pInst->pNextInst; + } + + FREE(pInst); + } +} + void Init_R700_Shader(R700_Shader * pShader) { pShader->Type = R700_SHADER_INVALID; @@ -110,13 +159,18 @@ void Init_R700_Shader(R700_Shader * pShader) pShader->lstVTXInstructions.uNumOfNode=0; } +void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF) +{ + pShader->plstCFInstructions_active = plstCF; +} + void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst) { R700ControlFlowSXClause* pSXClause; R700ControlFlowSMXClause* pSMXClause; - pCFInst->m_uIndex = pShader->lstCFInstructions.uNumOfNode; - AddInstToList(&(pShader->lstCFInstructions), + pCFInst->m_uIndex = pShader->plstCFInstructions_active->uNumOfNode; + AddInstToList(pShader->plstCFInstructions_active, (R700ShaderInstruction*)pCFInst); pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType); @@ -488,6 +542,47 @@ void DebugPrint(void) { } +void cleanup_vfetch_shaderinst(R700_Shader *pShader) +{ + R700ShaderInstruction *pInst; + R700ShaderInstruction *pInstToFree; + R700VertexInstruction *pVTXInst; + R700ControlFlowInstruction *pCFInst; + + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pVTXInst = (R700VertexInstruction *)pInst; + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pVTXInst->m_ShaderInstType); + + if(NULL != pVTXInst->m_pLinkedGenericClause) + { + pCFInst = (R700ControlFlowInstruction*)(pVTXInst->m_pLinkedGenericClause); + + TakeInstOutFromList(&(pShader->lstCFInstructions), + (R700ShaderInstruction*)pCFInst); + + pShader->uShaderBinaryDWORDSize -= GetInstructionSize(pCFInst->m_ShaderInstType); + } + + pInst = pInst->pNextInst; + }; + + //destroy each item in pShader->lstVTXInstructions; + pInst = pShader->lstVTXInstructions.pHead; + while(NULL != pInst) + { + pInstToFree = pInst; + pInst = pInst->pNextInst; + FREE(pInstToFree); + }; + + //set NULL pShader->lstVTXInstructions + pShader->lstVTXInstructions.pHead=NULL; + pShader->lstVTXInstructions.pTail=NULL; + pShader->lstVTXInstructions.uNumOfNode=0; +} + void Clean_Up_Shader(R700_Shader *pShader) { FREE(pShader->pProgram); diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h index bfd01e1a93..0599ffd901 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.h +++ b/src/mesa/drivers/dri/r600/r700_shader.h @@ -109,6 +109,7 @@ typedef struct R700_Shader GLuint uStackSize; GLuint uMaxCallDepth; + TypedShaderList * plstCFInstructions_active; TypedShaderList lstCFInstructions; TypedShaderList lstALUInstructions; TypedShaderList lstTEXInstructions; @@ -128,21 +129,23 @@ typedef struct R700_Shader //Internal void AddInstToList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst); +void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruction * pInst); void ResolveLinks(R700_Shader *pShader); void Assemble(R700_Shader *pShader); - //Interface void Init_R700_Shader(R700_Shader * pShader); void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst); void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst); void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst); void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst); +void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF); void LoadProgram(R700_Shader *pShader); void UpdateShaderRegisters(R700_Shader *pShader); void DeleteInstructions(R700_Shader *pShader); void DebugPrint(void); +void cleanup_vfetch_shaderinst(R700_Shader *pShader); void Clean_Up_Shader(R700_Shader *pShader); diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index e91aa43118..d7420678ff 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -46,7 +46,6 @@ #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" #include "vbo/vbo.h" -#include "main/texformat.h" #include "r600_context.h" @@ -55,18 +54,15 @@ #include "r700_fragprog.h" #include "r700_vertprog.h" - +void r600UpdateTextureState(GLcontext * ctx); static void r700SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state); static void r700UpdatePolygonMode(GLcontext * ctx); static void r700SetPolygonOffsetState(GLcontext * ctx, GLboolean state); static void r700SetStencilState(GLcontext * ctx, GLboolean state); -void r700UpdateShaders (GLcontext * ctx) //---------------------------------- +void r700UpdateShaders(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); - GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; - GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; - int i; /* should only happenen once, just after context is created */ /* TODO: shouldn't we fallback to sw here? */ @@ -77,21 +73,6 @@ void r700UpdateShaders (GLcontext * ctx) //---------------------------------- r700SelectFragmentShader(ctx); - if (context->radeon.NewGLState) { - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - /* mat states from state var not array for sw */ - dummy_attrib[i].stride = 0; - temp_attrib[i] = TNL_CONTEXT(ctx)->vb.AttribPtr[i]; - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = &(dummy_attrib[i]); - } - - _tnl_UpdateFixedFunctionProgram(ctx); - - for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) { - TNL_CONTEXT(ctx)->vb.AttribPtr[i] = temp_attrib[i]; - } - } - r700SelectVertexShader(ctx); r700UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS); context->radeon.NewGLState = 0; @@ -210,6 +191,67 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //----------- context->radeon.NewGLState |= new_state; } +static void r700SetDBRenderState(GLcontext * ctx) +{ + context_t *context = R700_CONTEXT(ctx); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); + struct r700_fragment_program *fp = (struct r700_fragment_program *) + (ctx->FragmentProgram._Current); + + R600_STATECHANGE(context, db); + + SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit); + SETfield(r700->DB_SHADER_CONTROL.u32All, EARLY_Z_THEN_LATE_Z, Z_ORDER_shift, Z_ORDER_mask); + /* XXX need to enable htile for hiz/s */ + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); + SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); + + if (context->radeon.query.current) + { + SETbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit); + if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770) + { + SETbit(r700->DB_RENDER_CONTROL.u32All, PERFECT_ZPASS_COUNTS_bit); + } + } + else + { + CLEARbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit); + if (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV770) + { + CLEARbit(r700->DB_RENDER_CONTROL.u32All, PERFECT_ZPASS_COUNTS_bit); + } + } + + if (fp) + { + if (fp->r700Shader.killIsUsed) + { + SETbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); + } + else + { + CLEARbit(r700->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit); + } + + if (fp->r700Shader.depthIsExported) + { + SETbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); + } + else + { + CLEARbit(r700->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit); + } + } +} + +void r700UpdateShaderStates(GLcontext * ctx) +{ + r700SetDBRenderState(ctx); + r600UpdateTextureState(ctx); +} + static void r700SetDepthState(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); @@ -475,10 +517,10 @@ static void r700SetBlendState(GLcontext * ctx) eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE), + blend_factor(ctx->Color.BlendSrcA, GL_TRUE), ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendDstRGB, GL_FALSE), + blend_factor(ctx->Color.BlendDstA, GL_FALSE), ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask); switch (ctx->Color.BlendEquationA) { @@ -753,9 +795,9 @@ static void r700ColorMask(GLcontext * ctx, (b ? 4 : 0) | (a ? 8 : 0)); - if (mask != r700->CB_SHADER_MASK.u32All) { + if (mask != r700->CB_TARGET_MASK.u32All) { R600_STATECHANGE(context, cb); - SETfield(r700->CB_SHADER_MASK.u32All, mask, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask); + SETfield(r700->CB_TARGET_MASK.u32All, mask, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask); } } @@ -845,9 +887,9 @@ static void r700PointSize(GLcontext * ctx, GLfloat size) size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize); /* format is 12.4 fixed point */ - SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 16), + SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0), PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask); - SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 16), + SETfield(r700->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0), PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask); } @@ -862,11 +904,11 @@ static void r700PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * pa /* format is 12.4 fixed point */ switch (pname) { case GL_POINT_SIZE_MIN: - SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 16.0), + SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 8.0), MIN_SIZE_shift, MIN_SIZE_mask); break; case GL_POINT_SIZE_MAX: - SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 16.0), + SETfield(r700->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 8.0), MAX_SIZE_shift, MAX_SIZE_mask); break; case GL_POINT_DISTANCE_ATTENUATION: @@ -1049,6 +1091,7 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //-------------------- GLfloat tz = v[MAT_TZ] * depthScale; R600_STATECHANGE(context, vpt); + R600_STATECHANGE(context, cl); r700->viewport[id].PA_CL_VPORT_XSCALE.f32All = sx; r700->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx; @@ -1059,6 +1102,18 @@ static void r700UpdateWindow(GLcontext * ctx, int id) //-------------------- r700->viewport[id].PA_CL_VPORT_ZSCALE.f32All = sz; r700->viewport[id].PA_CL_VPORT_ZOFFSET.f32All = tz; + if (ctx->Transform.DepthClamp) { + r700->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = MIN2(ctx->Viewport.Near, ctx->Viewport.Far); + r700->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = MAX2(ctx->Viewport.Near, ctx->Viewport.Far); + SETbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit); + SETbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit); + } else { + r700->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = 0.0; + r700->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = 1.0; + CLEARbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit); + CLEARbit(r700->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit); + } + r700->viewport[id].enabled = GL_TRUE; r700SetScissor(context); @@ -1130,20 +1185,25 @@ static void r700PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) // context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); GLfloat constant = units; + GLchar depth = 0; + + R600_STATECHANGE(context, poly); switch (ctx->Visual.depthBits) { case 16: constant *= 4.0; + depth = -16; break; case 24: constant *= 2.0; + depth = -24; break; } factor *= 12.0; - - R600_STATECHANGE(context, poly); - + SETfield(r700->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All, depth, + POLY_OFFSET_NEG_NUM_DB_BITS_shift, POLY_OFFSET_NEG_NUM_DB_BITS_mask); + //r700->PA_SU_POLY_OFFSET_CLAMP.f32All = constant; //??? r700->PA_SU_POLY_OFFSET_FRONT_SCALE.f32All = factor; r700->PA_SU_POLY_OFFSET_FRONT_OFFSET.f32All = constant; r700->PA_SU_POLY_OFFSET_BACK_SCALE.f32All = factor; @@ -1276,6 +1336,11 @@ void r700SetScissor(context_t *context) //--------------- y1 = context->radeon.state.scissor.rect.y1; x2 = context->radeon.state.scissor.rect.x2; y2 = context->radeon.state.scissor.rect.y2; + /* r600 has exclusive BR scissors */ + if (context->radeon.radeonScreen->kernel_mm) { + x2++; + y2++; + } } else { if (context->radeon.radeonScreen->driScreen->dri2.enabled) { x1 = 0; @@ -1354,8 +1419,6 @@ void r700SetScissor(context_t *context) //--------------- SETfield(r700->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, y2, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask); - r700->viewport[id].PA_SC_VPORT_ZMIN_0.u32All = 0; - r700->viewport[id].PA_SC_VPORT_ZMAX_0.u32All = 0x3F800000; r700->viewport[id].enabled = GL_TRUE; } @@ -1670,19 +1733,10 @@ void r700InitState(GLcontext * ctx) //------------------- r700Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test); r700DepthMask(ctx, ctx->Depth.Mask); r700DepthFunc(ctx, ctx->Depth.Func); - SETbit(r700->DB_SHADER_CONTROL.u32All, DUAL_EXPORT_ENABLE_bit); - r700->DB_DEPTH_CLEAR.u32All = 0x3F800000; - - r700->DB_RENDER_CONTROL.u32All = 0; SETbit(r700->DB_RENDER_CONTROL.u32All, STENCIL_COMPRESS_DISABLE_bit); SETbit(r700->DB_RENDER_CONTROL.u32All, DEPTH_COMPRESS_DISABLE_bit); - r700->DB_RENDER_OVERRIDE.u32All = 0; - if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) - SETbit(r700->DB_RENDER_OVERRIDE.u32All, FORCE_SHADER_Z_ORDER_bit); - SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask); - SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask); - SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask); + r700SetDBRenderState(ctx); r700->DB_ALPHA_TO_MASK.u32All = 0; SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask); @@ -1756,7 +1810,7 @@ void r700InitState(GLcontext * ctx) //------------------- r700->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF; /* screen/window/view */ - SETfield(r700->CB_TARGET_MASK.u32All, 0xF, (4 * id), TARGET0_ENABLE_mask); + SETfield(r700->CB_SHADER_MASK.u32All, 0xF, (4 * id), OUTPUT0_ENABLE_mask); context->radeon.hw.all_dirty = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r700_state.h b/src/mesa/drivers/dri/r600/r700_state.h index 0f53d5b4c5..60c6a7f23c 100644 --- a/src/mesa/drivers/dri/r600/r700_state.h +++ b/src/mesa/drivers/dri/r600/r700_state.h @@ -35,6 +35,7 @@ extern void r700UpdateStateParameters(GLcontext * ctx, GLuint new_state); extern void r700UpdateShaders (GLcontext * ctx); +extern void r700UpdateShaderStates(GLcontext * ctx); extern void r700UpdateViewportOffset(GLcontext * ctx); diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 9ee26286d9..90fac078ff 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -111,6 +111,15 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm, } } + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(mesa_vp->Base.OutputsWritten & unBit) + { + pAsm->ucVP_OutputMap[i] = unTotal++; + } + } + return (unTotal - unStart); } @@ -159,7 +168,35 @@ GLboolean Process_Vertex_Program_Vfetch_Instructions( return GL_TRUE; } -void Map_Vertex_Program(struct r700_vertex_program *vp, +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp) +{ + int i; + context_t *context = R700_CONTEXT(ctx); + + VTX_FETCH_METHOD vtxFetchMethod; + vtxFetchMethod.bEnableMini = GL_FALSE; + vtxFetchMethod.mega_fetch_remainder = 0; + + for(i=0; i<context->nNumActiveAos; i++) + { + assemble_vfetch_instruction2(&vp->r700AsmCode, + vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element], + context->stream_desc[i].type, + context->stream_desc[i].size, + context->stream_desc[i].element, + context->stream_desc[i]._signed, + context->stream_desc[i].normalize, + &vtxFetchMethod); + } + + return GL_TRUE; +} + +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp) { GLuint ui; @@ -175,10 +212,10 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, pAsm->number_used_registers += num_inputs; // Create VFETCH instructions for inputs - if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) + if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) ) { - radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); - return; //error + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions2 return error. \n"); + return; } // Map Outputs @@ -189,7 +226,7 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, pAsm->number_used_registers += pAsm->number_of_exports; pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports); - + for(ui=0; ui<pAsm->number_of_exports; ui++) { pAsm->pucOutMask[ui] = 0x0; @@ -206,7 +243,9 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, { /* fix func t_vp uses NumTemporaries */ pAsm->number_used_registers += mesa_vp->Base.NumTemporaries; } - + + pAsm->flag_reg_index = pAsm->number_used_registers++; + pAsm->uFirstHelpReg = pAsm->number_used_registers; } @@ -261,13 +300,10 @@ GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, } struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp) + struct gl_vertex_program *mesa_vp) { context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program *vp; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - unsigned int unBit; unsigned int i; vp = _mesa_calloc(sizeof(*vp)); @@ -278,17 +314,13 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, _mesa_insert_mvp_code(ctx, vp->mesa_program); } - for(i=0; i<VERT_ATTRIB_MAX; i++) + for(i=0; i<context->nNumActiveAos; i++) { - unBit = 1 << i; - if(vp->mesa_program->Base.InputsRead & unBit) /* ctx->Array.ArrayObj->xxxxxxx */ - { - vp->aos_desc[i].size = vb->AttribPtr[i]->size; - vp->aos_desc[i].stride = vb->AttribPtr[i]->size * sizeof(GL_FLOAT);/* when emit array, data is packed. vb->AttribPtr[i]->stride;*/ - vp->aos_desc[i].type = GL_FLOAT; - } + vp->aos_desc[i].size = context->stream_desc[i].size; + vp->aos_desc[i].stride = context->stream_desc[i].stride; + vp->aos_desc[i].type = context->stream_desc[i].type; } - + if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { vp->r700AsmCode.bR6xx = 1; @@ -296,25 +328,41 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, //Init_Program Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) ); - Map_Vertex_Program( vp, vp->mesa_program ); + Map_Vertex_Program(ctx, vp, vp->mesa_program ); if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program)) { return NULL; + } + + InitShaderProgram(&(vp->r700AsmCode)); + + for(i=0; i < MAX_SAMPLERS; i++) + { + vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i]; } - if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions, - &(vp->mesa_program->Base.Instructions[0]), + vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions; + + if(GL_FALSE == AssembleInstr(0, + 0, + vp->mesa_program->Base.NumInstructions, + &(vp->mesa_program->Base.Instructions[0]), &(vp->r700AsmCode)) ) { return NULL; - } + } if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) ) { return NULL; } + if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) ) + { + return GL_FALSE; + } + vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0 : (vp->r700AsmCode.number_used_registers - 1); @@ -330,9 +378,6 @@ void r700SelectVertexShader(GLcontext *ctx) context_t *context = R700_CONTEXT(ctx); struct r700_vertex_program_cont *vpc; struct r700_vertex_program *vp; - TNLcontext *tnl = TNL_CONTEXT(ctx); - struct vertex_buffer *vb = &tnl->vb; - unsigned int unBit; unsigned int i; GLboolean match; GLbitfield InputsRead; @@ -343,29 +388,27 @@ void r700SelectVertexShader(GLcontext *ctx) if (vpc->mesa_program.IsPositionInvariant) { InputsRead |= VERT_BIT_POS; - } - + } + for (vp = vpc->progs; vp; vp = vp->next) { - match = GL_TRUE; - for(i=0; i<VERT_ATTRIB_MAX; i++) + match = GL_TRUE; + for(i=0; i<context->nNumActiveAos; i++) { - unBit = 1 << i; - if(InputsRead & unBit) + if (vp->aos_desc[i].size != context->stream_desc[i].size) { - if (vp->aos_desc[i].size != vb->AttribPtr[i]->size) - match = GL_FALSE; - break; + match = GL_FALSE; + break; } } - if (match) + if (match) { context->selected_vp = vp; return; } } - vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program) ); + vp = r700TranslateVertexShader(ctx, &(vpc->mesa_program)); if(!vp) { radeon_error("Failed to translate vertex shader. \n"); @@ -377,6 +420,146 @@ void r700SelectVertexShader(GLcontext *ctx) return; } +int getTypeSize(GLenum type) +{ + switch (type) + { + case GL_DOUBLE: + return sizeof(GLdouble); + case GL_FLOAT: + return sizeof(GLfloat); + case GL_INT: + return sizeof(GLint); + case GL_UNSIGNED_INT: + return sizeof(GLuint); + case GL_SHORT: + return sizeof(GLshort); + case GL_UNSIGNED_SHORT: + return sizeof(GLushort); + case GL_BYTE: + return sizeof(GLbyte); + case GL_UNSIGNED_BYTE: + return sizeof(GLubyte); + default: + assert(0); + return 0; + } +} + +static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input) +{ + context_t *context = R700_CONTEXT(ctx); + + StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]); + + GLuint stride; + + stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size + : input->StrideB; + + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || +#if MESA_BIG_ENDIAN + getTypeSize(input->Type) != 4 || +#endif + stride < 4) + { + pStreamDesc->type = GL_FLOAT; + + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = sizeof(GLfloat) * input->Size; + } + pStreamDesc->dwords = input->Size; + pStreamDesc->is_named_bo = GL_FALSE; + } + else + { + pStreamDesc->type = input->Type; + pStreamDesc->dwords = (getTypeSize(input->Type) * input->Size + 3)/ 4; + if (!input->BufferObj->Name) + { + if (input->StrideB == 0) + { + pStreamDesc->stride = 0; + } + else + { + pStreamDesc->stride = (getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3; + } + + pStreamDesc->is_named_bo = GL_FALSE; + } + } + + pStreamDesc->size = input->Size; + pStreamDesc->dst_loc = context->nNumActiveAos; + pStreamDesc->element = unLoc; + + switch (pStreamDesc->type) + { //GetSurfaceFormat + case GL_FLOAT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = GL_FALSE; + break; + case GL_SHORT: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_BYTE: + pStreamDesc->_signed = 1; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_SHORT: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + case GL_UNSIGNED_BYTE: + pStreamDesc->_signed = 0; + pStreamDesc->normalize = input->Normalized; + break; + default: + case GL_INT: + case GL_UNSIGNED_INT: + case GL_DOUBLE: + assert(0); + break; + } + context->nNumActiveAos++; +} + +void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count) +{ + context_t *context = R700_CONTEXT(ctx); + struct r700_vertex_program *vpc + = (struct r700_vertex_program *)ctx->VertexProgram._Current; + + struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program); + unsigned int unLoc = 0; + unsigned int unBit = mesa_vp->Base.InputsRead; + context->nNumActiveAos = 0; + + if (mesa_vp->IsPositionInvariant) + { + unBit |= VERT_BIT_POS; + } + + while(unBit) + { + if(unBit & 1) + { + r700TranslateAttrib(ctx, unLoc, count, arrays[unLoc]); + } + + unBit >>= 1; + ++unLoc; + } + context->radeon.tcl.aos_count = context->nNumActiveAos; +} + void * r700GetActiveVpShaderBo(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); @@ -456,6 +639,12 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) paramList = vp->mesa_program->Base.Parameters; if(NULL != paramList) { + /* vp->mesa_program was cloned, not updated by glsl shader api. */ + /* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgem._Current */ + /* so, use ctx->VertexProgem._Current */ + struct gl_program_parameter_list *paramListOrginal = + paramListOrginal = ctx->VertexProgram._Current->Base.Parameters; + _mesa_load_state_parameters(ctx, paramList); if (paramList->NumParameters > R700_MAX_DX9_CONSTS) @@ -468,13 +657,42 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) unNumParamData = paramList->NumParameters; for(ui=0; ui<unNumParamData; ui++) { - r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; - r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; - r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; - r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM) + { + r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0]; + r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1]; + r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2]; + r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3]; + } + else + { + r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0]; + r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1]; + r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2]; + r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3]; + } } } else r700->vs.num_consts = 0; + COMPILED_SUB * pCompiledSub; + GLuint uj; + GLuint unConstOffset = r700->vs.num_consts; + for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++) + { + pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub; + + r700->vs.num_consts += pCompiledSub->NumParameters; + + for(uj=0; uj<pCompiledSub->NumParameters; uj++) + { + r700->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0]; + r700->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1]; + r700->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2]; + r700->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3]; + } + unConstOffset += pCompiledSub->NumParameters; + } + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h index c48764c43b..00824c29d3 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.h +++ b/src/mesa/drivers/dri/r600/r700_vertprog.h @@ -52,8 +52,7 @@ struct r700_vertex_program GLboolean translated; GLboolean loaded; - GLboolean needUpdateVF; - + void * shaderbo; ArrayDesc aos_desc[VERT_ATTRIB_MAX]; @@ -76,19 +75,27 @@ unsigned int Map_Vertex_Input(r700_AssemblerBase *pAsm, GLboolean Process_Vertex_Program_Vfetch_Instructions( struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); -void Map_Vertex_Program(struct r700_vertex_program *vp, +GLboolean Process_Vertex_Program_Vfetch_Instructions2( + GLcontext *ctx, + struct r700_vertex_program *vp, + struct gl_vertex_program *mesa_vp); +void Map_Vertex_Program(GLcontext *ctx, + struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); GLboolean Find_Instruction_Dependencies_vp(struct r700_vertex_program *vp, struct gl_vertex_program *mesa_vp); struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx, - struct gl_vertex_program *mesa_vp); + struct gl_vertex_program *mesa_vp); /* Interface */ extern void r700SelectVertexShader(GLcontext *ctx); +extern void r700SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count); extern GLboolean r700SetupVertexProgram(GLcontext * ctx); extern void * r700GetActiveVpShaderBo(GLcontext * ctx); +extern int getTypeSize(GLenum type); + #endif /* _R700_VERTPROG_H_ */ diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.c b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c new file mode 120000 index 0000000000..f6a5f66470 --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.c @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.c
\ No newline at end of file diff --git a/src/mesa/drivers/dri/r600/radeon_buffer_objects.h b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h new file mode 120000 index 0000000000..2f134fd17b --- /dev/null +++ b/src/mesa/drivers/dri/r600/radeon_buffer_objects.h @@ -0,0 +1 @@ +../radeon/radeon_buffer_objects.h
\ No newline at end of file |