summary | refs | log | tree | commit | diff
path: root/src/mesa/drivers/dri/r600
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r600')
-rw-r--r-- src/mesa/drivers/dri/r600/r600_context.c 27
-rw-r--r-- src/mesa/drivers/dri/r600/r600_texstate.c 13
-rw-r--r-- src/mesa/drivers/dri/r600/r700_assembler.c 2342
-rw-r--r-- src/mesa/drivers/dri/r600/r700_assembler.h 129
-rw-r--r-- src/mesa/drivers/dri/r600/r700_chip.c 103
-rw-r--r-- src/mesa/drivers/dri/r600/r700_fragprog.c 151
-rw-r--r-- src/mesa/drivers/dri/r600/r700_fragprog.h 6
-rw-r--r-- src/mesa/drivers/dri/r600/r700_render.c 21
-rw-r--r-- src/mesa/drivers/dri/r600/r700_shader.c 9
-rw-r--r-- src/mesa/drivers/dri/r600/r700_shader.h 3
-rw-r--r-- src/mesa/drivers/dri/r600/r700_vertprog.c 45
11 files changed, 2616 insertions, 233 deletions
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index dbd233729c..25314eff56 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -74,6 +74,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "utils.h"
#include "xmlpool.h" /* for symbolic values of enum-type options */
+//#define R600_ENABLE_GLSL_TEST 1
+
#define need_GL_VERSION_2_0
#define need_GL_ARB_occlusion_query
#define need_GL_ARB_point_parameters
@@ -109,6 +111,7 @@ static const struct dri_extension card_extensions[] = {
{"GL_ARB_texture_env_crossbar", NULL},
{"GL_ARB_texture_env_dot3", NULL},
{"GL_ARB_texture_mirrored_repeat", NULL},
+ {"GL_ARB_texture_non_power_of_two", NULL},
{"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
{"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
{"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
@@ -155,7 +158,11 @@ static const struct dri_extension mm_extensions[] = {
* functions added by GL_ATI_separate_stencil.
*/
static const struct dri_extension gl_20_extension[] = {
+#ifdef R600_ENABLE_GLSL_TEST
+ {"GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
+#else
{"GL_VERSION_2_0", GL_VERSION_2_0_functions },
+#endif /* R600_ENABLE_GLSL_TEST */
};
static const struct tnl_pipeline_stage *r600_pipeline[] = {
@@ -308,6 +315,26 @@ static void r600InitGLExtensions(GLcontext *ctx)
if (r600->radeon.radeonScreen->kernel_mm)
driInitExtensions(ctx, mm_extensions, GL_FALSE);
+#ifdef R600_ENABLE_GLSL_TEST
+ driInitExtensions(ctx, gl_20_extension, GL_TRUE);
+ //_mesa_enable_2_0_extensions(ctx);
+ //1.5
+ ctx->Extensions.ARB_occlusion_query = GL_TRUE;
+ ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE;
+ ctx->Extensions.EXT_shadow_funcs = GL_TRUE;
+ //2.0
+ ctx->Extensions.ARB_draw_buffers = GL_TRUE;
+ ctx->Extensions.ARB_point_sprite = GL_TRUE;
+ ctx->Extensions.ARB_shader_objects = GL_TRUE;
+ ctx->Extensions.ARB_vertex_shader = GL_TRUE;
+ ctx->Extensions.ARB_fragment_shader = GL_TRUE;
+ ctx->Extensions.EXT_blend_equation_separate = GL_TRUE;
+ ctx->Extensions.ATI_separate_stencil = GL_TRUE;
+
+ /* glsl compiler has problem if this is not GL_TRUE */
+ ctx->Shader.EmitCondCodes = GL_TRUE;
+#endif /* R600_ENABLE_GLSL_TEST */
+
if (driQueryOptionb
(&r600->radeon.optionCache, "disable_stencil_two_side"))
_mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 4ec315b78c..2a4a6e6ee1 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -917,18 +917,7 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
return;
}
- radeon_update_renderbuffers(pDRICtx, dPriv);
- /* back & depth buffer are useless free them right away */
- rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
- if (rb && rb->bo) {
- radeon_bo_unref(rb->bo);
- rb->bo = NULL;
- }
- rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
- if (rb && rb->bo) {
- radeon_bo_unref(rb->bo);
- rb->bo = NULL;
- }
+ radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE);
rb = rfb->color_rb[0];
if (rb->bo == NULL) {
/* Failed to BO for the buffer */
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 67e0ee7746..cf64d170ed 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -38,6 +38,9 @@
#include "r700_assembler.h"
+#define USE_CF_FOR_CONTINUE_BREAK 1
+#define USE_CF_FOR_POP_AFTER 1
+
BITS addrmode_PVSDST(PVSDST * pPVSDST)
{
return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
@@ -337,12 +340,17 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
switch (pAsm->D.dst.opcode)
{
case SQ_OP2_INST_ADD:
+ case SQ_OP2_INST_KILLE:
case SQ_OP2_INST_KILLGT:
+ case SQ_OP2_INST_KILLGE:
+ case SQ_OP2_INST_KILLNE:
case SQ_OP2_INST_MUL:
case SQ_OP2_INST_MAX:
case SQ_OP2_INST_MIN:
//case SQ_OP2_INST_MAX_DX10:
//case SQ_OP2_INST_MIN_DX10:
+ case SQ_OP2_INST_SETE:
+ case SQ_OP2_INST_SETNE:
case SQ_OP2_INST_SETGT:
case SQ_OP2_INST_SETGE:
case SQ_OP2_INST_PRED_SETE:
@@ -358,6 +366,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
case SQ_OP2_INST_MOVA_FLOOR:
case SQ_OP2_INST_FRACT:
case SQ_OP2_INST_FLOOR:
+ case SQ_OP2_INST_TRUNC:
case SQ_OP2_INST_EXP_IEEE:
case SQ_OP2_INST_LOG_CLAMPED:
case SQ_OP2_INST_LOG_IEEE:
@@ -383,98 +392,115 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700
pAsm->pR700Shader = pShader;
pAsm->currentShaderType = spt;
- pAsm->cf_last_export_ptr = NULL;
+ pAsm->cf_last_export_ptr = NULL;
+
+ pAsm->cf_current_export_clause_ptr = NULL;
+ pAsm->cf_current_alu_clause_ptr = NULL;
+ pAsm->cf_current_tex_clause_ptr = NULL;
+ pAsm->cf_current_vtx_clause_ptr = NULL;
+ pAsm->cf_current_cf_clause_ptr = NULL;
- pAsm->cf_current_export_clause_ptr = NULL;
- pAsm->cf_current_alu_clause_ptr = NULL;
- pAsm->cf_current_tex_clause_ptr = NULL;
- pAsm->cf_current_vtx_clause_ptr = NULL;
- pAsm->cf_current_cf_clause_ptr = NULL;
+ // No clause has been created yet
+ pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
- // No clause has been created yet
- pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
+ pAsm->number_of_colorandz_exports = 0;
+ pAsm->number_of_exports = 0;
+ pAsm->number_of_export_opcodes = 0;
- pAsm->number_of_colorandz_exports = 0;
- pAsm->number_of_exports = 0;
- pAsm->number_of_export_opcodes = 0;
+ pAsm->alu_x_opcode = 0;
+ pAsm->D2.bits = 0;
- pAsm->D.bits = 0;
- pAsm->S[0].bits = 0;
- pAsm->S[1].bits = 0;
- pAsm->S[2].bits = 0;
+ pAsm->D.bits = 0;
+ pAsm->S[0].bits = 0;
+ pAsm->S[1].bits = 0;
+ pAsm->S[2].bits = 0;
- pAsm->uLastPosUpdate = 0;
+ pAsm->uLastPosUpdate = 0;
- *(BITS *) &pAsm->fp_stOutFmt0 = 0;
+ *(BITS *) &pAsm->fp_stOutFmt0 = 0;
- pAsm->uIIns = 0;
- pAsm->uOIns = 0;
- pAsm->number_used_registers = 0;
- pAsm->uUsedConsts = 256;
+ pAsm->uIIns = 0;
+ pAsm->uOIns = 0;
+ pAsm->number_used_registers = 0;
+ pAsm->uUsedConsts = 256;
- // Fragment programs
- pAsm->uBoolConsts = 0;
- pAsm->uIntConsts = 0;
- pAsm->uInsts = 0;
- pAsm->uConsts = 0;
+ // Fragment programs
+ pAsm->uBoolConsts = 0;
+ pAsm->uIntConsts = 0;
+ pAsm->uInsts = 0;
+ pAsm->uConsts = 0;
- pAsm->FCSP = 0;
- pAsm->fc_stack[0].type = FC_NONE;
+ pAsm->FCSP = 0;
+ pAsm->fc_stack[0].type = FC_NONE;
- pAsm->branch_depth = 0;
- pAsm->max_branch_depth = 0;
+ pAsm->aArgSubst[0] =
+ pAsm->aArgSubst[1] =
+ pAsm->aArgSubst[2] =
+ pAsm->aArgSubst[3] = (-1);
- pAsm->aArgSubst[0] =
- pAsm->aArgSubst[1] =
- pAsm->aArgSubst[2] =
- pAsm->aArgSubst[3] = (-1);
+ pAsm->uOutputs = 0;
- pAsm->uOutputs = 0;
+ for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
+ {
+ pAsm->color_export_register_number[i] = (-1);
+ }
- for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
- {
- pAsm->color_export_register_number[i] = (-1);
- }
+ pAsm->depth_export_register_number = (-1);
+ pAsm->stencil_export_register_number = (-1);
+ pAsm->coverage_to_mask_export_register_number = (-1);
+ pAsm->mask_export_register_number = (-1);
- pAsm->depth_export_register_number = (-1);
- pAsm->stencil_export_register_number = (-1);
- pAsm->coverage_to_mask_export_register_number = (-1);
- pAsm->mask_export_register_number = (-1);
+ pAsm->starting_export_register_number = 0;
+ pAsm->starting_vfetch_register_number = 0;
+ pAsm->starting_temp_register_number = 0;
+ pAsm->uFirstHelpReg = 0;
- pAsm->starting_export_register_number = 0;
- pAsm->starting_vfetch_register_number = 0;
- pAsm->starting_temp_register_number = 0;
- pAsm->uFirstHelpReg = 0;
+ pAsm->input_position_is_used = GL_FALSE;
+ pAsm->input_normal_is_used = GL_FALSE;
+ for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
+ {
+ pAsm->input_color_is_used[ i ] = GL_FALSE;
+ }
- pAsm->input_position_is_used = GL_FALSE;
- pAsm->input_normal_is_used = GL_FALSE;
+ for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
+ {
+ pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
+ }
+ for (i=0; i<VERT_ATTRIB_MAX; i++)
+ {
+ pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
+ }
- for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
- {
- pAsm->input_color_is_used[ i ] = GL_FALSE;
- }
+ pAsm->number_of_inputs = 0;
- for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
- {
- pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
- }
+ pAsm->is_tex = GL_FALSE;
+ pAsm->need_tex_barrier = GL_FALSE;
- for (i=0; i<VERT_ATTRIB_MAX; i++)
- {
- pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
- }
+ pAsm->subs = NULL;
+ pAsm->unSubArraySize = 0;
+ pAsm->unSubArrayPointer = 0;
+ pAsm->callers = NULL;
+ pAsm->unCallerArraySize = 0;
+ pAsm->unCallerArrayPointer = 0;
+
+ pAsm->CALLSP = 0;
+ pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
+ pAsm->CALLSTACK[0].plstCFInstructions_local
+ = &(pAsm->pR700Shader->lstCFInstructions);
- pAsm->number_of_inputs = 0;
+ pAsm->CALLSTACK[0].max = 0;
+ pAsm->CALLSTACK[0].current = 0;
- pAsm->is_tex = GL_FALSE;
- pAsm->need_tex_barrier = GL_FALSE;
+ SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
- return 0;
+ pAsm->unCFflags = 0;
+
+ return 0;
}
GLboolean IsTex(gl_inst_opcode Opcode)
@@ -592,6 +618,31 @@ int check_current_clause(r700_AssemblerBase* pAsm,
return GL_TRUE;
}
+/**
+ * Allocate a new generic control-flow (CF) clause, make it the assembler's
+ * current CF clause and append it to the shader's CF instruction list.
+ *
+ * check_current_clause() is called first with CF_OTHER_CLAUSE; if it fails
+ * nothing is allocated.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if check_current_clause() fails or the
+ * clause cannot be allocated.
+ */
+GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
+{
+    if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
+    {
+        return GL_FALSE;
+    }
+
+    pAsm->cf_current_cf_clause_ptr =
+        (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
+
+    if (pAsm->cf_current_cf_clause_ptr == NULL)
+    {
+        /* This is the generic CF path, not the vertex-fetch one. */
+        radeon_error("Could not allocate a new generic CF instruction.\n");
+        return GL_FALSE;
+    }
+
+    Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
+    AddCFInstruction( pAsm->pR700Shader,
+                      (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
+
+    return GL_TRUE;
+}
+
GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
R700VertexInstruction* vertex_instruction_ptr)
{
@@ -987,7 +1038,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm)
checkop_init(pAsm);
- if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
@@ -998,7 +1050,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm)
{
bSrcConst[0] = GL_FALSE;
}
- if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
@@ -1031,7 +1084,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm)
checkop_init(pAsm);
- if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
@@ -1042,7 +1096,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm)
{
bSrcConst[0] = GL_FALSE;
}
- if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
@@ -1053,7 +1108,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm)
{
bSrcConst[1] = GL_FALSE;
}
- if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
@@ -1153,6 +1209,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_STATE_VAR:
+ case PROGRAM_UNIFORM:
if (1 == pILInst->SrcReg[src].RelAddr)
{
setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
@@ -1166,7 +1223,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
break;
case PROGRAM_INPUT:
- setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
+ setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
pAsm->S[fld].src.rtype = SRC_REG_INPUT;
switch (pAsm->currentShaderType)
{
@@ -1179,7 +1236,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
}
break;
default:
- radeon_error("Invalid source argument type\n");
+ radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
return GL_FALSE;
}
}
@@ -1294,6 +1351,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
else
{
switch (pILInst->SrcReg[0].File) {
+ case PROGRAM_UNIFORM:
case PROGRAM_CONSTANT:
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
@@ -1315,7 +1373,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
case FRAG_ATTRIB_TEX0:
case FRAG_ATTRIB_TEX1:
case FRAG_ATTRIB_TEX2:
- case FRAG_ATTRIB_TEX3:
+ case FRAG_ATTRIB_TEX3:
case FRAG_ATTRIB_TEX4:
case FRAG_ATTRIB_TEX5:
case FRAG_ATTRIB_TEX6:
@@ -1331,10 +1389,17 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
case FRAG_ATTRIB_PNTC:
fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
break;
- case FRAG_ATTRIB_VAR0:
- fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n");
- break;
}
+
+ if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
+ (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
+ {
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ }
+
break;
}
}
@@ -1517,6 +1582,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
{
src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
}
+ else if (pSource->rtype == SRC_REC_LITERAL)
+ {
+ src_sel = SQ_ALU_SRC_LITERAL;
+ }
else
{
radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
@@ -1606,7 +1675,8 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
return GL_FALSE;
}
- if ( pAsm->cf_current_alu_clause_ptr == NULL ||
+ if ( pAsm->alu_x_opcode != 0 ||
+ pAsm->cf_current_alu_clause_ptr == NULL ||
( (pAsm->cf_current_alu_clause_ptr != NULL) &&
(pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
) )
@@ -1636,9 +1706,17 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
- //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
- pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
+
+ if(pAsm->alu_x_opcode != 0)
+ {
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
+ pAsm->alu_x_opcode = 0;
+ }
+ else
+ {
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
+ }
pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
@@ -2045,7 +2123,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
if( is_gpr(sel) )
{
if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
- {
+ {
return GL_FALSE;
}
@@ -2057,7 +2135,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
else
{
if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
- {
+ {
return GL_FALSE;
}
}
@@ -2069,7 +2147,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
if( is_cfile(sel) )
{
if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
- {
+ {
return GL_FALSE;
}
}
@@ -2172,7 +2250,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
current_source_index,
pcurrent_source,
scalar_channel_index) )
- {
+ {
return GL_FALSE;
}
@@ -2186,7 +2264,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
current_source_index,
pcurrent_source,
scalar_channel_index) )
- {
+ {
return GL_FALSE;
}
}
@@ -2215,7 +2293,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
}
else
- {
+ {
radeon_error("Only temp destination registers supported for ALU dest regs.\n");
return GL_FALSE;
}
@@ -2329,6 +2407,253 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
}
if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
+ {
+ return GL_FALSE;
+ }
+
+ /*
+ * Judge the type of current instruction, is it vector or scalar
+ * instruction.
+ */
+ if (is_single_scalar_operation)
+ {
+ if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else
+ {
+ if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ contiguous_slots_needed = 0;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
+{
+ GLuint number_of_scalar_operations;
+ GLboolean is_single_scalar_operation;
+ GLuint scalar_channel_index;
+
+ PVSSRC * pcurrent_source;
+ int current_source_index;
+ GLuint contiguous_slots_needed;
+
+ GLuint uNumSrc = r700GetNumOperands(pAsm);
+
+ GLboolean bSplitInst = GL_FALSE;
+
+ if (1 == pAsm->D.dst.math)
+ {
+ is_single_scalar_operation = GL_TRUE;
+ number_of_scalar_operations = 1;
+ }
+ else
+ {
+ is_single_scalar_operation = GL_FALSE;
+ number_of_scalar_operations = 4;
+ }
+
+ contiguous_slots_needed = 0;
+
+ if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
+ {
+ contiguous_slots_needed = 4;
+ }
+
+ initialize(pAsm);
+
+ for (scalar_channel_index=0;
+ scalar_channel_index < number_of_scalar_operations;
+ scalar_channel_index++)
+ {
+ R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+ if (alu_instruction_ptr == NULL)
+ {
+ return GL_FALSE;
+ }
+ Init_R700ALUInstruction(alu_instruction_ptr);
+
+ //src 0
+ current_source_index = 0;
+ pcurrent_source = &(pAsm->S[0].src);
+
+ if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+ current_source_index,
+ pcurrent_source,
+ scalar_channel_index) )
+ {
+ return GL_FALSE;
+ }
+
+ if (uNumSrc > 1)
+ {
+ // Process source 1
+ current_source_index = 1;
+ pcurrent_source = &(pAsm->S[current_source_index].src);
+
+ if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+ current_source_index,
+ pcurrent_source,
+ scalar_channel_index) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ //other bits
+ alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
+
+ if( (is_single_scalar_operation == GL_TRUE)
+ || (GL_TRUE == bSplitInst) )
+ {
+ alu_instruction_ptr->m_Word0.f.last = 1;
+ }
+ else
+ {
+ alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
+ }
+
+ alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
+ if(1 == pAsm->D.dst.predicated)
+ {
+ alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
+ alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
+ }
+ else
+ {
+ alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+ }
+
+ // dst
+ if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
+ (pAsm->D.dst.rtype == DST_REG_OUT) )
+ {
+ alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
+ }
+ else
+ {
+ radeon_error("Only temp destination registers supported for ALU dest regs.\n");
+ return GL_FALSE;
+ }
+
+ alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
+
+ if ( is_single_scalar_operation == GL_TRUE )
+ {
+ // Override scalar_channel_index since only one scalar value will be written
+ if(pAsm->D.dst.writex)
+ {
+ scalar_channel_index = 0;
+ }
+ else if(pAsm->D.dst.writey)
+ {
+ scalar_channel_index = 1;
+ }
+ else if(pAsm->D.dst.writez)
+ {
+ scalar_channel_index = 2;
+ }
+ else if(pAsm->D.dst.writew)
+ {
+ scalar_channel_index = 3;
+ }
+ }
+
+ alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
+
+ alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
+
+ if (pAsm->D.dst.op3)
+ {
+ //op3
+
+ alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
+
+ //There's 3rd src for op3
+ current_source_index = 2;
+ pcurrent_source = &(pAsm->S[current_source_index].src);
+
+ if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+ current_source_index,
+ pcurrent_source,
+ scalar_channel_index) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else
+ {
+ //op2
+ if (pAsm->bR6xx)
+ {
+ alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
+
+ alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
+
+ //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
+ //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
+ switch (scalar_channel_index)
+ {
+ case 0:
+ alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
+ break;
+ case 1:
+ alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
+ break;
+ case 2:
+ alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
+ break;
+ case 3:
+ alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
+ break;
+ default:
+ alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
+ break;
+ }
+ alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
+ }
+ else
+ {
+ alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
+
+ alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
+
+ //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+ //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
+ switch (scalar_channel_index)
+ {
+ case 0:
+ alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
+ break;
+ case 1:
+ alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
+ break;
+ case 2:
+ alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
+ break;
+ case 3:
+ alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
+ break;
+ default:
+ alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
+ break;
+ }
+ alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
+ }
+ }
+
+ if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
{
return GL_FALSE;
}
@@ -2348,7 +2673,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
{
if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
{
- return 1;
+ return GL_FALSE;
}
}
@@ -2358,6 +2683,259 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
+/**
+ * Assemble the pending ALU operation (pAsm->D, pAsm->D2, pAsm->S[]) into one
+ * ALU instruction per destination channel enabled in the write mask, with
+ * literal constants attached to the instruction that carries the "last" bit.
+ *
+ * pAsm->D2.dst2.literal selects the payload of that instruction:
+ *   0 - plain instruction, no literal
+ *   1 - half literal, built from pLiteral[0..1]
+ *   2 - full literal, built from pLiteral[0..3]
+ * Any other value is rejected with GL_FALSE.
+ *
+ * Returns GL_TRUE on success; GL_FALSE on allocation failure, unsupported
+ * literal mode, unsupported destination register type, or when one of the
+ * helper calls fails.
+ */
+GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
+{
+    R700ALUInstruction * alu_instruction_ptr;
+    R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
+    R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
+
+    GLuint number_of_scalar_operations;
+    GLuint scalar_channel_index;
+
+    GLuint contiguous_slots_needed;
+    GLuint lastInstruction = 0; /* only read for channels that are enabled */
+    GLuint not_masked[4];
+
+    GLuint uNumSrc = r700GetNumOperands(pAsm);
+
+    number_of_scalar_operations = 0;
+    contiguous_slots_needed = 0;
+
+    /*
+     * Collect the write mask.  Channels are visited w, z, y, x, so
+     * lastInstruction ends up as the LOWEST enabled channel index.
+     * NOTE(review): the emit loop below walks channels in ascending order,
+     * so this marks the first emitted instruction as "last" - confirm that
+     * this is intended.
+     */
+    if(1 == pAsm->D.dst.writew)
+    {
+        lastInstruction = 3;
+        number_of_scalar_operations++;
+        not_masked[3] = 1;
+    }
+    else
+    {
+        not_masked[3] = 0;
+    }
+    if(1 == pAsm->D.dst.writez)
+    {
+        lastInstruction = 2;
+        number_of_scalar_operations++;
+        not_masked[2] = 1;
+    }
+    else
+    {
+        not_masked[2] = 0;
+    }
+    if(1 == pAsm->D.dst.writey)
+    {
+        lastInstruction = 1;
+        number_of_scalar_operations++;
+        not_masked[1] = 1;
+    }
+    else
+    {
+        not_masked[1] = 0;
+    }
+    if(1 == pAsm->D.dst.writex)
+    {
+        lastInstruction = 0;
+        number_of_scalar_operations++;
+        not_masked[0] = 1;
+    }
+    else
+    {
+        not_masked[0] = 0;
+    }
+
+    /* Slots to reserve in the clause: 4 for a reduction, else one per channel. */
+    if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
+    {
+        contiguous_slots_needed = 4;
+    }
+    else
+    {
+        contiguous_slots_needed = number_of_scalar_operations;
+    }
+
+    /* Extra slots for the literal payload. */
+    if(1 == pAsm->D2.dst2.literal)
+    {
+        contiguous_slots_needed += 1;
+    }
+    else if(2 == pAsm->D2.dst2.literal)
+    {
+        contiguous_slots_needed += 2;
+    }
+
+    initialize(pAsm);
+
+    for (scalar_channel_index=0; scalar_channel_index < 4; scalar_channel_index++)
+    {
+        if(0 == not_masked[scalar_channel_index])
+        {
+            continue;
+        }
+
+        if(scalar_channel_index == lastInstruction)
+        {
+            /* This instruction carries the literal payload, if any. */
+            switch (pAsm->D2.dst2.literal)
+            {
+            case 0:
+                alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+                if (alu_instruction_ptr == NULL)
+                {
+                    return GL_FALSE;
+                }
+                Init_R700ALUInstruction(alu_instruction_ptr);
+                break;
+            case 1:
+                alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
+                if (alu_instruction_ptr_hl == NULL)
+                {
+                    return GL_FALSE;
+                }
+                Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pLiteral[0], pLiteral[1]);
+                alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
+                break;
+            case 2:
+                alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
+                if (alu_instruction_ptr_fl == NULL)
+                {
+                    return GL_FALSE;
+                }
+                Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl, pLiteral[0], pLiteral[1], pLiteral[2], pLiteral[3]);
+                alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
+                break;
+            default:
+                /* Do not continue with an unassigned instruction pointer. */
+                radeon_error("Unsupported literal mode : %d \n", (int)pAsm->D2.dst2.literal);
+                return GL_FALSE;
+            }
+        }
+        else
+        {
+            alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+            if (alu_instruction_ptr == NULL)
+            {
+                return GL_FALSE;
+            }
+            Init_R700ALUInstruction(alu_instruction_ptr);
+        }
+
+        //src 0
+        if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+                                         0,
+                                         &(pAsm->S[0].src),
+                                         scalar_channel_index) )
+        {
+            return GL_FALSE;
+        }
+
+        if (uNumSrc > 1)
+        {
+            // Process source 1
+            if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+                                             1,
+                                             &(pAsm->S[1].src),
+                                             scalar_channel_index) )
+            {
+                return GL_FALSE;
+            }
+        }
+
+        //other bits
+        alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
+
+        if(scalar_channel_index == lastInstruction)
+        {
+            alu_instruction_ptr->m_Word0.f.last = 1;
+        }
+
+        alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
+        if(1 == pAsm->D.dst.predicated)
+        {
+            alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
+            alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
+        }
+        else
+        {
+            alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0;
+            alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0;
+        }
+
+        // dst
+        if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
+            (pAsm->D.dst.rtype == DST_REG_OUT) )
+        {
+            alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
+        }
+        else
+        {
+            radeon_error("Only temp destination registers supported for ALU dest regs.\n");
+            return GL_FALSE;
+        }
+
+        alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
+
+        alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
+
+        alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
+
+        if (pAsm->D.dst.op3)
+        {
+            //op3
+            alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
+
+            //There's 3rd src for op3
+            if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
+                                              2,
+                                              &(pAsm->S[2].src),
+                                              scalar_channel_index) )
+            {
+                return GL_FALSE;
+            }
+        }
+        else
+        {
+            //op2: masked-off channels were skipped above, so write_mask is always 1
+            if (pAsm->bR6xx)
+            {
+                alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
+                alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
+                alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
+                alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1;
+                alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
+            }
+            else
+            {
+                alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
+                alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
+                alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
+                alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1;
+                alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
+            }
+        }
+
+        if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
+        {
+            return GL_FALSE;
+        }
+
+        if (1 == number_of_scalar_operations)
+        {
+            if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
+            {
+                return GL_FALSE;
+            }
+        }
+        else
+        {
+            if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
+            {
+                return GL_FALSE;
+            }
+        }
+
+        /* NOTE(review): unsigned subtraction; wraps around when fewer than
+         * two slots remain - confirm this is the intended accounting. */
+        contiguous_slots_needed -= 2;
+    }
+
+    return GL_TRUE;
+}
+
GLboolean next_ins(r700_AssemblerBase *pAsm)
{
struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
@@ -2403,6 +2981,70 @@ GLboolean next_ins(r700_AssemblerBase *pAsm)
//reset for next inst.
pAsm->D.bits = 0;
+ pAsm->D2.bits = 0;
+ pAsm->S[0].bits = 0;
+ pAsm->S[1].bits = 0;
+ pAsm->S[2].bits = 0;
+ pAsm->is_tex = GL_FALSE;
+ pAsm->need_tex_barrier = GL_FALSE;
+
+ return GL_TRUE;
+}
+
+/**
+ * Emit the pending ALU operation through assemble_alu_instruction2(), record
+ * which channels of an output register were written, then clear the pending
+ * destination/source descriptors and texture flags for the next instruction.
+ *
+ * Returns GL_TRUE on success, GL_FALSE if the ALU instruction cannot be
+ * assembled.
+ */
+GLboolean next_ins2(r700_AssemblerBase *pAsm)
+{
+    struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+    //ALU
+    if( GL_FALSE == assemble_alu_instruction2(pAsm) )
+    {
+        radeon_error("Error assembling ALU instruction\n");
+        return GL_FALSE;
+    }
+
+    if(pAsm->D.dst.rtype == DST_REG_OUT)
+    {
+        if(pAsm->D.dst.op3)
+        {
+            // There is no mask for OP3 instructions, so all channels are written
+            pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
+        }
+        else
+        {
+            pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
+                |= (unsigned char)pILInst->DstReg.WriteMask;
+        }
+    }
+
+    //reset for next inst.
+    pAsm->D.bits = 0;
+    pAsm->D2.bits = 0;
+    pAsm->S[0].bits = 0;
+    pAsm->S[1].bits = 0;
+    pAsm->S[2].bits = 0;
+    pAsm->is_tex = GL_FALSE;
+    pAsm->need_tex_barrier = GL_FALSE;
+
+    return GL_TRUE;
+}
+
+/* does not work yet */
+GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
+{
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+ //ALU
+ if( GL_FALSE == assemble_alu_instruction_literal(pAsm, pLiteral) )
+ {
+ radeon_error("Error assembling ALU instruction\n");
+ return GL_FALSE;
+ }
+
+ //reset for next inst.
+ pAsm->D.bits = 0;
+ pAsm->D2.bits = 0;
pAsm->S[0].bits = 0;
pAsm->S[1].bits = 0;
pAsm->S[2].bits = 0;
@@ -2910,13 +3552,12 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
-GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
-{
- /* TODO: doc says KILL has to be last(end) ALU clause */
-
- checkop1(pAsm);
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
+{
+ checkop2(pAsm);
- pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
+ pAsm->D.dst.opcode = opcode;
+ pAsm->D.dst.math = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
@@ -2926,24 +3567,24 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
pAsm->D.dst.writez = 0;
pAsm->D.dst.writew = 0;
- setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
- pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
- pAsm->S[0].src.reg = 0;
-
- setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
- noneg_PVSSRC(&(pAsm->S[0].src));
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
- if ( GL_FALSE == assemble_src(pAsm, 0, 1) )
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
{
return GL_FALSE;
}
- if ( GL_FALSE == next_ins(pAsm) )
+ if ( GL_FALSE == next_ins2(pAsm) )
{
return GL_FALSE;
}
+ /* Doc says KILL has to be last(end) ALU clause */
pAsm->pR700Shader->killIsUsed = GL_TRUE;
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
return GL_TRUE;
}
@@ -3007,6 +3648,7 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
{
return GL_FALSE;
}
+
if( GL_FALSE == assemble_src(pAsm, 2, -1) )
{
return GL_FALSE;
@@ -3816,6 +4458,74 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
+
+/**
+ * Assemble a two-source ALU instruction using the given opcode.
+ *
+ * Runs checkop2(), stores \p opcode in D.dst.opcode and sets D.dst.math,
+ * then assembles the destination, source 0 and source 1 before handing
+ * the instruction to next_ins().
+ *
+ * \param pAsm    assembler state
+ * \param opcode  value written to D.dst.opcode
+ * \return GL_TRUE on success; GL_FALSE as soon as one step reports failure
+ */
+GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
+{
+    if( GL_FALSE == checkop2(pAsm) )
+        return GL_FALSE;
+
+    pAsm->D.dst.math   = 1;
+    pAsm->D.dst.opcode = opcode;
+
+    /* Short-circuit evaluation keeps the steps in order and stops at the
+     * first one that fails. */
+    if(    GL_FALSE == assemble_dst(pAsm)
+        || GL_FALSE == assemble_src(pAsm, 0, -1)
+        || GL_FALSE == assemble_src(pAsm, 1, -1)
+        || GL_FALSE == next_ins(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/**
+ * Assemble a predicated two-source ALU instruction using the given opcode.
+ *
+ * Same sequence as assemble_LOGIC(), except that D.dst.predicated is set,
+ * the saturate mode of the current IL instruction is copied into D2, and
+ * the instruction is handed to next_ins2() instead of next_ins().
+ *
+ * \param pAsm    assembler state
+ * \param opcode  value written to D.dst.opcode
+ * \return GL_TRUE on success; GL_FALSE as soon as one step reports failure
+ */
+GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
+{
+    if( GL_FALSE == checkop2(pAsm) )
+        return GL_FALSE;
+
+    pAsm->D.dst.predicated = 1;
+    pAsm->D.dst.math       = 1;
+    pAsm->D.dst.opcode     = opcode;
+    pAsm->D2.dst2.SaturateMode = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
+
+    /* Evaluated left to right; the first failing step ends the chain. */
+    if(    GL_FALSE == assemble_dst(pAsm)
+        || GL_FALSE == assemble_src(pAsm, 0, -1)
+        || GL_FALSE == assemble_src(pAsm, 1, -1)
+        || GL_FALSE == next_ins2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
{
@@ -3895,6 +4605,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
{
+ case PROGRAM_UNIFORM:
case PROGRAM_CONSTANT:
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
@@ -4104,6 +4815,9 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
pAsm->is_tex = GL_TRUE;
if ( GL_TRUE == need_barrier )
+
+ pAsm->is_tex = GL_TRUE;
+ if ( GL_TRUE == need_barrier )
{
pAsm->need_tex_barrier = GL_TRUE;
}
@@ -4265,27 +4979,876 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
-GLboolean assemble_IF(r700_AssemblerBase *pAsm)
+/**
+ * Lower the "current" stack usage of the active CALLSTACK level by the
+ * amount associated with \p uReason (FC_PUSH_VPM: 1, FC_PUSH_WQM: 4,
+ * FC_LOOP: 4, FC_REP: 1). Any other reason leaves the counter unchanged.
+ */
+static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
+{
+    int released = 0;
+
+    if(FC_PUSH_VPM == uReason)
+    {
+        released = 1;
+    }
+    else if(FC_PUSH_WQM == uReason)
+    {
+        released = 4;
+    }
+    else if(FC_LOOP == uReason)
+    {
+        released = 4;
+    }
+    else if(FC_REP == uReason)
+    {
+        /* TODO : for 16 vp asic, should -= 2; */
+        released = 1;
+    }
+
+    pAsm->CALLSTACK[pAsm->CALLSP].current -= released;
+}
+
+/**
+ * Track stack usage of the active CALLSTACK level.
+ *
+ * With \p bCheckMaxOnly == GL_TRUE only the "max" watermark is raised, as
+ * if FC_PUSH_VPM (+1) or FC_PUSH_WQM (+4) were pushed on top of "current";
+ * "current" itself is not modified and other reasons are ignored.
+ *
+ * Otherwise "current" is increased (FC_PUSH_VPM: 1, FC_PUSH_WQM: 4,
+ * FC_LOOP: 4, FC_REP: 1) and "max" is raised to it if it was exceeded.
+ */
+static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
+{
+    int needed = 0;
+
+    if(GL_TRUE == bCheckMaxOnly)
+    {
+        if(FC_PUSH_VPM == uReason)
+        {
+            needed = 1;
+        }
+        else if(FC_PUSH_WQM == uReason)
+        {
+            needed = 4;
+        }
+        else
+        {
+            return;
+        }
+
+        if((pAsm->CALLSTACK[pAsm->CALLSP].current + needed)
+           > pAsm->CALLSTACK[pAsm->CALLSP].max)
+        {
+            pAsm->CALLSTACK[pAsm->CALLSP].max =
+                pAsm->CALLSTACK[pAsm->CALLSP].current + needed;
+        }
+        return;
+    }
+
+    if(FC_PUSH_VPM == uReason)
+    {
+        needed = 1;
+    }
+    else if(FC_PUSH_WQM == uReason)
+    {
+        needed = 4;
+    }
+    else if(FC_LOOP == uReason)
+    {
+        needed = 4;
+    }
+    else if(FC_REP == uReason)
+    {
+        /* TODO : for 16 vp asic, should += 2; */
+        needed = 1;
+    }
+
+    pAsm->CALLSTACK[pAsm->CALLSP].current += needed;
+
+    if(pAsm->CALLSTACK[pAsm->CALLSP].current
+       > pAsm->CALLSTACK[pAsm->CALLSP].max)
+    {
+        pAsm->CALLSTACK[pAsm->CALLSP].max =
+            pAsm->CALLSTACK[pAsm->CALLSP].current;
+    }
+}
+
+/**
+ * Add a CF JUMP instruction whose target is relative to itself.
+ *
+ * \param pAsm    assembler state
+ * \param pops    value stored in the instruction's pop_count field
+ * \param offset  added to the instruction's own m_uIndex to form its addr
+ * \return GL_FALSE if add_cf_instruction() fails, GL_TRUE otherwise
+ */
+GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
+{
+    R700ControlFlowGenericClause *pJump;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* Work on the clause that add_cf_instruction() left as current. */
+    pJump = pAsm->cf_current_cf_clause_ptr;
+
+    pJump->m_Word1.f.cf_inst          = SQ_CF_INST_JUMP;
+    pJump->m_Word1.f.pop_count        = pops;
+    pJump->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pJump->m_Word1.f.cf_const         = 0x0;
+    pJump->m_Word1.f.end_of_program   = 0x0;
+    pJump->m_Word1.f.valid_pixel_mode = 0x0;
+    pJump->m_Word1.f.whole_quad_mode  = 0x0;
+    pJump->m_Word1.f.barrier          = 0x1;
+
+    pJump->m_Word0.f.addr = pJump->m_uIndex + offset;
+
+    return GL_TRUE;
+}
+
+/**
+ * Add a CF POP instruction.
+ *
+ * The instruction's addr is set to its own m_uIndex + 1.
+ *
+ * \param pAsm  assembler state
+ * \param pops  value stored in the instruction's pop_count field
+ * \return GL_FALSE if add_cf_instruction() fails, GL_TRUE otherwise
+ */
+GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
+{
+    R700ControlFlowGenericClause *pPop;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    /* Work on the clause that add_cf_instruction() left as current. */
+    pPop = pAsm->cf_current_cf_clause_ptr;
+
+    pPop->m_Word1.f.cf_inst          = SQ_CF_INST_POP;
+    pPop->m_Word1.f.pop_count        = pops;
+    pPop->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pPop->m_Word1.f.cf_const         = 0x0;
+    pPop->m_Word1.f.end_of_program   = 0x0;
+    pPop->m_Word1.f.valid_pixel_mode = 0x0;
+    pPop->m_Word1.f.whole_quad_mode  = 0x0;
+    pPop->m_Word1.f.barrier          = 0x1;
+
+    pPop->m_Word0.f.addr = pPop->m_uIndex + 1;
+
+    return GL_TRUE;
+}
+
+/**
+ * Open an IF construct: add a CF JUMP and push an FC_IF frame.
+ *
+ * The JUMP's addr is not set here; the frame's "first" pointer remembers
+ * the instruction so that assemble_ELSE()/assemble_ENDIF() can fill it in.
+ *
+ * \param pAsm      assembler state
+ * \param bHasElse  GL_TRUE when an ELSE follows; then pop_count is 0 and
+ *                  alu_x_opcode is left alone, otherwise pop_count is 1
+ *                  and (without USE_CF_FOR_POP_AFTER) alu_x_opcode becomes
+ *                  SQ_CF_INST_ALU_POP_AFTER
+ * \return GL_FALSE if add_cf_instruction() fails, GL_TRUE otherwise
+ */
+GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
+{
+    R700ControlFlowGenericClause *pIf;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pIf = pAsm->cf_current_cf_clause_ptr;
+
+    pIf->m_Word1.f.pop_count        = (GL_TRUE != bHasElse) ? 1 : 0;
+    pIf->m_Word1.f.cf_inst          = SQ_CF_INST_JUMP;
+    pIf->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pIf->m_Word1.f.cf_const         = 0x0;
+    pIf->m_Word1.f.end_of_program   = 0x0;
+    pIf->m_Word1.f.valid_pixel_mode = 0x0;
+    pIf->m_Word1.f.whole_quad_mode  = 0x0;
+    pIf->m_Word1.f.barrier          = 0x1;
+
+    /* Push the flow-control frame describing this IF. */
+    pAsm->FCSP++;
+    pAsm->fc_stack[pAsm->FCSP].first  = pIf;
+    pAsm->fc_stack[pAsm->FCSP].mid    = NULL;
+    pAsm->fc_stack[pAsm->FCSP].midLen = 0;
+    pAsm->fc_stack[pAsm->FCSP].type   = FC_IF;
+
+#ifndef USE_CF_FOR_POP_AFTER
+    if(GL_TRUE != bHasElse)
+    {
+        pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
+    }
+#endif /* USE_CF_FOR_POP_AFTER */
+
+    checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
+
+    return GL_TRUE;
+}
+
+/**
+ * Emit the CF ELSE instruction for the IF frame on top of fc_stack.
+ *
+ * The ELSE instruction is remembered in the frame's mid[0] so that
+ * assemble_ENDIF() can fill in its addr, and the addr of the frame's
+ * opening JUMP ("first") is set to the current CF instruction count - 1.
+ *
+ * \param pAsm  assembler state
+ * \return GL_FALSE if add_cf_instruction() fails or the one-entry mid
+ *         array cannot be allocated, GL_TRUE otherwise
+ */
+GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
{
+    R700ControlFlowGenericClause **ppMid;
+    R700ControlFlowGenericClause  *pElse;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pElse = pAsm->cf_current_cf_clause_ptr;
+
+    pElse->m_Word1.f.pop_count        = 1; ///
+    pElse->m_Word1.f.cf_const         = 0x0;
+    pElse->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+
+    pElse->m_Word1.f.end_of_program   = 0x0;
+    pElse->m_Word1.f.valid_pixel_mode = 0x0;
+    pElse->m_Word1.f.cf_inst          = SQ_CF_INST_ELSE;
+    pElse->m_Word1.f.whole_quad_mode  = 0x0;
+
+    pElse->m_Word1.f.barrier          = 0x1;
+
+    ppMid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
+                                                            0,
+                                                            sizeof(R700ControlFlowGenericClause *) );
+    pAsm->fc_stack[pAsm->FCSP].mid = ppMid;
+    if(NULL == ppMid)
+    {
+        /* Allocation failed: do not write through a NULL array. */
+        return GL_FALSE;
+    }
+    ppMid[0] = pElse;
+    //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
+
+#ifndef USE_CF_FOR_POP_AFTER
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
+#endif /* USE_CF_FOR_POP_AFTER */
+
+    pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
+
return GL_TRUE;
}
GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
{
+#ifdef USE_CF_FOR_POP_AFTER
+ pops(pAsm, 1);
+#endif /* USE_CF_FOR_POP_AFTER */
+
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+ if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
+ {
+ /* no else in between */
+ pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
+ }
+ else
+ {
+ pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
+ }
+
+ if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
+ {
+ FREE(pAsm->fc_stack[pAsm->FCSP].mid);
+ }
+
+ if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
+ {
+ radeon_error("if/endif in shader code are not paired. \n");
+ return GL_FALSE;
+ }
+
+ pAsm->FCSP--;
+
+ decreaseCurrent(pAsm, FC_PUSH_VPM);
+
+ return GL_TRUE;
+}
+
+/**
+ * Open a loop: add a CF LOOP_START_NO_AL instruction and push an FC_LOOP
+ * frame whose "first" member points at it. The frame starts with an empty
+ * mid list; the instruction's addr is filled in by assemble_ENDLOOP().
+ *
+ * \param pAsm  assembler state
+ * \return GL_FALSE if add_cf_instruction() fails, GL_TRUE otherwise
+ */
+GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
+{
+    R700ControlFlowGenericClause *pLoop;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pLoop = pAsm->cf_current_cf_clause_ptr;
+
+    pLoop->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_START_NO_AL;
+    pLoop->m_Word1.f.pop_count        = 0;
+    pLoop->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pLoop->m_Word1.f.cf_const         = 0x0;
+    pLoop->m_Word1.f.end_of_program   = 0x0;
+    pLoop->m_Word1.f.valid_pixel_mode = 0x0;
+    pLoop->m_Word1.f.whole_quad_mode  = 0x0;
+    pLoop->m_Word1.f.barrier          = 0x1;
+
+    /* Push the flow-control frame describing this loop. */
+    pAsm->FCSP++;
+    pAsm->fc_stack[pAsm->FCSP].first    = pLoop;
+    pAsm->fc_stack[pAsm->FCSP].mid      = NULL;
+    pAsm->fc_stack[pAsm->FCSP].midLen   = 0;
+    pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
+    pAsm->fc_stack[pAsm->FCSP].type     = FC_LOOP;
+
+    checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
+
+    return GL_TRUE;
+}
+
+/**
+ * Assemble BRK. Only does work when USE_CF_FOR_CONTINUE_BREAK is defined;
+ * otherwise it returns GL_TRUE without emitting anything.
+ *
+ * With the macro defined it finds the innermost FC_LOOP frame, adds a CF
+ * LOOP_BREAK instruction, records it in that frame's mid list (its addr is
+ * filled in by assemble_ENDLOOP()) and follows it with a POP of one entry.
+ *
+ * \param pAsm  assembler state
+ * \return GL_FALSE if no enclosing loop frame exists, if a CF instruction
+ *         cannot be added or if the mid list cannot be grown; else GL_TRUE
+ */
+GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
+{
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+    R700ControlFlowGenericClause **ppMid;
+    R700ControlFlowGenericClause  *pBreak;
+    unsigned int unFCSP;
+
+    for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
+    {
+        if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+        {
+            break;
+        }
+    }
+    /* The search stops at index 0 when no loop frame was found. */
+    if(0 == unFCSP)
+    {
+        radeon_error("Break is not inside loop/endloop pair.\n");
+        return GL_FALSE;
+    }
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pBreak = pAsm->cf_current_cf_clause_ptr;
+
+    pBreak->m_Word1.f.pop_count        = 1;
+    pBreak->m_Word1.f.cf_const         = 0x0;
+    pBreak->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+
+    pBreak->m_Word1.f.end_of_program   = 0x0;
+    pBreak->m_Word1.f.valid_pixel_mode = 0x0;
+    pBreak->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_BREAK;
+
+    pBreak->m_Word1.f.whole_quad_mode  = 0x0;
+
+    pBreak->m_Word1.f.barrier          = 0x1;
+
+    ppMid = (R700ControlFlowGenericClause **)_mesa_realloc(
+                (void *)pAsm->fc_stack[unFCSP].mid,
+                sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+                sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+    pAsm->fc_stack[unFCSP].mid = ppMid;
+    if(NULL == ppMid)
+    {
+        /* Keep the count consistent with the (now empty) list. */
+        pAsm->fc_stack[unFCSP].unNumMid = 0;
+        return GL_FALSE;
+    }
+    ppMid[pAsm->fc_stack[unFCSP].unNumMid] = pBreak;
+    pAsm->fc_stack[unFCSP].unNumMid++;
+
+    /* pops() writes exactly the POP fields this function used to set
+     * by hand. */
+    if(GL_FALSE == pops(pAsm, 1) )
+    {
+        return GL_FALSE;
+    }
+
+    checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+#endif //USE_CF_FOR_CONTINUE_BREAK
+    return GL_TRUE;
+}
+
+/**
+ * Assemble CONT. Only does work when USE_CF_FOR_CONTINUE_BREAK is defined;
+ * otherwise it returns GL_TRUE without emitting anything.
+ *
+ * With the macro defined it finds the innermost FC_LOOP frame, adds a CF
+ * LOOP_CONTINUE instruction, records it in that frame's mid list (its addr
+ * is filled in by assemble_ENDLOOP()) and follows it with a POP of one
+ * entry.
+ *
+ * \param pAsm  assembler state
+ * \return GL_FALSE if no enclosing loop frame exists, if a CF instruction
+ *         cannot be added or if the mid list cannot be grown; else GL_TRUE
+ */
+GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
+{
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+    R700ControlFlowGenericClause **ppMid;
+    R700ControlFlowGenericClause  *pCont;
+    unsigned int unFCSP;
+
+    for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
+    {
+        if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+        {
+            break;
+        }
+    }
+    /* The search stops at index 0 when no loop frame was found. */
+    if(0 == unFCSP)
+    {
+        radeon_error("Continue is not inside loop/endloop pair.\n");
+        return GL_FALSE;
+    }
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pCont = pAsm->cf_current_cf_clause_ptr;
+
+    pCont->m_Word1.f.pop_count        = 1;
+    pCont->m_Word1.f.cf_const         = 0x0;
+    pCont->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+
+    pCont->m_Word1.f.end_of_program   = 0x0;
+    pCont->m_Word1.f.valid_pixel_mode = 0x0;
+    pCont->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_CONTINUE;
+
+    pCont->m_Word1.f.whole_quad_mode  = 0x0;
+
+    pCont->m_Word1.f.barrier          = 0x1;
+
+    ppMid = (R700ControlFlowGenericClause **)_mesa_realloc(
+                (void *)pAsm->fc_stack[unFCSP].mid,
+                sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+                sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+    pAsm->fc_stack[unFCSP].mid = ppMid;
+    if(NULL == ppMid)
+    {
+        /* Keep the count consistent with the (now empty) list. */
+        pAsm->fc_stack[unFCSP].unNumMid = 0;
+        return GL_FALSE;
+    }
+    ppMid[pAsm->fc_stack[unFCSP].unNumMid] = pCont;
+    pAsm->fc_stack[unFCSP].unNumMid++;
+
+    /* pops() writes exactly the POP fields this function used to set
+     * by hand. */
+    if(GL_FALSE == pops(pAsm, 1) )
+    {
+        return GL_FALSE;
+    }
+
+    checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+#endif /* USE_CF_FOR_CONTINUE_BREAK */
+
+    return GL_TRUE;
+}
+
+/**
+ * Close the loop on top of fc_stack.
+ *
+ * Adds a CF LOOP_END whose addr is the index after the loop's start
+ * instruction, and sets the start instruction's addr to the index after
+ * the LOOP_END. With USE_CF_FOR_CONTINUE_BREAK, every instruction recorded
+ * in the frame's mid list gets the LOOP_END index as addr and the list is
+ * released.
+ *
+ * If HAS_CURRENT_LOOPRET is set in unCFflags, the frames between this loop
+ * and the entry of the current sub are scanned: an enclosing loop gets a
+ * flag-controlled break (breakLoopOnFlag()); if there is none, a
+ * flag-controlled return is emitted (returnOnFlag()) and the flag cleared.
+ *
+ * The pairing check is done first so that a frame of another type is never
+ * used as a loop frame.
+ *
+ * NOTE(review): entries that breakLoopOnFlag() adds to a loop's mid list
+ * are patched and freed here only when USE_CF_FOR_CONTINUE_BREAK is
+ * defined; confirm that this is intended.
+ *
+ * \param pAsm  assembler state
+ * \return GL_FALSE if the top frame is not FC_LOOP or an emission step
+ *         fails, GL_TRUE otherwise
+ */
+GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
+{
+    R700ControlFlowGenericClause *pEnd;
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+    GLuint i;
+#endif
+    GLuint unFCSP;
+    GLuint unIF = 0;
+
+    if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
+    {
+        radeon_error("loop/endloop in shader code are not paired. \n");
+        return GL_FALSE;
+    }
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pEnd = pAsm->cf_current_cf_clause_ptr;
+
+    pEnd->m_Word1.f.pop_count        = 0;
+    pEnd->m_Word1.f.cf_const         = 0x0;
+    pEnd->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+
+    pEnd->m_Word1.f.end_of_program   = 0x0;
+    pEnd->m_Word1.f.valid_pixel_mode = 0x0;
+    pEnd->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_END;
+    pEnd->m_Word1.f.whole_quad_mode  = 0x0;
+
+    pEnd->m_Word1.f.barrier          = 0x1;
+
+    pEnd->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
+    pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pEnd->m_uIndex + 1;
+
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+    for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
+    {
+        pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pEnd->m_uIndex;
+    }
+    if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
+    {
+        FREE(pAsm->fc_stack[pAsm->FCSP].mid);
+        /* Do not leave a dangling pointer in the popped frame. */
+        pAsm->fc_stack[pAsm->FCSP].mid = NULL;
+    }
+#endif
+
+    if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
+    {
+        for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
+        {
+            if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+            {
+                if(GL_FALSE == breakLoopOnFlag(pAsm, unFCSP) )
+                {
+                    return GL_FALSE;
+                }
+                break;
+            }
+            else if(FC_IF == pAsm->fc_stack[unFCSP].type)
+            {
+                unIF++;
+            }
+        }
+        if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
+        {
+            /* No enclosing loop inside this sub: return directly. */
+#ifdef USE_CF_FOR_POP_AFTER
+            if(GL_FALSE == returnOnFlag(pAsm, unIF) )
+            {
+                return GL_FALSE;
+            }
+#else
+            if(GL_FALSE == returnOnFlag(pAsm, 0) )
+            {
+                return GL_FALSE;
+            }
+#endif /* USE_CF_FOR_POP_AFTER */
+            pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
+        }
+    }
+
+    pAsm->FCSP--;
+
+    decreaseCurrent(pAsm, FC_LOOP);
+
+    return GL_TRUE;
+}
+
+/**
+ * Add a CF RETURN instruction (pop_count 0).
+ *
+ * The function returns void, so a failure of add_cf_instruction() cannot
+ * be reported to the caller; in that case nothing is written.
+ *
+ * \param pAsm  assembler state
+ */
+void add_return_inst(r700_AssemblerBase *pAsm)
+{
+    R700ControlFlowGenericClause *pRet;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        /* A void function must not return a value. */
+        return;
+    }
+
+    pRet = pAsm->cf_current_cf_clause_ptr;
+
+    //pRet->m_Word1.f.pop_count      = 1;
+    pRet->m_Word1.f.pop_count        = 0;
+    pRet->m_Word1.f.cf_const         = 0x0;
+    pRet->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+
+    pRet->m_Word1.f.end_of_program   = 0x0;
+    pRet->m_Word1.f.valid_pixel_mode = 0x0;
+    pRet->m_Word1.f.cf_inst          = SQ_CF_INST_RETURN;
+    pRet->m_Word1.f.whole_quad_mode  = 0x0;
+
+    pRet->m_Word1.f.barrier          = 0x1;
+}
+
+/**
+ * Begin a subroutine.
+ *
+ * Appends a descriptor to pAsm->subs (growing the array by 10 entries when
+ * it is full), pushes a new CALLSTACK level that points at the
+ * descriptor's own CF instruction list, makes that list the active one,
+ * and pushes an FC_REP frame on fc_stack.
+ *
+ * \param pAsm      assembler state
+ * \param nILindex  stored as the descriptor's subIL_Offset
+ * \return GL_FALSE if the descriptor array cannot be grown, else GL_TRUE
+ */
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex)
+{
+    SUB_OFFSET *pSub;
+
+    /* Put in sub */
+    if( pAsm->unSubArraySize < (pAsm->unSubArrayPointer + 1) )
+    {
+        pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
+                                  sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
+                                  sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
+        if(NULL == pAsm->subs)
+        {
+            return GL_FALSE;
+        }
+        pAsm->unSubArraySize += 10;
+    }
+
+    pSub = &(pAsm->subs[pAsm->unSubArrayPointer]);
+
+    pSub->subIL_Offset = nILindex;
+    pSub->lstCFInstructions_local.uNumOfNode = 0;
+    pSub->lstCFInstructions_local.pHead      = NULL;
+    pSub->lstCFInstructions_local.pTail      = NULL;
+
+    /* New call level whose CF instructions go to the sub's own list. */
+    pAsm->CALLSP++;
+    pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
+    pAsm->CALLSTACK[pAsm->CALLSP].max     = 0;
+    pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
+    pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex     = pAsm->unSubArrayPointer;
+    pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
+        = &(pSub->lstCFInstructions_local);
+    SetActiveCFlist(pAsm->pR700Shader,
+                    pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
+
+    pAsm->unSubArrayPointer++;
+
+    /* start sub */
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+    pAsm->FCSP++;
+    pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
+
+    checkStackDepth(pAsm, FC_REP, GL_FALSE);
+
+    return GL_TRUE;
+}
+
+/**
+ * End the current subroutine.
+ *
+ * Copies the call level's maximum stack depth into the sub descriptor,
+ * releases the FC_REP stack entry, pops the CALLSTACK level and the
+ * fc_stack frame, and re-activates the CF list of the level below.
+ *
+ * \param pAsm  assembler state
+ * \return GL_FALSE if the top fc_stack frame is not FC_REP, else GL_TRUE
+ */
+GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
+{
+    SUB_OFFSET *pSub;
+
+    if(FC_REP != pAsm->fc_stack[pAsm->FCSP].type)
+    {
+        radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
+        return GL_FALSE;
+    }
+
+    /* copy max to sub structure */
+    pSub = &(pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex]);
+    pSub->unStackDepthMax = pAsm->CALLSTACK[pAsm->CALLSP].max;
+
+    /* Must run before CALLSP is lowered: it acts on the sub's level. */
+    decreaseCurrent(pAsm, FC_REP);
+
+    pAsm->FCSP--;
+    pAsm->CALLSP--;
+
+    SetActiveCFlist(pAsm->pR700Shader,
+                    pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
+
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+    return GL_TRUE;
+}
+
+/**
+ * Assemble RET.
+ *
+ * Inside a sub (CALLSP > 0) the fc_stack frames opened since the sub was
+ * entered are scanned from the top. If a loop frame is found, the return
+ * flag is set, a flag-controlled break is attached to that loop,
+ * LOOPRET_FLAGS is OR-ed into unCFflags and no RETURN is emitted here.
+ * Otherwise (with USE_CF_FOR_POP_AFTER) one POP covering the enclosing IF
+ * frames is emitted, followed by a CF RETURN.
+ *
+ * \param pAsm  assembler state
+ * \return GL_FALSE if a helper that reports status fails, else GL_TRUE.
+ *         add_return_inst() returns void, so its failure is not visible.
+ */
+GLboolean assemble_RET(r700_AssemblerBase *pAsm)
+{
+    GLuint unIF = 0;
+
+    if(pAsm->CALLSP > 0)
+    {   /* in sub */
+        GLuint unFCSP;
+        for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
+        {
+            if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+            {
+                /* Propagate failures instead of dropping them. */
+                if(    GL_FALSE == setRetInLoopFlag(pAsm, SQ_SEL_1)
+                    || GL_FALSE == breakLoopOnFlag(pAsm, unFCSP) )
+                {
+                    return GL_FALSE;
+                }
+                pAsm->unCFflags |= LOOPRET_FLAGS;
+
+                return GL_TRUE;
+            }
+            else if(FC_IF == pAsm->fc_stack[unFCSP].type)
+            {
+                unIF++;
+            }
+        }
+    }
+
+#ifdef USE_CF_FOR_POP_AFTER
+    if(unIF > 0)
+    {
+        if(GL_FALSE == pops(pAsm, unIF) )
+        {
+            return GL_FALSE;
+        }
+    }
+#endif /* USE_CF_FOR_POP_AFTER */
+
+    add_return_inst(pAsm);
+
+    return GL_TRUE;
+}
+
+/**
+ * Assemble CAL: add a CF CALL instruction and make sure the callee exists.
+ *
+ * The CALL is recorded in pAsm->callers (grown by 10 entries when full)
+ * together with the callee's IL offset. If a sub with that IL offset has
+ * already been assembled, only the caller's maximum stack depth is updated
+ * from the sub's recorded depth. Otherwise the sub is assembled now by
+ * calling AssembleInstr() starting at \p nILindex, and the maximum stack
+ * depth is updated afterwards on success.
+ *
+ * \param pAsm           assembler state
+ * \param nILindex       IL offset identifying the callee
+ * \param uiNumberInsts  passed through to AssembleInstr()
+ * \param pILInst        passed through to AssembleInstr()
+ * \return GL_FALSE if the CALL cannot be added, the callers array cannot
+ *         be grown or AssembleInstr() fails; GL_TRUE otherwise
+ */
+GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
+                       GLint nILindex,
+                       GLuint uiNumberInsts,
+                       struct prog_instruction *pILInst)
+{
+    R700ControlFlowGenericClause *pCall;
+    CALLER_POINTER *pCaller;
+    GLboolean bRet;
+    GLuint unSubID;
+    GLuint max;
+    int j;
+
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pCall = pAsm->cf_current_cf_clause_ptr;
+
+    pCall->m_Word1.f.cf_inst          = SQ_CF_INST_CALL;
+    pCall->m_Word1.f.call_count       = 1;
+    pCall->m_Word1.f.pop_count        = 0;
+    pCall->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+    pCall->m_Word1.f.cf_const         = 0x0;
+    pCall->m_Word1.f.end_of_program   = 0x0;
+    pCall->m_Word1.f.valid_pixel_mode = 0x0;
+    pCall->m_Word1.f.whole_quad_mode  = 0x0;
+    pCall->m_Word1.f.barrier          = 0x1;
+
+    /* Put in caller */
+    if( pAsm->unCallerArraySize < (pAsm->unCallerArrayPointer + 1) )
+    {
+        pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
+                         sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
+                         sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
+        if(NULL == pAsm->callers)
+        {
+            return GL_FALSE;
+        }
+        pAsm->unCallerArraySize += 10;
+    }
+
+    /* pCaller is only used before AssembleInstr() runs; a nested CAL may
+     * move the callers array. */
+    pCaller = &(pAsm->callers[pAsm->unCallerArrayPointer]);
+    pCaller->subIL_Offset = nILindex;
+    pCaller->cf_ptr       = pCall;
+
+    pAsm->unCallerArrayPointer++;
+
+    for(j=0; j<pAsm->unSubArrayPointer; j++)
+    {
+        if(nILindex != pAsm->subs[j].subIL_Offset)
+        {
+            continue;
+        }
+
+        /* compiled before */
+        max = pAsm->subs[j].unStackDepthMax
+            + pAsm->CALLSTACK[pAsm->CALLSP].current;
+        if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
+        {
+            pAsm->CALLSTACK[pAsm->CALLSP].max = max;
+        }
+
+        pCaller->subDescIndex = j;
+        return GL_TRUE;
+    }
+
+    /* Not assembled yet: the sub gets the next free descriptor index. */
+    unSubID = pAsm->unSubArrayPointer;
+    pCaller->subDescIndex = pAsm->unSubArrayPointer;
+
+    bRet = AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm);
+
+    if(GL_TRUE == bRet)
+    {
+        max = pAsm->subs[unSubID].unStackDepthMax
+            + pAsm->CALLSTACK[pAsm->CALLSP].current;
+        if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
+        {
+            pAsm->CALLSTACK[pAsm->CALLSP].max = max;
+        }
+    }
+
+    return bRet;
+}
+
+/**
+ * Emit a MOV that writes the x channel of the flag register
+ * (pAsm->flag_reg_index).
+ *
+ * The value comes from the source swizzle: all four swizzle selectors of
+ * source 0 are set to \p flagValue (callers in this file pass SQ_SEL_0 or
+ * SQ_SEL_1).
+ *
+ * The disabled literal-operand variant and its unused local were removed;
+ * next_ins_literal() is marked as not working in this file.
+ *
+ * \param pAsm       assembler state
+ * \param flagValue  swizzle selector used for all channels of source 0
+ * \return GL_FALSE if next_ins2() fails, GL_TRUE otherwise
+ */
+GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
+{
+    pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+    pAsm->D.dst.op3    = 0;
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = pAsm->flag_reg_index;
+    pAsm->D.dst.writex = 1;
+    pAsm->D.dst.writey = 0;
+    pAsm->D.dst.writez = 0;
+    pAsm->D.dst.writew = 0;
+    pAsm->D2.dst2.literal      = 1;
+    pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
+    pAsm->D.dst.predicated     = 0;
+    /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
+    pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
+
+    /* Source operand: use the SRC_ register-type enumerator.
+     * NOTE(review): assumes SRC_REG_TEMPORARY and DST_REG_TEMPORARY have
+     * the same value - confirm against the enum definitions. */
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = 0;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlex = flagValue;
+    pAsm->S[0].src.swizzley = flagValue;
+    pAsm->S[0].src.swizzlez = flagValue;
+    pAsm->S[0].src.swizzlew = flagValue;
+
+    if( GL_FALSE == next_ins2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    return GL_TRUE;
+}
+
+/**
+ * Emit a predicated PRED_SETE comparing the flag register
+ * (pAsm->flag_reg_index) against a source whose swizzle selectors are all
+ * SQ_SEL_1, in an ALU clause of type SQ_CF_INST_ALU_PUSH_BEFORE. The
+ * result is written to the x channel of a helper register obtained from
+ * gethelpr().
+ *
+ * Afterwards the maximum stack depth is updated as for one FC_PUSH_VPM
+ * entry.
+ *
+ * The disabled literal-operand variant and its unused local were removed;
+ * next_ins_literal() is marked as not working in this file.
+ *
+ * \param pAsm  assembler state
+ * \return GL_FALSE if next_ins2() fails, GL_TRUE otherwise
+ */
+GLboolean testFlag(r700_AssemblerBase *pAsm)
+{
+    //Test flag
+    GLuint tmp = gethelpr(pAsm);
+    pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+    pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
+    pAsm->D.dst.math   = 1;
+    pAsm->D.dst.rtype  = DST_REG_TEMPORARY;
+    pAsm->D.dst.reg    = tmp;
+    pAsm->D.dst.writex = 1;
+    pAsm->D.dst.writey = 0;
+    pAsm->D.dst.writez = 0;
+    pAsm->D.dst.writew = 0;
+    pAsm->D2.dst2.literal      = 1;
+    pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
+    pAsm->D.dst.predicated     = 1;
+
+    /* Source operands: use the SRC_ register-type enumerator.
+     * NOTE(review): assumes SRC_REG_TEMPORARY and DST_REG_TEMPORARY have
+     * the same value - confirm against the enum definitions. */
+    pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[0].src.reg   = pAsm->flag_reg_index;
+    setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[0].src));
+    pAsm->S[0].src.swizzlex = SQ_SEL_X;
+    pAsm->S[0].src.swizzley = SQ_SEL_Y;
+    pAsm->S[0].src.swizzlez = SQ_SEL_Z;
+    pAsm->S[0].src.swizzlew = SQ_SEL_W;
+
+    pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+    pAsm->S[1].src.reg   = 0;
+    setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+    noneg_PVSSRC(&(pAsm->S[1].src));
+    pAsm->S[1].src.swizzlex = SQ_SEL_1;
+    pAsm->S[1].src.swizzley = SQ_SEL_1;
+    pAsm->S[1].src.swizzlez = SQ_SEL_1;
+    pAsm->S[1].src.swizzlew = SQ_SEL_1;
+
+    if( GL_FALSE == next_ins2(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+    return GL_TRUE;
+}
+
+/**
+ * Emit the sequence "test flag; jump; clear flag; pop; return".
+ *
+ * The steps are: testFlag(), a JUMP with pop_count 1 and relative offset
+ * 4, setRetInLoopFlag() with SQ_SEL_0, a POP of \p unIF + 1 entries and a
+ * CF RETURN.
+ *
+ * \param pAsm  assembler state
+ * \param unIF  number of additional entries to pop before returning
+ * \return GL_FALSE as soon as a step that reports status fails, else
+ *         GL_TRUE. add_return_inst() returns void, so its failure is not
+ *         visible here.
+ */
+GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
+{
+    /* Evaluated left to right; stops at the first failing step instead
+     * of emitting the rest of the sequence after an error. */
+    if(    GL_FALSE == testFlag(pAsm)
+        || GL_FALSE == jumpToOffest(pAsm, 1, 4)
+        || GL_FALSE == setRetInLoopFlag(pAsm, SQ_SEL_0)
+        || GL_FALSE == pops(pAsm, unIF + 1) )
+    {
+        return GL_FALSE;
+    }
+
+    add_return_inst(pAsm);
+
+    return GL_TRUE;
+}
+
+/**
+ * Emit "test flag; LOOP_BREAK; POP 1" for the loop frame at \p unFCSP.
+ *
+ * The LOOP_BREAK instruction is appended to that frame's mid list; its
+ * addr is not set here.
+ *
+ * \param pAsm    assembler state
+ * \param unFCSP  fc_stack index of the loop frame to break out of
+ * \return GL_FALSE if the flag test, a CF instruction or the growth of the
+ *         mid list fails; GL_TRUE otherwise
+ */
+GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
+{
+    R700ControlFlowGenericClause **ppMid;
+    R700ControlFlowGenericClause  *pBreak;
+
+    if(GL_FALSE == testFlag(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    //break
+    if(GL_FALSE == add_cf_instruction(pAsm) )
+    {
+        return GL_FALSE;
+    }
+
+    pBreak = pAsm->cf_current_cf_clause_ptr;
+
+    pBreak->m_Word1.f.pop_count        = 1;
+    pBreak->m_Word1.f.cf_const         = 0x0;
+    pBreak->m_Word1.f.cond             = SQ_CF_COND_ACTIVE;
+
+    pBreak->m_Word1.f.end_of_program   = 0x0;
+    pBreak->m_Word1.f.valid_pixel_mode = 0x0;
+    pBreak->m_Word1.f.cf_inst          = SQ_CF_INST_LOOP_BREAK;
+    pBreak->m_Word1.f.whole_quad_mode  = 0x0;
+
+    pBreak->m_Word1.f.barrier          = 0x1;
+
+    ppMid = (R700ControlFlowGenericClause **)_mesa_realloc(
+                (void *)pAsm->fc_stack[unFCSP].mid,
+                sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+                sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+    pAsm->fc_stack[unFCSP].mid = ppMid;
+    if(NULL == ppMid)
+    {
+        /* Keep the count consistent with the (now empty) list. */
+        pAsm->fc_stack[unFCSP].unNumMid = 0;
+        return GL_FALSE;
+    }
+    ppMid[pAsm->fc_stack[unFCSP].unNumMid] = pBreak;
+    pAsm->fc_stack[unFCSP].unNumMid++;
+
+    if(GL_FALSE == pops(pAsm, 1) )
+    {
+        return GL_FALSE;
+    }
+
return GL_TRUE;
}
-GLboolean AssembleInstr(GLuint uiNumberInsts,
+GLboolean AssembleInstr(GLuint uiFirstInst,
+ GLuint uiNumberInsts,
struct prog_instruction *pILInst,
r700_AssemblerBase *pR700AsmCode)
{
GLuint i;
pR700AsmCode->pILInst = pILInst;
- for(i=0; i<uiNumberInsts; i++)
+ for(i=uiFirstInst; i<uiNumberInsts; i++)
{
pR700AsmCode->uiCurInst = i;
+#ifndef USE_CF_FOR_CONTINUE_BREAK
+ if(OPCODE_BRK == pILInst[i+1].Opcode)
+ {
+ switch(pILInst[i].Opcode)
+ {
+ case OPCODE_SLE:
+ pILInst[i].Opcode = OPCODE_SGT;
+ break;
+ case OPCODE_SLT:
+ pILInst[i].Opcode = OPCODE_SGE;
+ break;
+ case OPCODE_SGE:
+ pILInst[i].Opcode = OPCODE_SLT;
+ break;
+ case OPCODE_SGT:
+ pILInst[i].Opcode = OPCODE_SLE;
+ break;
+ case OPCODE_SEQ:
+ pILInst[i].Opcode = OPCODE_SNE;
+ break;
+ case OPCODE_SNE:
+ pILInst[i].Opcode = OPCODE_SEQ;
+ break;
+ default:
+ break;
+ }
+ }
+#endif
+
switch (pILInst[i].Opcode)
{
case OPCODE_ABS:
@@ -4342,7 +5905,8 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
return GL_FALSE;
break;
- //case OP_FLR_INT:
+ //case OP_FLR_INT: ;
+
// if ( GL_FALSE == assemble_FLR_INT() )
// return GL_FALSE;
// break;
@@ -4353,8 +5917,10 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
break;
case OPCODE_KIL:
- if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
- return GL_FALSE;
+ case OPCODE_KIL_NV:
+ /* done at OPCODE_SE/SGT...etc. */
+ /* if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
+ return GL_FALSE; */
break;
case OPCODE_LG2:
if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
@@ -4414,16 +5980,340 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
case OPCODE_SCS:
if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
return GL_FALSE;
- break;
+ break;
+
+ case OPCODE_SEQ:
+ if(OPCODE_IF == pILInst[i+1].Opcode)
+ {
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_BRK == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
+#endif
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_CONT == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
+#endif
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
+ {
+ if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else
+ {
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ break;
+
+ case OPCODE_SGT:
+ if(OPCODE_IF == pILInst[i+1].Opcode)
+ {
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_BRK == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
+#endif
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_CONT == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
+#endif
+
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
+ {
+ if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else
+ {
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
+ {
+ return GL_FALSE;
+ }
+ }
+ break;
case OPCODE_SGE:
- if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
- return GL_FALSE;
- break;
+ if(OPCODE_IF == pILInst[i+1].Opcode)
+ {
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_BRK == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
+#endif
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_CONT == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
+#endif
+
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
+ {
+ if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else
+ {
+ if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ }
+ break;
+
+ /* There are no LT/LE opcodes. TODO: implement LE with GE and LT with GT; reversing the order of the two sources would be simplest. Or use SQ_CF_COND_FALSE instead of SQ_CF_COND_ACTIVE. */
case OPCODE_SLT:
- if ( GL_FALSE == assemble_SLT(pR700AsmCode) )
- return GL_FALSE;
- break;
+ {
+ struct prog_src_register SrcRegSave[2];
+ SrcRegSave[0] = pILInst[i].SrcReg[0];
+ SrcRegSave[1] = pILInst[i].SrcReg[1];
+ pILInst[i].SrcReg[0] = SrcRegSave[1];
+ pILInst[i].SrcReg[1] = SrcRegSave[0];
+ if(OPCODE_IF == pILInst[i+1].Opcode)
+ {
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_BRK == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
+#endif
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_CONT == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
+#endif
+
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ }
+ else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
+ {
+ if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else
+ {
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ }
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ }
+ break;
+
+ case OPCODE_SLE:
+ {
+ struct prog_src_register SrcRegSave[2];
+ SrcRegSave[0] = pILInst[i].SrcReg[0];
+ SrcRegSave[1] = pILInst[i].SrcReg[1];
+ pILInst[i].SrcReg[0] = SrcRegSave[1];
+ pILInst[i].SrcReg[1] = SrcRegSave[0];
+ if(OPCODE_IF == pILInst[i+1].Opcode)
+ {
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_BRK == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
+#endif
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_CONT == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
+#endif
+
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ }
+ else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
+ {
+ if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else
+ {
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ }
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ }
+ break;
+
+ case OPCODE_SNE:
+ if(OPCODE_IF == pILInst[i+1].Opcode)
+ {
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_BRK == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
+#endif
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if(OPCODE_CONT == pILInst[i+1].Opcode)
+ {
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+#else
+ pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
+#endif
+ if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
+ {
+ if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLNE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ else
+ {
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
+ {
+ return GL_FALSE;
+ }
+ }
+ break;
//case OP_STP:
// if ( GL_FALSE == assemble_STP(pR700AsmCode) )
@@ -4457,30 +6347,102 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
+ case OPCODE_TRUNC:
+ if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
+ return GL_FALSE;
+ break;
+
case OPCODE_XPD:
if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
return GL_FALSE;
break;
case OPCODE_IF :
- if ( GL_FALSE == assemble_IF(pR700AsmCode) )
- return GL_FALSE;
+ {
+ GLboolean bHasElse = GL_FALSE;
+
+ if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE)
+ {
+ bHasElse = GL_TRUE;
+ }
+
+ if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
+ {
+ return GL_FALSE;
+ }
+ }
break;
+
case OPCODE_ELSE :
- radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
- //if ( GL_FALSE == assemble_BAD("ELSE") )
+ if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
return GL_FALSE;
break;
+
case OPCODE_ENDIF:
if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
return GL_FALSE;
break;
+ case OPCODE_BGNLOOP:
+ if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_BRK:
+ if( GL_FALSE == assemble_BRK(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_CONT:
+ if( GL_FALSE == assemble_CONT(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_ENDLOOP:
+ if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_BGNSUB:
+ if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_RET:
+ if( GL_FALSE == assemble_RET(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_CAL:
+ if( GL_FALSE == assemble_CAL(pR700AsmCode,
+ pILInst[i].BranchTarget,
+ uiNumberInsts,
+ pILInst) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
//case OPCODE_EXPORT:
// if ( GL_FALSE == assemble_EXPORT() )
// return GL_FALSE;
// break;
+ case OPCODE_ENDSUB:
+ return assemble_ENDSUB(pR700AsmCode);
+
case OPCODE_END:
//pR700AsmCode->uiCurInst = i;
//This is a reminder that if in a later export there is depth/stencil
@@ -4497,6 +6459,123 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_TRUE;
}
+GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) /* Per-program assembler setup run before AssembleInstr; always returns GL_TRUE. */
+{
+ setRetInLoopFlag(pAsm, SQ_SEL_0); /* result is ignored; presumably emits the instruction that zeroes the return-in-loop flag -- confirm in setRetInLoopFlag */
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU; /* default ALU clause opcode; the compare cases in AssembleInstr switch it to PUSH_BEFORE/BREAK/CONTINUE variants */
+ return GL_TRUE;
+}
+
+GLboolean RelocProgram(r700_AssemblerBase * pAsm) /* Post-assembly fixup: drops the flag-init instruction when unused, sets the shader stack size, appends every subroutine's CF list to the main list and patches CF addresses. Always returns GL_TRUE. */
+{
+ GLuint i;
+ GLuint unCFoffset;
+ TypedShaderList * plstCFmain;
+ TypedShaderList * plstCFsub;
+
+ R700ShaderInstruction * pInst;
+ R700ControlFlowGenericClause * pCFInst;
+
+ plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; /* call level 0 is used as the main program's CF list */
+
+ /* HAS_LOOPRET is not set, so the flag is unused: remove the flag-init instruction, taken here to be the first ALU instruction of the first ALU clause in the main list. */
+ if((pAsm->unCFflags & HAS_LOOPRET) == 0)
+ {
+ R700ControlFlowALUClause * pCF_ALU;
+ pInst = plstCFmain->pHead;
+ while(pInst)
+ {
+ if(SIT_CF_ALU == pInst->m_ShaderInstType)
+ {
+ pCF_ALU = (R700ControlFlowALUClause *)pInst;
+ if(0 == pCF_ALU->m_Word1.f.count) /* NOTE(review): count looks like an N-1 encoding, so 0 would mean a single-instruction clause -- confirm */
+ {
+ pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP; /* clause is kept in the list but turned into a NOP */
+ }
+ else
+ {
+ R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
+
+ pALU->m_pLinkedALUClause = NULL; /* detach the clause's first ALU instruction */
+ pALU = (R700ALUInstruction *)(pALU->pNextInst); /* NOTE(review): assumes a following instruction exists whenever count != 0 */
+ pALU->m_pLinkedALUClause = pCF_ALU;
+ pCF_ALU->m_pLinkedALUInstruction = pALU; /* the clause now starts at the second instruction */
+
+ pCF_ALU->m_Word1.f.count--;
+ }
+ break; /* only the first ALU clause is examined */
+ }
+ pInst = pInst->pNextInst;
+ };
+ }
+
+ if(pAsm->CALLSTACK[0].max > 0) /* uStackSize is left untouched when max is 0 */
+ {
+ pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2; /* max rounded up to whole groups of four, plus 2 */
+ }
+
+ if(0 == pAsm->unSubArrayPointer) /* no subroutines recorded: nothing to relocate */
+ {
+ return GL_TRUE;
+ }
+
+ unCFoffset = plstCFmain->uNumOfNode; /* the first sub is appended right after the main list's current nodes */
+
+ /* Relocate each subroutine, in array order, to its final position behind the main list. */
+ for(i=0; i<pAsm->unSubArrayPointer; i++)
+ {
+ pAsm->subs[i].unCFoffset = unCFoffset; /* remembered so call sites can be patched below */
+ plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
+
+ pInst = plstCFsub->pHead;
+
+ /* Walk the sub's list: shift CF target addresses and every instruction index by unCFoffset. */
+ while(pInst)
+ {
+ if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
+ {
+ pCFInst = (R700ControlFlowGenericClause *)pInst;
+
+ switch (pCFInst->m_Word1.f.cf_inst) /* only these opcodes get Word0.addr shifted; presumably addr was relative to the sub's own list -- confirm */
+ {
+ case SQ_CF_INST_POP:
+ case SQ_CF_INST_JUMP:
+ case SQ_CF_INST_ELSE:
+ case SQ_CF_INST_LOOP_END:
+ case SQ_CF_INST_LOOP_START:
+ case SQ_CF_INST_LOOP_START_NO_AL:
+ case SQ_CF_INST_LOOP_CONTINUE:
+ case SQ_CF_INST_LOOP_BREAK:
+ pCFInst->m_Word0.f.addr += unCFoffset;
+ break;
+ default:
+ break;
+ }
+ }
+
+ pInst->m_uIndex += unCFoffset; /* applied to every node, not only CF_GENERIC ones */
+
+ pInst = pInst->pNextInst;
+ };
+
+ /* Splice the sub's list onto the tail of the main list. NOTE(review): assumes both lists are non-empty (pTail/pHead are not checked). */
+ plstCFmain->pTail->pNextInst = plstCFsub->pHead;
+ plstCFmain->pTail = plstCFsub->pTail;
+ plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
+
+ unCFoffset += plstCFsub->uNumOfNode; /* the next sub starts after this one */
+ }
+
+ /* Patch every recorded call site so its CF address is the final offset of the subroutine it refers to. */
+ for(i=0; i<pAsm->unCallerArrayPointer; i++)
+ {
+ pAsm->callers[i].cf_ptr->m_Word0.f.addr
+ = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; /* NOTE(review): subDescIndex is used unchecked as an index into subs */
+ }
+
+ return GL_TRUE;
+}
+
GLboolean Process_Export(r700_AssemblerBase* pAsm,
GLuint type,
GLuint export_starting_index,
@@ -4791,6 +6870,25 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
export_starting_index++;
}
}
+
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(OutputsWritten & unBit)
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_PARAM,
+ export_starting_index,
+ 1,
+ pR700AsmCode->ucVP_OutputMap[i],
+ GL_FALSE) )
+ {
+ return GL_FALSE;
+ }
+
+ export_starting_index++;
+ }
+ }
// At least one param should be exported
if (export_count)
@@ -4825,6 +6923,16 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
{
FREE(pR700AsmCode->pucOutMask);
FREE(pR700AsmCode->pInstDeps);
+
+ if(NULL != pR700AsmCode->subs)
+ {
+ FREE(pR700AsmCode->subs);
+ }
+ if(NULL != pR700AsmCode->callers)
+ {
+ FREE(pR700AsmCode->callers);
+ }
+
return GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
index c66db502a1..130fc89dae 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.h
+++ b/src/mesa/drivers/dri/r600/r700_assembler.h
@@ -72,7 +72,8 @@ typedef enum SrcRegisterType
SRC_REG_INPUT = 1,
SRC_REG_CONSTANT = 2,
SRC_REG_ALT_TEMPORARY = 3,
- NUMBER_OF_SRC_REG_TYPE = 4
+ SRC_REC_LITERAL = 4,
+ NUMBER_OF_SRC_REG_TYPE = 5
} SrcRegisterType;
typedef enum DstRegisterType
@@ -111,6 +112,12 @@ typedef struct PVSDSTtag
BITS addrmode1:1; //32
} PVSDST;
+typedef struct PVSINSTtag /* Two 2-bit fields; exposed as the dst2 view of the PVSDWORD union. */
+{
+ BITS literal :2; /* NOTE(review): presumably marks/counts literal operands of the instruction -- confirm against the assembler */
+ BITS SaturateMode :2; /* NOTE(review): presumably the destination saturate mode -- confirm */
+} PVSINST;
+
typedef struct PVSSRCtag
{
BITS rtype:4;
@@ -148,6 +155,7 @@ typedef union PVSDWORDtag
{
BITS bits;
PVSDST dst;
+ PVSINST dst2;
PVSSRC src;
PVSMATH math;
float f;
@@ -251,6 +259,8 @@ enum
FC_IF = 1,
FC_LOOP = 2,
FC_REP = 3,
+ FC_PUSH_VPM = 4,
+ FC_PUSH_WQM = 5,
COND_NONE = 0,
COND_BOOL = 1,
@@ -263,22 +273,52 @@ enum
typedef struct FC_LEVEL /* One entry of r700_AssemblerBase::fc_stack. */
{
- unsigned int first; ///< first fc instruction on level (if, rep, loop)
- unsigned int* mid; ///< middle instructions - else or all breaks on this level
- unsigned int midLen;
- unsigned int type;
- unsigned int cond;
- unsigned int inv;
- unsigned int bpush; ///< 1 if first instruction does branch stack push
- int id; ///< id of bool or int variable
+ R700ControlFlowGenericClause * first; ///< first fc instruction on level (if, rep, loop), now held as a CF clause pointer
+ R700ControlFlowGenericClause ** mid; ///< middle instructions - else or all breaks on this level, now CF clause pointers
+ unsigned int unNumMid; ///< NOTE(review): presumably the number of entries in use in mid -- confirm
+ unsigned int midLen;
+ unsigned int type;
+ unsigned int cond;
+ unsigned int inv;
+ int id; ///< id of bool or int variable
} FC_LEVEL;
typedef struct VTX_FETCH_METHOD
{
- GLboolean bEnableMini;
- GLuint mega_fetch_remainder;
+ GLboolean bEnableMini;
+ GLuint mega_fetch_remainder;
} VTX_FETCH_METHOD;
+typedef struct SUB_OFFSET /* Descriptor of one subroutine; elements of r700_AssemblerBase::subs. */
+{
+ GLint subIL_Offset; /* NOTE(review): presumably the subroutine's position in the Mesa IL instruction array -- confirm */
+ GLuint unCFoffset; /* set by RelocProgram: CF offset at which this sub's list was appended to the main list */
+ GLuint unStackDepthMax;
+ TypedShaderList lstCFInstructions_local; /* the sub's own CF instruction list, spliced into the main list by RelocProgram */
+} SUB_OFFSET;
+
+typedef struct CALLER_POINTER /* Record of one call site; elements of r700_AssemblerBase::callers. */
+{
+ GLint subIL_Offset; /* NOTE(review): presumably the IL position of the called subroutine -- confirm */
+ GLint subDescIndex; /* index into r700_AssemblerBase::subs of the called subroutine */
+ R700ControlFlowGenericClause* cf_ptr; /* CF instruction whose Word0.addr RelocProgram overwrites with the sub's unCFoffset */
+} CALLER_POINTER;
+
+#define SQ_MAX_CALL_DEPTH 0x00000020
+
+typedef struct CALL_LEVEL /* One entry of r700_AssemblerBase::CALLSTACK (SQ_MAX_CALL_DEPTH entries). */
+{
+ unsigned int FCSP_BeforeEntry; /* NOTE(review): presumably the flow-control stack pointer (FCSP) saved on entry -- confirm */
+ GLint subDescIndex;
+ GLushort current;
+ GLushort max; /* for level 0, RelocProgram derives the shader's uStackSize from this value */
+ TypedShaderList * plstCFInstructions_local; /* CF list being built at this level; level 0's list is the main program */
+} CALL_LEVEL;
+
+#define HAS_CURRENT_LOOPRET 0x1L /* bit of r700_AssemblerBase::unCFflags */
+#define HAS_LOOPRET 0x2L /* bit of r700_AssemblerBase::unCFflags; RelocProgram removes the flag-init instruction when it is clear */
+#define LOOPRET_FLAGS (HAS_LOOPRET | HAS_CURRENT_LOOPRET) /* parenthesized so that `x & LOOPRET_FLAGS` and `~LOOPRET_FLAGS` apply to both bits */
+
typedef struct r700_AssemblerBase
{
R700ControlFlowSXClause* cf_last_export_ptr;
@@ -294,11 +334,14 @@ typedef struct r700_AssemblerBase
// No clause has been created yet
CF_CLAUSE_TYPE cf_current_clause_type;
+ BITS alu_x_opcode;
+
GLuint number_of_exports;
GLuint number_of_colorandz_exports;
GLuint number_of_export_opcodes;
PVSDWORD D;
+ PVSDWORD D2;
PVSDWORD S[3];
unsigned int uLastPosUpdate;
@@ -310,6 +353,8 @@ typedef struct r700_AssemblerBase
unsigned int number_used_registers;
unsigned int uUsedConsts;
+ unsigned int flag_reg_index;
+
// Fragment programs
unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX];
unsigned int uiFP_OutputMap[FRAG_RESULT_MAX];
@@ -330,9 +375,6 @@ typedef struct r700_AssemblerBase
unsigned int FCSP;
FC_LEVEL fc_stack[32];
- unsigned int branch_depth;
- unsigned int max_branch_depth;
-
//-----------------------------------------------------------------------------------
// ArgSubst used in Assemble_Source() function
//-----------------------------------------------------------------------------------
@@ -378,6 +420,18 @@ typedef struct r700_AssemblerBase
GLboolean is_tex;
/* we inserted helper instructions and need a barrier on the next TEX instruction */
GLboolean need_tex_barrier;
+
+ SUB_OFFSET * subs;
+ GLuint unSubArraySize;
+ GLuint unSubArrayPointer;
+ CALLER_POINTER * callers;
+ GLuint unCallerArraySize;
+ GLuint unCallerArrayPointer;
+ unsigned int CALLSP;
+ CALL_LEVEL CALLSTACK[SQ_MAX_CALL_DEPTH];
+
+ GLuint unCFflags;
+
} r700_AssemblerBase;
//Internal use
@@ -446,6 +500,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
R700ALUInstruction* alu_instruction_ptr,
GLuint contiguous_slots_needed);
+
+GLboolean add_cf_instruction(r700_AssemblerBase* pAsm);
+void add_return_inst(r700_AssemblerBase *pAsm);
+
void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
int source_index,
BITS* psrc_sel,
@@ -467,6 +525,21 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
R700ALUInstruction* alu_instruction_ptr);
GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
GLboolean next_ins(r700_AssemblerBase *pAsm);
+
+GLboolean next_ins2(r700_AssemblerBase *pAsm);
+GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm);
+
+/* TODO : merge next_ins/2/literal, assemble_alu_instruction/2/literal */
+GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral);
+GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral);
+
+GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops);
+GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset);
+GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue);
+GLboolean testFlag(r700_AssemblerBase *pAsm);
+GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP);
+GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF);
+
GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
@@ -481,7 +554,7 @@ GLboolean assemble_EXP(r700_AssemblerBase *pAsm);
GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
-GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode);
GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
GLboolean assemble_LOG(r700_AssemblerBase *pAsm);
@@ -497,14 +570,32 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
GLboolean assemble_SIN(r700_AssemblerBase *pAsm);
GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
+
+GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode);
+GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode);
+
GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
GLboolean assemble_STP(r700_AssemblerBase *pAsm);
GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
GLboolean assemble_XPD(r700_AssemblerBase *pAsm);
GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm);
-GLboolean assemble_IF(r700_AssemblerBase *pAsm);
+GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse);
+GLboolean assemble_ELSE(r700_AssemblerBase *pAsm);
GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm);
+GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm);
+GLboolean assemble_BRK(r700_AssemblerBase *pAsm);
+GLboolean assemble_COND(r700_AssemblerBase *pAsm);
+GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm);
+
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex);
+GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm);
+GLboolean assemble_RET(r700_AssemblerBase *pAsm);
+GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
+ GLint nILindex,
+ GLuint uiNumberInsts,
+ struct prog_instruction *pILInst);
+
GLboolean Process_Export(r700_AssemblerBase* pAsm,
GLuint type,
GLuint export_starting_index,
@@ -516,12 +607,16 @@ GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm,
//Interface
-GLboolean AssembleInstr(GLuint uiNumberInsts,
+GLboolean AssembleInstr(GLuint uiFirstInst,
+ GLuint uiNumberInsts,
struct prog_instruction *pILInst,
r700_AssemblerBase *pR700AsmCode);
GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
+GLboolean RelocProgram(r700_AssemblerBase * pAsm);
+GLboolean InitShaderProgram(r700_AssemblerBase * pAsm);
+
int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader);
GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode);
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 8538e3582b..d702740014 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -446,68 +446,77 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom) /* Emits the pixel-shader program state (start address, resources, exports, CF offset and, new in this change, SQ_LOOP_CONST_0) into the command batch; does nothing when no fragment-shader BO is active. */
{
- context_t *context = R700_CONTEXT(ctx);
- R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
- struct radeon_bo * pbo;
- BATCH_LOCALS(&context->radeon);
- radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ struct radeon_bo * pbo;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
- pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context));
+ pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context));
- if (!pbo)
- return;
+ if (!pbo) /* no active fragment shader buffer: nothing to emit */
+ return;
- r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+ r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
- BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
- R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
- R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All);
- R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All,
- pbo,
- r700->ps.SQ_PGM_START_PS.u32All,
- RADEON_GEM_DOMAIN_GTT, 0, 0);
- END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(SQ_PGM_START_PS, 1);
+ R600_OUT_BATCH(r700->ps.SQ_PGM_START_PS.u32All);
+ R600_OUT_BATCH_RELOC(r700->ps.SQ_PGM_START_PS.u32All,
+ pbo,
+ r700->ps.SQ_PGM_START_PS.u32All,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
- BEGIN_BATCH_NO_AUTOSTATE(9);
- R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All);
- R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All);
- R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All);
- END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(9);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_PS, r700->ps.SQ_PGM_RESOURCES_PS.u32All);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_EXPORTS_PS, r700->ps.SQ_PGM_EXPORTS_PS.u32All);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_PS, r700->ps.SQ_PGM_CF_OFFSET_PS.u32All);
+ END_BATCH();
- COMMIT_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3); /* 3 extra dwords for one register write; the ps atom size in r600InitAtoms grows from 21 to 24 to match */
+ R600_OUT_BATCH_REGVAL(SQ_LOOP_CONST_0, 0x01000FFF); /* NOTE(review): the constant presumably packs the loop count/init/increment fields -- confirm against the register spec */
+ END_BATCH();
+
+ COMMIT_BATCH();
}
static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom) /* Emits the vertex-shader program state (start address, resources, CF offset and, new in this change, one loop constant) into the command batch; does nothing when no vertex-shader BO is active. */
{
- context_t *context = R700_CONTEXT(ctx);
- R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
- struct radeon_bo * pbo;
- BATCH_LOCALS(&context->radeon);
- radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
+ struct radeon_bo * pbo;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
- pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
+ pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
- if (!pbo)
- return;
+ if (!pbo) /* no active vertex shader buffer: nothing to emit */
+ return;
- r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+ r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
- BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
- R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
- R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All);
- R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All,
- pbo,
- r700->vs.SQ_PGM_START_VS.u32All,
- RADEON_GEM_DOMAIN_GTT, 0, 0);
- END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(SQ_PGM_START_VS, 1);
+ R600_OUT_BATCH(r700->vs.SQ_PGM_START_VS.u32All);
+ R600_OUT_BATCH_RELOC(r700->vs.SQ_PGM_START_VS.u32All,
+ pbo,
+ r700->vs.SQ_PGM_START_VS.u32All,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
- BEGIN_BATCH_NO_AUTOSTATE(6);
- R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All);
- R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All);
- END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_RESOURCES_VS, r700->vs.SQ_PGM_RESOURCES_VS.u32All);
+ R600_OUT_BATCH_REGVAL(SQ_PGM_CF_OFFSET_VS, r700->vs.SQ_PGM_CF_OFFSET_VS.u32All);
+ END_BATCH();
- COMMIT_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3); /* 3 extra dwords for one register write; the vs atom size in r600InitAtoms grows from 18 to 21 to match */
+ R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + 32*4), 0x0100000F); /* NOTE(review): the 32*4 offset presumably selects the first vertex-shader loop constant, and the value packs count/init/increment -- confirm against the register spec */
+ //R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + (SQ_LOOP_CONST_vs<2)), 0x0100000F);
+ END_BATCH();
+
+ COMMIT_BATCH();
}
static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom)
@@ -1305,8 +1314,8 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState);
ALLOC_STATE(vpt, always, 16, r700SendViewportState);
ALLOC_STATE(fs, always, 18, r700SendFSState);
- ALLOC_STATE(vs, always, 18, r700SendVSState);
- ALLOC_STATE(ps, always, 21, r700SendPSState);
+ ALLOC_STATE(vs, always, 21, r700SendVSState);
+ ALLOC_STATE(ps, always, 24, r700SendPSState);
ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState);
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index ccafd433bf..e9ef6c8695 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -44,12 +44,18 @@
//TODO : Validate FP input with VP output.
void Map_Fragment_Program(r700_AssemblerBase *pAsm,
- struct gl_fragment_program *mesa_fp)
+ struct gl_fragment_program *mesa_fp,
+ GLcontext *ctx)
{
unsigned int unBit;
unsigned int i;
GLuint ui;
+ /* match fp inputs with vp exports. */
+ struct r700_vertex_program_cont *vpc =
+ (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+ GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+
pAsm->number_used_registers = 0;
//Input mapping : mesa_fp->Base.InputsRead set the flag, set in
@@ -61,33 +67,89 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm,
pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++;
}
- unBit = 1 << FRAG_ATTRIB_COL0;
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << VERT_RESULT_COL0;
+ if(OutputsWritten & unBit)
{
pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
}
- unBit = 1 << FRAG_ATTRIB_COL1;
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << VERT_RESULT_COL1;
+ if(OutputsWritten & unBit)
{
pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
}
- unBit = 1 << FRAG_ATTRIB_FOGC;
- if(mesa_fp->Base.InputsRead & unBit)
- {
- pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
- }
+ unBit = 1 << VERT_RESULT_FOGC;
+ if(OutputsWritten & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
+ }
for(i=0; i<8; i++)
{
- unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << (VERT_RESULT_TEX0 + i);
+ if(OutputsWritten & unBit)
{
pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
}
}
+/* order has been taken care of */
+#if 1
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(OutputsWritten & unBit)
+ {
+ pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++;
+ }
+ }
+#else
+ if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 )
+ {
+ struct r700_vertex_program_cont *vpc =
+ (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+ struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying;
+ struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying;
+ struct gl_program_parameter * pVsParam;
+ struct gl_program_parameter * pPsParam;
+ GLuint j, k;
+ GLuint unMaxVarying = 0;
+
+ for(i=0; i<VsVarying->NumParameters; i++)
+ {
+ pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0;
+ }
+
+ for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ j = i - FRAG_ATTRIB_VAR0;
+ pPsParam = PsVarying->Parameters + j;
+
+ for(k=0; k<VsVarying->NumParameters; k++)
+ {
+ pVsParam = VsVarying->Parameters + k;
+
+ if( strcmp(pPsParam->Name, pVsParam->Name) == 0)
+ {
+ pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k;
+ if(k > unMaxVarying)
+ {
+ unMaxVarying = k;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ pAsm->number_used_registers += unMaxVarying + 1;
+ }
+#endif
+
/* Map temporary registers (GPRs) */
pAsm->starting_temp_register_number = pAsm->number_used_registers;
@@ -127,6 +189,8 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm,
pAsm->pucOutMask[ui] = 0x0;
}
+ pAsm->flag_reg_index = pAsm->number_used_registers++;
+
pAsm->uFirstHelpReg = pAsm->number_used_registers;
}
@@ -233,7 +297,8 @@ GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
}
GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
- struct gl_fragment_program *mesa_fp)
+ struct gl_fragment_program *mesa_fp,
+ GLcontext *ctx)
{
GLuint number_of_colors_exported;
GLboolean z_enabled = GL_FALSE;
@@ -241,14 +306,17 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
//Init_Program
Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
- Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp);
+ Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx);
if( GL_FALSE == Find_Instruction_Dependencies_fp(fp, mesa_fp) )
{
return GL_FALSE;
}
+
+ InitShaderProgram(&(fp->r700AsmCode));
- if( GL_FALSE == AssembleInstr(mesa_fp->Base.NumInstructions,
+ if( GL_FALSE == AssembleInstr(0,
+ mesa_fp->Base.NumInstructions,
&(mesa_fp->Base.Instructions[0]),
&(fp->r700AsmCode)) )
{
@@ -260,6 +328,11 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
return GL_FALSE;
}
+ if( GL_FALSE == RelocProgram(&(fp->r700AsmCode)) )
+ {
+ return GL_FALSE;
+ }
+
fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0
: (fp->r700AsmCode.number_used_registers - 1);
@@ -300,7 +373,7 @@ void r700SelectFragmentShader(GLcontext *ctx)
}
if (GL_FALSE == fp->translated)
- r700TranslateFragmentShader(fp, &(fp->mesa_program));
+ r700TranslateFragmentShader(fp, &(fp->mesa_program), ctx);
}
void * r700GetActiveFpShaderBo(GLcontext * ctx)
@@ -394,6 +467,9 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
EXPORT_MODE_shift, EXPORT_MODE_mask);
// emit ps input map
+ struct r700_vertex_program_cont *vpc =
+ (struct r700_vertex_program_cont *)ctx->VertexProgram._Current;
+ GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
unBit = 1 << FRAG_ATTRIB_WPOS;
if(mesa_fp->Base.InputsRead & unBit)
{
@@ -407,8 +483,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
}
- unBit = 1 << FRAG_ATTRIB_COL0;
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << VERT_RESULT_COL0;
+ if(OutputsWritten & unBit)
{
ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
@@ -420,8 +496,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
}
- unBit = 1 << FRAG_ATTRIB_COL1;
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << VERT_RESULT_COL1;
+ if(OutputsWritten & unBit)
{
ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
@@ -433,8 +509,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
}
- unBit = 1 << FRAG_ATTRIB_FOGC;
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << VERT_RESULT_FOGC;
+ if(OutputsWritten & unBit)
{
ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
@@ -448,8 +524,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
for(i=0; i<8; i++)
{
- unBit = 1 << (FRAG_ATTRIB_TEX0 + i);
- if(mesa_fp->Base.InputsRead & unBit)
+ unBit = 1 << (VERT_RESULT_TEX0 + i);
+ if(OutputsWritten & unBit)
{
ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
@@ -459,6 +535,22 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
}
}
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(OutputsWritten & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0];
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+ }
+
exportCount = (r700->ps.SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
if (r700->CB_SHADER_CONTROL.u32All != ((1 << exportCount) - 1))
{
@@ -469,7 +561,8 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
/* sent out shader constants. */
paramList = fp->mesa_program.Base.Parameters;
- if(NULL != paramList) {
+ if(NULL != paramList)
+ {
_mesa_load_state_parameters(ctx, paramList);
if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
@@ -482,10 +575,10 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
unNumParamData = paramList->NumParameters;
for(ui=0; ui<unNumParamData; ui++) {
- r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ r700->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
+ r700->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
+ r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
+ r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
}
} else
r700->ps.num_consts = 0;
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h
index cbb108d212..e562bfa478 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.h
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.h
@@ -49,12 +49,14 @@ struct r700_fragment_program
/* Internal */
void Map_Fragment_Program(r700_AssemblerBase *pAsm,
- struct gl_fragment_program *mesa_fp);
+ struct gl_fragment_program *mesa_fp,
+ GLcontext *ctx);
GLboolean Find_Instruction_Dependencies_fp(struct r700_fragment_program *fp,
struct gl_fragment_program *mesa_fp);
GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
- struct gl_fragment_program *mesa_vp);
+ struct gl_fragment_program *mesa_vp,
+ GLcontext *ctx);
/* Interface */
extern void r700SelectFragmentShader(GLcontext *ctx);
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 47f89c91f8..eab27cbd84 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -526,6 +526,9 @@ static void r700ConvertAttrib(GLcontext *ctx, int count,
radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
sizeof(GLfloat) * input->Size * count, 32);
+
+ radeon_bo_map(attr->bo, 1);
+
dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
assert(src_ptr != NULL);
@@ -559,6 +562,8 @@ static void r700ConvertAttrib(GLcontext *ctx, int count,
break;
}
+ radeon_bo_unmap(attr->bo);
+
if (mapped_named_bo)
{
ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
@@ -577,6 +582,8 @@ static void r700AlignDataToDword(GLcontext *ctx,
radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
+ radeon_bo_map(attr->bo, 1);
+
if (!input->BufferObj->Pointer)
{
ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
@@ -596,6 +603,7 @@ static void r700AlignDataToDword(GLcontext *ctx,
}
}
+ radeon_bo_unmap(attr->bo);
if (mapped_named_bo)
{
ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
@@ -664,14 +672,18 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input
radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
&context->stream_desc[index].bo_offset, size, 32);
+
+ radeon_bo_map(context->stream_desc[index].bo, 1);
assert(context->stream_desc[index].bo->ptr != NULL);
+
+
dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
context->stream_desc[index].bo_offset);
switch (context->stream_desc[index].dwords)
{
case 1:
- radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
break;
case 2:
radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
@@ -686,6 +698,7 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input
assert(0);
break;
}
+ radeon_bo_unmap(context->stream_desc[index].bo);
}
}
@@ -757,6 +770,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
&context->ind_buf.bo_offset, size, 4);
+ radeon_bo_map(context->ind_buf.bo, 1);
assert(context->ind_buf.bo->ptr != NULL);
out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
@@ -770,6 +784,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
*out++ = in[i];
}
+ radeon_bo_unmap(context->ind_buf.bo);
#if MESA_BIG_ENDIAN
}
else
@@ -780,6 +795,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
&context->ind_buf.bo_offset, size, 4);
+ radeon_bo_map(context->ind_buf.bo, 1);
assert(context->ind_buf.bo->ptr != NULL);
out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
@@ -792,6 +808,7 @@ static void r700FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
{
*out++ = in[i];
}
+ radeon_bo_unmap(context->ind_buf.bo);
#endif
}
@@ -837,11 +854,13 @@ static void r700SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer
radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
&context->ind_buf.bo_offset, size, 4);
+ radeon_bo_map(context->ind_buf.bo, 1);
assert(context->ind_buf.bo->ptr != NULL);
dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
_mesa_memcpy(dst_ptr, src_ptr, size);
+ radeon_bo_unmap(context->ind_buf.bo);
context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
context->ind_buf.count = mesa_ind_buf->count;
diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c
index 955ea4e4e1..2eed1acc2f 100644
--- a/src/mesa/drivers/dri/r600/r700_shader.c
+++ b/src/mesa/drivers/dri/r600/r700_shader.c
@@ -159,13 +159,18 @@ void Init_R700_Shader(R700_Shader * pShader)
pShader->lstVTXInstructions.uNumOfNode=0;
}
+void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF)
+{
+ pShader->plstCFInstructions_active = plstCF; /* AddCFInstruction appends to, and indexes from, this list */
+}
+
void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst)
{
R700ControlFlowSXClause* pSXClause;
R700ControlFlowSMXClause* pSMXClause;
- pCFInst->m_uIndex = pShader->lstCFInstructions.uNumOfNode;
- AddInstToList(&(pShader->lstCFInstructions),
+ pCFInst->m_uIndex = pShader->plstCFInstructions_active->uNumOfNode;
+ AddInstToList(pShader->plstCFInstructions_active,
(R700ShaderInstruction*)pCFInst);
pShader->uShaderBinaryDWORDSize += GetInstructionSize(pCFInst->m_ShaderInstType);
diff --git a/src/mesa/drivers/dri/r600/r700_shader.h b/src/mesa/drivers/dri/r600/r700_shader.h
index c6a058617e..0599ffd901 100644
--- a/src/mesa/drivers/dri/r600/r700_shader.h
+++ b/src/mesa/drivers/dri/r600/r700_shader.h
@@ -109,6 +109,7 @@ typedef struct R700_Shader
GLuint uStackSize;
GLuint uMaxCallDepth;
+ TypedShaderList * plstCFInstructions_active;
TypedShaderList lstCFInstructions;
TypedShaderList lstALUInstructions;
TypedShaderList lstTEXInstructions;
@@ -132,13 +133,13 @@ void TakeInstOutFromList(TypedShaderList * plstCFInstructions, R700ShaderInstruc
void ResolveLinks(R700_Shader *pShader);
void Assemble(R700_Shader *pShader);
-
//Interface
void Init_R700_Shader(R700_Shader * pShader);
void AddCFInstruction(R700_Shader *pShader, R700ControlFlowInstruction *pCFInst);
void AddVTXInstruction(R700_Shader *pShader, R700VertexInstruction *pVTXInst);
void AddTEXInstruction(R700_Shader *pShader, R700TextureInstruction *pTEXInst);
void AddALUInstruction(R700_Shader *pShader, R700ALUInstruction *pALUInst);
+void SetActiveCFlist(R700_Shader *pShader, TypedShaderList * plstCF);
void LoadProgram(R700_Shader *pShader);
void UpdateShaderRegisters(R700_Shader *pShader);
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index ffc6068bd8..d3d1da7959 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -111,6 +111,15 @@ unsigned int Map_Vertex_Output(r700_AssemblerBase *pAsm,
}
}
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[i] = unTotal++;
+ }
+ }
+
return (unTotal - unStart);
}
@@ -235,6 +244,8 @@ void Map_Vertex_Program(GLcontext *ctx,
pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
}
+ pAsm->flag_reg_index = pAsm->number_used_registers++;
+
pAsm->uFirstHelpReg = pAsm->number_used_registers;
}
@@ -324,7 +335,10 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
return NULL;
}
- if(GL_FALSE == AssembleInstr(vp->mesa_program->Base.NumInstructions,
+ InitShaderProgram(&(vp->r700AsmCode));
+
+ if(GL_FALSE == AssembleInstr(0,
+ vp->mesa_program->Base.NumInstructions,
&(vp->mesa_program->Base.Instructions[0]),
&(vp->r700AsmCode)) )
{
@@ -336,6 +350,11 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
return NULL;
}
+ if( GL_FALSE == RelocProgram(&(vp->r700AsmCode)) )
+ {
+ return NULL; /* pointer-returning function: signal failure with NULL like the error paths above */
+ }
+
vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
: (vp->r700AsmCode.number_used_registers - 1);
@@ -612,6 +631,12 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx)
paramList = vp->mesa_program->Base.Parameters;
if(NULL != paramList) {
+ /* vp->mesa_program was cloned, so it is not updated by the GLSL shader API. */
+ /* _mesa_reference_program has already checked that the GLSL shProg is ok and set ctx->VertexProgram._Current, */
+ /* so take the uniform values from ctx->VertexProgram._Current. */
+ struct gl_program_parameter_list *paramListOrginal =
+ ctx->VertexProgram._Current->Base.Parameters;
+
_mesa_load_state_parameters(ctx, paramList);
if (paramList->NumParameters > R700_MAX_DX9_CONSTS)
@@ -624,10 +649,20 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx)
unNumParamData = paramList->NumParameters;
for(ui=0; ui<unNumParamData; ui++) {
- r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
- r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
- r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
- r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM)
+ {
+ r700->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
+ r700->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
+ r700->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
+ r700->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+ }
+ else
+ {
+ r700->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
+ r700->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
+ r700->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
+ r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ }
}
} else
r700->vs.num_consts = 0;