summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r600/r700_assembler.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c')
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c2780
1 files changed, 2484 insertions, 296 deletions
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 903b6968be..0ff16b4ddd 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -32,12 +32,49 @@
#include "main/mtypes.h"
#include "main/imports.h"
+#include "shader/prog_parameter.h"
#include "radeon_debug.h"
#include "r600_context.h"
#include "r700_assembler.h"
+#define USE_CF_FOR_CONTINUE_BREAK 1
+#define USE_CF_FOR_POP_AFTER 1
+
+struct prog_instruction noise1_insts[12] = {
+ {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
+ {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
+};
+float noise1_const[2][4] = {
+ {0.300000f, 0.900000f, 0.500000f, 0.300000f}
+};
+
+COMPILED_SUB noise1_presub = {
+ &(noise1_insts[0]),
+ 12,
+ 2,
+ 1,
+ 0,
+ &(noise1_const[0]),
+ SWIZZLE_X,
+ SWIZZLE_X,
+ SWIZZLE_X,
+ SWIZZLE_X,
+ {0,0,0},
+ 0
+};
+
BITS addrmode_PVSDST(PVSDST * pPVSDST)
{
return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
@@ -327,21 +364,27 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
return(format);
}
-unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
+unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3)
{
- if(pAsm->D.dst.op3)
+ if(nIsOp3 > 0)
{
return 3;
}
- switch (pAsm->D.dst.opcode)
+ switch (opcode)
{
- case SQ_OP2_INST_ADD:
+ case SQ_OP2_INST_ADD:
+ case SQ_OP2_INST_KILLE:
+ case SQ_OP2_INST_KILLGT:
+ case SQ_OP2_INST_KILLGE:
+ case SQ_OP2_INST_KILLNE:
case SQ_OP2_INST_MUL:
case SQ_OP2_INST_MAX:
case SQ_OP2_INST_MIN:
//case SQ_OP2_INST_MAX_DX10:
//case SQ_OP2_INST_MIN_DX10:
+ case SQ_OP2_INST_SETE:
+ case SQ_OP2_INST_SETNE:
case SQ_OP2_INST_SETGT:
case SQ_OP2_INST_SETGE:
case SQ_OP2_INST_PRED_SETE:
@@ -354,9 +397,10 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
return 2;
case SQ_OP2_INST_MOV:
+ case SQ_OP2_INST_MOVA_FLOOR:
case SQ_OP2_INST_FRACT:
case SQ_OP2_INST_FLOOR:
- case SQ_OP2_INST_KILLGT:
+ case SQ_OP2_INST_TRUNC:
case SQ_OP2_INST_EXP_IEEE:
case SQ_OP2_INST_LOG_CLAMPED:
case SQ_OP2_INST_LOG_IEEE:
@@ -368,7 +412,7 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
return 1;
default: radeon_error(
- "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
+ "Need instruction operand number for %x.\n", opcode);
};
return 3;
@@ -382,103 +426,128 @@ int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700
pAsm->pR700Shader = pShader;
pAsm->currentShaderType = spt;
- pAsm->cf_last_export_ptr = NULL;
+ pAsm->cf_last_export_ptr = NULL;
- pAsm->cf_current_export_clause_ptr = NULL;
- pAsm->cf_current_alu_clause_ptr = NULL;
- pAsm->cf_current_tex_clause_ptr = NULL;
- pAsm->cf_current_vtx_clause_ptr = NULL;
- pAsm->cf_current_cf_clause_ptr = NULL;
+ pAsm->cf_current_export_clause_ptr = NULL;
+ pAsm->cf_current_alu_clause_ptr = NULL;
+ pAsm->cf_current_tex_clause_ptr = NULL;
+ pAsm->cf_current_vtx_clause_ptr = NULL;
+ pAsm->cf_current_cf_clause_ptr = NULL;
- // No clause has been created yet
- pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
+ // No clause has been created yet
+ pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
- pAsm->number_of_colorandz_exports = 0;
- pAsm->number_of_exports = 0;
- pAsm->number_of_export_opcodes = 0;
+ pAsm->number_of_colorandz_exports = 0;
+ pAsm->number_of_exports = 0;
+ pAsm->number_of_export_opcodes = 0;
+ pAsm->alu_x_opcode = 0;
- pAsm->D.bits = 0;
- pAsm->S[0].bits = 0;
- pAsm->S[1].bits = 0;
- pAsm->S[2].bits = 0;
+ pAsm->D2.bits = 0;
- pAsm->uLastPosUpdate = 0;
+ pAsm->D.bits = 0;
+ pAsm->S[0].bits = 0;
+ pAsm->S[1].bits = 0;
+ pAsm->S[2].bits = 0;
+
+ pAsm->uLastPosUpdate = 0;
- *(BITS *) &pAsm->fp_stOutFmt0 = 0;
+ *(BITS *) &pAsm->fp_stOutFmt0 = 0;
- pAsm->uIIns = 0;
- pAsm->uOIns = 0;
- pAsm->number_used_registers = 0;
- pAsm->uUsedConsts = 256;
+ pAsm->uIIns = 0;
+ pAsm->uOIns = 0;
+ pAsm->number_used_registers = 0;
+ pAsm->uUsedConsts = 256;
- // Fragment programs
- pAsm->uBoolConsts = 0;
- pAsm->uIntConsts = 0;
- pAsm->uInsts = 0;
- pAsm->uConsts = 0;
+ // Fragment programs
+ pAsm->uBoolConsts = 0;
+ pAsm->uIntConsts = 0;
+ pAsm->uInsts = 0;
+ pAsm->uConsts = 0;
- pAsm->FCSP = 0;
- pAsm->fc_stack[0].type = FC_NONE;
+ pAsm->FCSP = 0;
+ pAsm->fc_stack[0].type = FC_NONE;
- pAsm->branch_depth = 0;
- pAsm->max_branch_depth = 0;
+ pAsm->aArgSubst[0] =
+ pAsm->aArgSubst[1] =
+ pAsm->aArgSubst[2] =
+ pAsm->aArgSubst[3] = (-1);
- pAsm->aArgSubst[0] =
- pAsm->aArgSubst[1] =
- pAsm->aArgSubst[2] =
- pAsm->aArgSubst[3] = (-1);
+ pAsm->uOutputs = 0;
- pAsm->uOutputs = 0;
+ for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
+ {
+ pAsm->color_export_register_number[i] = (-1);
+ }
- for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
- {
- pAsm->color_export_register_number[i] = (-1);
- }
+ pAsm->depth_export_register_number = (-1);
+ pAsm->stencil_export_register_number = (-1);
+ pAsm->coverage_to_mask_export_register_number = (-1);
+ pAsm->mask_export_register_number = (-1);
- pAsm->depth_export_register_number = (-1);
- pAsm->stencil_export_register_number = (-1);
- pAsm->coverage_to_mask_export_register_number = (-1);
- pAsm->mask_export_register_number = (-1);
+ pAsm->starting_export_register_number = 0;
+ pAsm->starting_vfetch_register_number = 0;
+ pAsm->starting_temp_register_number = 0;
+ pAsm->uFirstHelpReg = 0;
- pAsm->starting_export_register_number = 0;
- pAsm->starting_vfetch_register_number = 0;
- pAsm->starting_temp_register_number = 0;
- pAsm->uFirstHelpReg = 0;
+ pAsm->input_position_is_used = GL_FALSE;
+ pAsm->input_normal_is_used = GL_FALSE;
+ for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
+ {
+ pAsm->input_color_is_used[ i ] = GL_FALSE;
+ }
- pAsm->input_position_is_used = GL_FALSE;
- pAsm->input_normal_is_used = GL_FALSE;
+ for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
+ {
+ pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
+ }
+ for (i=0; i<VERT_ATTRIB_MAX; i++)
+ {
+ pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
+ }
- for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
- {
- pAsm->input_color_is_used[ i ] = GL_FALSE;
- }
+ pAsm->number_of_inputs = 0;
- for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
- {
- pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
- }
+ pAsm->is_tex = GL_FALSE;
+ pAsm->need_tex_barrier = GL_FALSE;
- for (i=0; i<VERT_ATTRIB_MAX; i++)
- {
- pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
- }
+ pAsm->subs = NULL;
+ pAsm->unSubArraySize = 0;
+ pAsm->unSubArrayPointer = 0;
+ pAsm->callers = NULL;
+ pAsm->unCallerArraySize = 0;
+ pAsm->unCallerArrayPointer = 0;
+
+ pAsm->CALLSP = 0;
+ pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
+ pAsm->CALLSTACK[0].plstCFInstructions_local
+ = &(pAsm->pR700Shader->lstCFInstructions);
+
+ pAsm->CALLSTACK[0].max = 0;
+ pAsm->CALLSTACK[0].current = 0;
+
+ SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
- pAsm->number_of_inputs = 0;
+ pAsm->unCFflags = 0;
- pAsm->is_tex = GL_FALSE;
- pAsm->need_tex_barrier = GL_FALSE;
+ pAsm->presubs = NULL;
+ pAsm->unPresubArraySize = 0;
+ pAsm->unNumPresub = 0;
+ pAsm->unCurNumILInsts = 0;
- return 0;
+ pAsm->unVetTexBits = 0;
+
+ return 0;
}
GLboolean IsTex(gl_inst_opcode Opcode)
{
- if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
+ if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
+ (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) )
{
return GL_TRUE;
}
@@ -591,6 +660,31 @@ int check_current_clause(r700_AssemblerBase* pAsm,
return GL_TRUE;
}
+GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
+{
+ if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr =
+ (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
+
+ if (pAsm->cf_current_cf_clause_ptr != NULL)
+ {
+ Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
+ AddCFInstruction( pAsm->pR700Shader,
+ (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
+ }
+ else
+ {
+ radeon_error("Could not allocate a new VFetch CF instruction.\n");
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
R700VertexInstruction* vertex_instruction_ptr)
{
@@ -797,6 +891,7 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
GLubyte element,
GLuint _signed,
GLboolean normalize,
+ GLenum format,
VTX_FETCH_METHOD * pFetchMethod)
{
GLuint client_size_inbyte;
@@ -845,10 +940,21 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
- vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+ if(format == GL_BGRA)
+ {
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+ }
+ else
+ {
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
+ vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+
+ }
vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
@@ -986,7 +1092,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm)
checkop_init(pAsm);
- if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
@@ -997,7 +1104,8 @@ GLboolean checkop2(r700_AssemblerBase* pAsm)
{
bSrcConst[0] = GL_FALSE;
}
- if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
@@ -1030,7 +1138,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm)
checkop_init(pAsm);
- if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
@@ -1041,7 +1150,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm)
{
bSrcConst[0] = GL_FALSE;
}
- if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
@@ -1052,7 +1162,8 @@ GLboolean checkop3(r700_AssemblerBase* pAsm)
{
bSrcConst[1] = GL_FALSE;
}
- if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
+ if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) ||
+ (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
(pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
(pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
(pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
@@ -1152,6 +1263,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
case PROGRAM_STATE_VAR:
+ case PROGRAM_UNIFORM:
if (1 == pILInst->SrcReg[src].RelAddr)
{
setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
@@ -1162,10 +1274,18 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
}
pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
- pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
+ if(pILInst->SrcReg[src].Index < 0)
+ {
+ WARN_ONCE("Negative register offsets not supported yet!\n");
+ pAsm->S[fld].src.reg = 0;
+ }
+ else
+ {
+ pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
+ }
break;
case PROGRAM_INPUT:
- setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
+ setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
pAsm->S[fld].src.rtype = SRC_REG_INPUT;
switch (pAsm->currentShaderType)
{
@@ -1178,7 +1298,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
}
break;
default:
- radeon_error("Invalid source argument type\n");
+ radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
return GL_FALSE;
}
}
@@ -1234,6 +1354,15 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm)
pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
+ if(pILInst->SaturateMode == SATURATE_ZERO_ONE)
+ {
+ pAsm->D2.dst2.SaturateMode = 1;
+ }
+ else
+ {
+ pAsm->D2.dst2.SaturateMode = 0;
+ }
+
return GL_TRUE;
}
@@ -1293,6 +1422,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
else
{
switch (pILInst->SrcReg[0].File) {
+ case PROGRAM_UNIFORM:
case PROGRAM_CONSTANT:
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
@@ -1305,25 +1435,65 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
break;
case PROGRAM_INPUT:
- switch (pILInst->SrcReg[0].Index)
+ if(SPT_VP == pAsm->currentShaderType)
{
- case FRAG_ATTRIB_COL0:
- case FRAG_ATTRIB_COL1:
- case FRAG_ATTRIB_TEX0:
- case FRAG_ATTRIB_TEX1:
- case FRAG_ATTRIB_TEX2:
- case FRAG_ATTRIB_TEX3:
- case FRAG_ATTRIB_TEX4:
- case FRAG_ATTRIB_TEX5:
- case FRAG_ATTRIB_TEX6:
- case FRAG_ATTRIB_TEX7:
- bValidTexCoord = GL_TRUE;
+ switch (pILInst->SrcReg[0].Index)
+ {
+ case VERT_ATTRIB_TEX0:
+ case VERT_ATTRIB_TEX1:
+ case VERT_ATTRIB_TEX2:
+ case VERT_ATTRIB_TEX3:
+ case VERT_ATTRIB_TEX4:
+ case VERT_ATTRIB_TEX5:
+ case VERT_ATTRIB_TEX6:
+ case VERT_ATTRIB_TEX7:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ break;
+ }
+ }
+ else
+ {
+ switch (pILInst->SrcReg[0].Index)
+ {
+ case FRAG_ATTRIB_WPOS:
+ case FRAG_ATTRIB_COL0:
+ case FRAG_ATTRIB_COL1:
+ case FRAG_ATTRIB_FOGC:
+ case FRAG_ATTRIB_TEX0:
+ case FRAG_ATTRIB_TEX1:
+ case FRAG_ATTRIB_TEX2:
+ case FRAG_ATTRIB_TEX3:
+ case FRAG_ATTRIB_TEX4:
+ case FRAG_ATTRIB_TEX5:
+ case FRAG_ATTRIB_TEX6:
+ case FRAG_ATTRIB_TEX7:
+ bValidTexCoord = GL_TRUE;
+ pAsm->S[0].src.reg =
+ pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
+ pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ break;
+ case FRAG_ATTRIB_FACE:
+ fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
+ break;
+ case FRAG_ATTRIB_PNTC:
+ fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
+ break;
+ }
+
+ if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
+ (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
+ {
+ bValidTexCoord = GL_TRUE;
pAsm->S[0].src.reg =
pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
pAsm->S[0].src.rtype = SRC_REG_INPUT;
- break;
+ }
}
- break;
+
+ break;
}
}
@@ -1368,8 +1538,17 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+ tex_instruction_ptr->m_Word0.f.alt_const = 0;
- tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ if(SPT_VP == pAsm->currentShaderType)
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
+ pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
+ }
+ else
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ }
tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
if (normalized) {
@@ -1388,7 +1567,6 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
-
tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
// dst
@@ -1505,6 +1683,10 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
{
src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
}
+ else if (pSource->rtype == SRC_REC_LITERAL)
+ {
+ src_sel = SQ_ALU_SRC_LITERAL;
+ }
else
{
radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
@@ -1594,7 +1776,8 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
return GL_FALSE;
}
- if ( pAsm->cf_current_alu_clause_ptr == NULL ||
+ if ( pAsm->alu_x_opcode != 0 ||
+ pAsm->cf_current_alu_clause_ptr == NULL ||
( (pAsm->cf_current_alu_clause_ptr != NULL) &&
(pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
) )
@@ -1624,9 +1807,17 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
- //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
- pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
+
+ if(pAsm->alu_x_opcode != 0)
+ {
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
+ pAsm->alu_x_opcode = 0;
+ }
+ else
+ {
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
+ }
pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
@@ -1634,7 +1825,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
}
else
{
- pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
}
// If this clause constains any instruction that is forward dependent on a TEX instruction,
@@ -1911,7 +2102,7 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm,
GLuint swizzle_key;
- GLuint number_of_operands = r700GetNumOperands(pAsm);
+ GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
for (src=0; src<number_of_operands; src++)
{
@@ -2000,7 +2191,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
GLuint swizzle_key;
- GLuint number_of_operands = r700GetNumOperands(pAsm);
+ GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
for (src=0; src<number_of_operands; src++)
{
@@ -2033,7 +2224,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
if( is_gpr(sel) )
{
if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
- {
+ {
return GL_FALSE;
}
@@ -2045,7 +2236,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
else
{
if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
- {
+ {
return GL_FALSE;
}
}
@@ -2057,7 +2248,7 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
if( is_cfile(sel) )
{
if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
- {
+ {
return GL_FALSE;
}
}
@@ -2069,6 +2260,10 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
{
+ R700ALUInstruction * alu_instruction_ptr;
+ R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
+ R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
+
GLuint number_of_scalar_operations;
GLboolean is_single_scalar_operation;
GLuint scalar_channel_index;
@@ -2077,7 +2272,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
int current_source_index;
GLuint contiguous_slots_needed;
- GLuint uNumSrc = r700GetNumOperands(pAsm);
+ GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
//GLuint channel_swizzle, j;
//GLuint chan_counter[4] = {0, 0, 0, 0};
//PVSSRC * pSource[3];
@@ -2134,23 +2329,44 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
contiguous_slots_needed = 0;
- if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
+ if(!is_single_scalar_operation)
{
contiguous_slots_needed = 4;
}
+ contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
+
initialize(pAsm);
for (scalar_channel_index=0;
scalar_channel_index < number_of_scalar_operations;
scalar_channel_index++)
{
- R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
- if (alu_instruction_ptr == NULL)
- {
- return GL_FALSE;
- }
- Init_R700ALUInstruction(alu_instruction_ptr);
+ if(scalar_channel_index == (number_of_scalar_operations-1))
+ {
+ switch(pAsm->D2.dst2.literal_slots)
+ {
+ case 0:
+ alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+ Init_R700ALUInstruction(alu_instruction_ptr);
+ break;
+ case 1:
+ alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
+ Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
+ alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
+ break;
+ case 2:
+ alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
+ Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
+ alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
+ break;
+ };
+ }
+ else
+ {
+ alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+ Init_R700ALUInstruction(alu_instruction_ptr);
+ }
//src 0
current_source_index = 0;
@@ -2160,7 +2376,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
current_source_index,
pcurrent_source,
scalar_channel_index) )
- {
+ {
return GL_FALSE;
}
@@ -2174,13 +2390,13 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
current_source_index,
pcurrent_source,
scalar_channel_index) )
- {
+ {
return GL_FALSE;
}
}
//other bits
- alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
+ alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
if( (is_single_scalar_operation == GL_TRUE)
|| (GL_TRUE == bSplitInst) )
@@ -2192,9 +2408,17 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
}
- alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+ alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
+ if(1 == pAsm->D.dst.predicated)
+ {
+ alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
+ alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
+ }
+ else
+ {
+ alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
+ }
// dst
if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
@@ -2203,7 +2427,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
}
else
- {
+ {
radeon_error("Only temp destination registers supported for ALU dest regs.\n");
return GL_FALSE;
}
@@ -2233,7 +2457,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
- alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
+ alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
if (pAsm->D.dst.op3)
{
@@ -2260,8 +2484,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
{
alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
- alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs;
+ alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs;
//alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
//alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
@@ -2289,8 +2513,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
{
alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
- alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
- alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
+ alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs;
+ alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs;
//alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
//alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
@@ -2317,7 +2541,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
}
if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
- {
+ {
return GL_FALSE;
}
@@ -2328,19 +2552,19 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
if (is_single_scalar_operation)
{
if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
- {
+ {
return GL_FALSE;
}
}
else
{
if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
- {
- return 1;
+ {
+ return GL_FALSE;
}
}
- contiguous_slots_needed = 0;
+ contiguous_slots_needed -= 1;
}
return GL_TRUE;
@@ -2391,11 +2615,14 @@ GLboolean next_ins(r700_AssemblerBase *pAsm)
//reset for next inst.
pAsm->D.bits = 0;
+ pAsm->D2.bits = 0;
pAsm->S[0].bits = 0;
pAsm->S[1].bits = 0;
pAsm->S[2].bits = 0;
pAsm->is_tex = GL_FALSE;
pAsm->need_tex_barrier = GL_FALSE;
+ pAsm->D2.bits = 0;
+ pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
return GL_TRUE;
}
@@ -2514,6 +2741,35 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
+GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
+{ /* TODO: ar values dont' persist between clauses */
+ if( GL_FALSE == checkop1(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = 0;
+ pAsm->D.dst.writex = 0;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
GLboolean assemble_BAD(char *opcode_str)
{
radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
@@ -2599,9 +2855,44 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
-GLboolean assemble_COS(r700_AssemblerBase *pAsm)
+GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
{
- return assemble_math_function(pAsm, SQ_OP2_INST_COS);
+ int tmp;
+ checkop1(pAsm);
+
+ tmp = gethelpr(pAsm);
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ assemble_src(pAsm, 0, -1);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->C[0].f = 1/(3.1415926535 * 2);
+ pAsm->C[1].f = 0.0F;
+ next_ins(pAsm);
+
+ pAsm->D.dst.opcode = opcode;
+ pAsm->D.dst.math = 1;
+
+ assemble_dst(pAsm);
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ next_ins(pAsm);
+
+ //TODO - replicate if more channels set in WriteMask
+ return GL_TRUE;
+
}
GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
@@ -2635,7 +2926,7 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
}
else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
{
- onecomp_PVSSRC(&(pAsm->S[1].src), 3);
+ onecomp_PVSSRC(&(pAsm->S[0].src), 3);
}
if ( GL_FALSE == next_ins(pAsm) )
@@ -2688,6 +2979,133 @@ GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
{
return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
}
+
+GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
+{
+ BITS tmp;
+
+ checkop1(pAsm);
+
+ tmp = gethelpr(pAsm);
+
+ // FLOOR tmp.x, a.x
+ // EX2 dst.x tmp.x
+
+ if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ pAsm->D.dst.math = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ // FRACT dst.y a.x
+
+ if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ // EX2 dst.z, a.x
+
+ if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ pAsm->D.dst.math = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ // MOV dst.w 1.0
+
+ if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ }
+
+ return GL_TRUE;
+}
GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
{
@@ -2742,17 +3160,19 @@ GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
-GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
-{
- checkop1(pAsm);
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
+{
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
- pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
-
- if ( GL_FALSE == assemble_dst(pAsm) )
- {
- return GL_FALSE;
- }
+ if(pILInst->Opcode == OPCODE_KIL)
+ checkop1(pAsm);
+
+ pAsm->D.dst.opcode = opcode;
+ //pAsm->D.dst.math = 1;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = 0;
pAsm->D.dst.writex = 0;
pAsm->D.dst.writey = 0;
pAsm->D.dst.writez = 0;
@@ -2761,30 +3181,34 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
pAsm->S[0].src.reg = 0;
-
setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
noneg_PVSSRC(&(pAsm->S[0].src));
- pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
-
- if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File)
+ if(pILInst->Opcode == OPCODE_KIL_NV)
{
- pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[1].src.reg = 0;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
+ neg_PVSSRC(&(pAsm->S[1].src));
}
else
- { //PROGRAM_OUTPUT
- pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index];
+ {
+ if( GL_FALSE == assemble_src(pAsm, 0, 1) )
+ {
+ return GL_FALSE;
+ }
+
}
-
- setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
- noswizzle_PVSSRC(&(pAsm->S[1].src));
-
+
if ( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
+ /* Doc says KILL has to be last(end) ALU clause */
pAsm->pR700Shader->killIsUsed = GL_TRUE;
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
return GL_TRUE;
}
@@ -2848,6 +3272,7 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
{
return GL_FALSE;
}
+
if( GL_FALSE == assemble_src(pAsm, 2, -1) )
{
return GL_FALSE;
@@ -2878,6 +3303,217 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
+GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
+{
+ BITS tmp1, tmp2, tmp3;
+
+ checkop1(pAsm);
+
+ tmp1 = gethelpr(pAsm);
+ tmp2 = gethelpr(pAsm);
+ tmp3 = gethelpr(pAsm);
+
+ // FIXME: The hardware can do fabs() directly on input
+ // elements, but the compiler doesn't have the
+ // capability to use that.
+
+ // MAX tmp1.x, a.x, -a.x (fabs(a.x))
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp1;
+ pAsm->D.dst.writex = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->S[1].bits = pAsm->S[0].bits;
+ flipneg_PVSSRC(&(pAsm->S[1].src));
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // Entire algo:
+ //
+ // LG2 tmp2.x, tmp1.x
+ // FLOOR tmp3.x, tmp2.x
+ // MOV dst.x, tmp3.x
+ // ADD tmp3.x, tmp2.x, -tmp3.x
+ // EX2 dst.y, tmp3.x
+ // MOV dst.z, tmp2.x
+ // MOV dst.w, 1.0
+
+ // LG2 tmp2.x, tmp1.x
+ // FLOOR tmp3.x, tmp2.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+ pAsm->D.dst.math = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp2;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp1;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp3;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp2;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // MOV dst.x, tmp3.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp3;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // ADD tmp3.x, tmp2.x, -tmp3.x
+ // EX2 dst.y, tmp3.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp3;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp2;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[1].src.reg = tmp3;
+
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+ neg_PVSSRC(&(pAsm->S[1].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ pAsm->D.dst.math = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp3;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // MOV dst.z, tmp2.x
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp2;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ // MOV dst.w 1.0
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp1;
+
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
{
int tmp, ii;
@@ -3035,6 +3671,7 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
pAsm->S[0].src.rtype = srcType;
pAsm->S[0].src.reg = srcReg;
setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
pAsm->S[1].src.reg = tmp;
setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
@@ -3371,77 +4008,137 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
}
-GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
-{
- return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
-}
-
GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
{
BITS tmp;
- checkop1(pAsm);
+ checkop1(pAsm);
- tmp = gethelpr(pAsm);
+ tmp = gethelpr(pAsm);
+ /* tmp.x = src /2*PI */
+ pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
- // COS tmp.x, a.x
- pAsm->D.dst.opcode = SQ_OP2_INST_COS;
- pAsm->D.dst.math = 1;
+ assemble_src(pAsm, 0, -1);
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->C[0].f = 1/(3.1415926535 * 2);
+ pAsm->C[1].f = 0.0F;
- if( GL_FALSE == assemble_src(pAsm, 0, -1) )
- {
- return GL_FALSE;
- }
+ next_ins(pAsm);
- if ( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+ // COS dst.x, a.x
+ pAsm->D.dst.opcode = SQ_OP2_INST_COS;
+ pAsm->D.dst.math = 1;
- // SIN tmp.y, a.x
- pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
- pAsm->D.dst.math = 1;
+ assemble_dst(pAsm);
+ /* mask y */
+ pAsm->D.dst.writey = 0;
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writey = 1;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
- if( GL_FALSE == assemble_src(pAsm, 0, -1) )
- {
- return GL_FALSE;
- }
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
- if( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+ // SIN dst.y, a.x
+ pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
+ pAsm->D.dst.math = 1;
- // MOV dst.mask, tmp
- pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ assemble_dst(pAsm);
+ /* mask x */
+ pAsm->D.dst.writex = 0;
- if( GL_FALSE == assemble_dst(pAsm) )
- {
- return GL_FALSE;
- }
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+ noneg_PVSSRC(&(pAsm->S[0].src));
- setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
- pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
- pAsm->S[0].src.reg = tmp;
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
- noswizzle_PVSSRC(&(pAsm->S[0].src));
- pAsm->S[0].src.swizzlez = SQ_SEL_0;
- pAsm->S[0].src.swizzlew = SQ_SEL_0;
+ return GL_TRUE;
+}
- if ( GL_FALSE == next_ins(pAsm) )
- {
- return GL_FALSE;
- }
+GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
+{
+ if( GL_FALSE == checkop2(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = opcode;
+ //pAsm->D.dst.math = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == assemble_src(pAsm, 1, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
+{
+ struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
+
+ pAsm->D.dst.opcode = opcode;
+ pAsm->D.dst.math = 1;
+ pAsm->D.dst.predicated = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pAsm->uHelpReg;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
+ pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[1].src.reg = pAsm->uHelpReg;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[1].src));
+ pAsm->S[1].src.swizzlex = SQ_SEL_0;
+ pAsm->S[1].src.swizzley = SQ_SEL_0;
+ pAsm->S[1].src.swizzlez = SQ_SEL_0;
+ pAsm->S[1].src.swizzlew = SQ_SEL_0;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
return GL_TRUE;
}
@@ -3524,6 +4221,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
{
+ case PROGRAM_UNIFORM:
case PROGRAM_CONSTANT:
case PROGRAM_LOCAL_PARAM:
case PROGRAM_ENV_PARAM:
@@ -3544,22 +4242,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
need_barrier = GL_TRUE;
}
- switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
- {
- case OPCODE_TEX:
- break;
- case OPCODE_TXB:
- radeon_error("do not support TXB yet\n");
- return GL_FALSE;
- break;
- case OPCODE_TXP:
- break;
- default:
- radeon_error("Internal error: bad texture op (not TEX)\n");
- return GL_FALSE;
- break;
- }
-
if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
{
GLuint tmp = gethelpr(pAsm);
@@ -3637,24 +4319,6 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
- * have to do explicit instruction
- */
- pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp1;
- pAsm->D.dst.writez = 1;
-
- setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
- pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
- pAsm->S[0].src.reg = tmp1;
- noswizzle_PVSSRC(&(pAsm->S[0].src));
- pAsm->S[1].bits = pAsm->S[0].bits;
- flipneg_PVSSRC(&(pAsm->S[1].src));
-
- next_ins(pAsm);
-
/* tmp1.z = RCP_e(|tmp1.z|) */
pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
pAsm->D.dst.math = 1;
@@ -3667,13 +4331,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
pAsm->S[0].src.reg = tmp1;
pAsm->S[0].src.swizzlex = SQ_SEL_Z;
+ pAsm->S[0].src.abs = 1;
next_ins(pAsm);
/* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
* MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
* muladd has no writemask, have to use another temp
- * also no support for imm constants, so add 1 here
*/
pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
pAsm->D.dst.op3 = 1;
@@ -3690,30 +4354,12 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
pAsm->S[1].src.reg = tmp1;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
- pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
+ /* immediate c 1.5 */
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->C[0].f = 1.5F;
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
pAsm->S[2].src.reg = tmp1;
- setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1);
-
- next_ins(pAsm);
-
- /* ADD the remaining .5 */
- pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
- setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
- pAsm->D.dst.rtype = DST_REG_TEMPORARY;
- pAsm->D.dst.reg = tmp2;
- pAsm->D.dst.writex = 1;
- pAsm->D.dst.writey = 1;
- pAsm->D.dst.writez = 0;
- pAsm->D.dst.writew = 0;
-
- setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
- pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
- pAsm->S[0].src.reg = tmp2;
- noswizzle_PVSSRC(&(pAsm->S[0].src));
- setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
- pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
- pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5
- noswizzle_PVSSRC(&(pAsm->S[1].src));
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
next_ins(pAsm);
@@ -3738,14 +4384,35 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
}
- pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+ switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
+ {
+ case OPCODE_DDX:
+ /* will these need WQM(1) on CF inst ? */
+ pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
+ break;
+ case OPCODE_DDY:
+ pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
+ break;
+ case OPCODE_TXB:
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
+ break;
+ default:
+ if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C;
+ else
+ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
+ }
+
+ pAsm->is_tex = GL_TRUE;
+ if ( GL_TRUE == need_barrier )
+
pAsm->is_tex = GL_TRUE;
if ( GL_TRUE == need_barrier )
{
pAsm->need_tex_barrier = GL_TRUE;
}
// Set src1 to tex unit id
- pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
+ pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
//No sw info from mesa compiler, so hard code here.
@@ -3779,11 +4446,46 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
pAsm->S[0].src.swizzlew = SQ_SEL_Y;
}
+ if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+ {
+ /* compare value goes to w chan ? */
+ pAsm->S[0].src.swizzlew = SQ_SEL_Z;
+ }
+
if ( GL_FALSE == next_ins(pAsm) )
{
return GL_FALSE;
}
+ /* add ARB shadow ambient but clamp to 0..1 */
+ if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
+ {
+ /* ADD_SAT dst, dst, ambient[texunit] */
+ pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ pAsm->D2.dst2.SaturateMode = 1;
+
+ pAsm->S[0].src.rtype = pAsm->D.dst.rtype;
+ pAsm->S[0].src.reg = pAsm->D.dst.reg;
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+ noneg_PVSSRC(&(pAsm->S[0].src));
+
+ pAsm->S[1].src.rtype = SRC_REG_CONSTANT;
+ pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
+ noswizzle_PVSSRC(&(pAsm->S[1].src));
+ noneg_PVSSRC(&(pAsm->S[1].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ }
+
return GL_TRUE;
}
@@ -3902,27 +4604,909 @@ GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
-GLboolean assemble_IF(r700_AssemblerBase *pAsm)
+static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
{
+ switch (uReason)
+ {
+ case FC_PUSH_VPM:
+ pAsm->CALLSTACK[pAsm->CALLSP].current--;
+ break;
+ case FC_PUSH_WQM:
+ pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
+ break;
+ case FC_LOOP:
+ pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
+ break;
+ case FC_REP:
+ /* TODO : for 16 vp asic, should -= 2; */
+ pAsm->CALLSTACK[pAsm->CALLSP].current -= 1;
+ break;
+ };
+}
+
+static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
+{
+ if(GL_TRUE == bCheckMaxOnly)
+ {
+ switch (uReason)
+ {
+ case FC_PUSH_VPM:
+ if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1)
+ > pAsm->CALLSTACK[pAsm->CALLSP].max)
+ {
+ pAsm->CALLSTACK[pAsm->CALLSP].max =
+ pAsm->CALLSTACK[pAsm->CALLSP].current + 1;
+ }
+ break;
+ case FC_PUSH_WQM:
+ if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4)
+ > pAsm->CALLSTACK[pAsm->CALLSP].max)
+ {
+ pAsm->CALLSTACK[pAsm->CALLSP].max =
+ pAsm->CALLSTACK[pAsm->CALLSP].current + 4;
+ }
+ break;
+ }
+ return;
+ }
+
+ switch (uReason)
+ {
+ case FC_PUSH_VPM:
+ pAsm->CALLSTACK[pAsm->CALLSP].current++;
+ break;
+ case FC_PUSH_WQM:
+ pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
+ break;
+ case FC_LOOP:
+ pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
+ break;
+ case FC_REP:
+ /* TODO : for 16 vp asic, should += 2; */
+ pAsm->CALLSTACK[pAsm->CALLSP].current += 1;
+ break;
+ };
+
+ if(pAsm->CALLSTACK[pAsm->CALLSP].current
+ > pAsm->CALLSTACK[pAsm->CALLSP].max)
+ {
+ pAsm->CALLSTACK[pAsm->CALLSP].max =
+ pAsm->CALLSTACK[pAsm->CALLSP].current;
+ }
+}
+
+GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
+{
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
+
+ return GL_TRUE;
+}
+
+GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
+{
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
+{
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+ assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
+
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ if(GL_TRUE != bHasElse)
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ }
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->FCSP++;
+ pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
+ pAsm->fc_stack[pAsm->FCSP].mid = NULL;
+ pAsm->fc_stack[pAsm->FCSP].midLen= 0;
+ pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
+
+#ifndef USE_CF_FOR_POP_AFTER
+ if(GL_TRUE != bHasElse)
+ {
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
+ }
+#endif /* USE_CF_FOR_POP_AFTER */
+
+ checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
+{
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
+ 0,
+ sizeof(R700ControlFlowGenericClause *) );
+ pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
+ //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
+
+#ifndef USE_CF_FOR_POP_AFTER
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
+#endif /* USE_CF_FOR_POP_AFTER */
+
+ pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
+
return GL_TRUE;
}
GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
{
+#ifdef USE_CF_FOR_POP_AFTER
+ pops(pAsm, 1);
+#endif /* USE_CF_FOR_POP_AFTER */
+
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+ if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
+ {
+ /* no else in between */
+ pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
+ }
+ else
+ {
+ pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
+ }
+
+ if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
+ {
+ FREE(pAsm->fc_stack[pAsm->FCSP].mid);
+ }
+
+ if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
+ {
+ radeon_error("if/endif in shader code are not paired. \n");
+ return GL_FALSE;
+ }
+
+ pAsm->FCSP--;
+
+ decreaseCurrent(pAsm, FC_PUSH_VPM);
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
+{
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->FCSP++;
+ pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
+ pAsm->fc_stack[pAsm->FCSP].mid = NULL;
+ pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
+ pAsm->fc_stack[pAsm->FCSP].midLen = 0;
+ pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
+
+ checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
+{
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+ assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
+
+ unsigned int unFCSP;
+ for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
+ {
+ if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+ {
+ break;
+ }
+ }
+ if(0 == FC_LOOP)
+ {
+ radeon_error("Break is not inside loop/endloop pair.\n");
+ return GL_FALSE;
+ }
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
+ (void *)pAsm->fc_stack[unFCSP].mid,
+ sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+ sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+ pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
+ pAsm->fc_stack[unFCSP].unNumMid++;
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+
+ checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+#endif //USE_CF_FOR_CONTINUE_BREAK
+ return GL_TRUE;
+}
+
+GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
+{
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+ assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
+
+ unsigned int unFCSP;
+ for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
+ {
+ if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+ {
+ break;
+ }
+ }
+ if(0 == FC_LOOP)
+ {
+ radeon_error("Continue is not inside loop/endloop pair.\n");
+ return GL_FALSE;
+ }
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
+ (void *)pAsm->fc_stack[unFCSP].mid,
+ sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+ sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+ pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
+ pAsm->fc_stack[unFCSP].unNumMid++;
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+
+ checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+#endif /* USE_CF_FOR_CONTINUE_BREAK */
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
+{
+ GLuint i;
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
+ pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+
+#ifdef USE_CF_FOR_CONTINUE_BREAK
+ for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
+ {
+ pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
+ }
+ if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
+ {
+ FREE(pAsm->fc_stack[pAsm->FCSP].mid);
+ }
+#endif
+
+ if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
+ {
+ radeon_error("loop/endloop in shader code are not paired. \n");
+ return GL_FALSE;
+ }
+
+ GLuint unFCSP;
+ GLuint unIF = 0;
+ if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
+ {
+ for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
+ {
+ if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+ {
+ breakLoopOnFlag(pAsm, unFCSP);
+ break;
+ }
+ else if(FC_IF == pAsm->fc_stack[unFCSP].type)
+ {
+ unIF++;
+ }
+ }
+ if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
+ {
+#ifdef USE_CF_FOR_POP_AFTER
+ returnOnFlag(pAsm, unIF);
+#else
+ returnOnFlag(pAsm, 0);
+#endif /* USE_CF_FOR_POP_AFTER */
+ pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
+ }
+ }
+
+ pAsm->FCSP--;
+
+ decreaseCurrent(pAsm, FC_LOOP);
+
+ return GL_TRUE;
+}
+
+void add_return_inst(r700_AssemblerBase *pAsm)
+{
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+ //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+}
+
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
+{
+ /* Put in sub */
+ if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
+ {
+ pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
+ sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
+ sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
+ if(NULL == pAsm->subs)
+ {
+ return GL_FALSE;
+ }
+ pAsm->unSubArraySize += 10;
+ }
+
+ pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
+ pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
+ pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
+ pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
+
+ pAsm->CALLSP++;
+ pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer;
+ pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
+ pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
+ = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
+ pAsm->CALLSTACK[pAsm->CALLSP].max = 0;
+ pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
+ SetActiveCFlist(pAsm->pR700Shader,
+ pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
+
+ pAsm->unSubArrayPointer++;
+
+ /* start sub */
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+ pAsm->FCSP++;
+ pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
+
+ checkStackDepth(pAsm, FC_REP, GL_FALSE);
+
return GL_TRUE;
}
-GLboolean AssembleInstr(GLuint uiNumberInsts,
+GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
+{
+ if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP)
+ {
+ radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
+ return GL_FALSE;
+ }
+
+ /* copy max to sub structure */
+ pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
+ = pAsm->CALLSTACK[pAsm->CALLSP].max;
+
+ decreaseCurrent(pAsm, FC_REP);
+
+ pAsm->CALLSP--;
+ SetActiveCFlist(pAsm->pR700Shader,
+ pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
+
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+ pAsm->FCSP--;
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_RET(r700_AssemblerBase *pAsm)
+{
+ GLuint unIF = 0;
+
+ if(pAsm->CALLSP > 0)
+ { /* in sub */
+ GLuint unFCSP;
+ for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
+ {
+ if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
+ {
+ setRetInLoopFlag(pAsm, SQ_SEL_1);
+ breakLoopOnFlag(pAsm, unFCSP);
+ pAsm->unCFflags |= LOOPRET_FLAGS;
+
+ return GL_TRUE;
+ }
+ else if(FC_IF == pAsm->fc_stack[unFCSP].type)
+ {
+ unIF++;
+ }
+ }
+ }
+
+#ifdef USE_CF_FOR_POP_AFTER
+ if(unIF > 0)
+ {
+ pops(pAsm, unIF);
+ }
+#endif /* USE_CF_FOR_POP_AFTER */
+
+ add_return_inst(pAsm);
+
+ return GL_TRUE;
+}
+
+GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
+ GLint nILindex,
+ GLuint uiIL_Shift,
+ GLuint uiNumberInsts,
+ struct prog_instruction *pILInst,
+ PRESUB_DESC * pPresubDesc)
+{
+ GLint uiIL_Offset;
+
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ /* Put in caller */
+ if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
+ {
+ pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
+ sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
+ sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
+ if(NULL == pAsm->callers)
+ {
+ return GL_FALSE;
+ }
+ pAsm->unCallerArraySize += 10;
+ }
+
+ uiIL_Offset = nILindex + uiIL_Shift;
+ pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset;
+ pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
+
+ pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL;
+ pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL;
+
+ pAsm->unCallerArrayPointer++;
+
+ int j;
+ GLuint max;
+ GLuint unSubID;
+ GLboolean bRet;
+ for(j=0; j<pAsm->unSubArrayPointer; j++)
+ {
+ if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
+ { /* compiled before */
+
+ max = pAsm->subs[j].unStackDepthMax
+ + pAsm->CALLSTACK[pAsm->CALLSP].current;
+ if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
+ {
+ pAsm->CALLSTACK[pAsm->CALLSP].max = max;
+ }
+
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
+ return GL_TRUE;
+ }
+ }
+
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
+ unSubID = pAsm->unSubArrayPointer;
+
+ bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
+
+ if(GL_TRUE == bRet)
+ {
+ max = pAsm->subs[unSubID].unStackDepthMax
+ + pAsm->CALLSTACK[pAsm->CALLSP].current;
+ if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
+ {
+ pAsm->CALLSTACK[pAsm->CALLSP].max = max;
+ }
+
+ pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
+ }
+
+ return bRet;
+}
+
+GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
+{
+ GLfloat fLiteral[2] = {0.1, 0.0};
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ pAsm->D.dst.op3 = 0;
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pAsm->flag_reg_index;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
+ pAsm->D.dst.predicated = 0;
+ /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
+ pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
+ pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
+#if 0
+ pAsm->S[0].src.rtype = SRC_REC_LITERAL;
+ //pAsm->S[0].src.reg = 0;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+ pAsm->S[0].src.swizzlex = SQ_SEL_X;
+ pAsm->S[0].src.swizzley = SQ_SEL_Y;
+ pAsm->S[0].src.swizzlez = SQ_SEL_Z;
+ pAsm->S[0].src.swizzlew = SQ_SEL_W;
+
+ if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
+ {
+ return GL_FALSE;
+ }
+#else
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = 0;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+ pAsm->S[0].src.swizzlex = flagValue;
+ pAsm->S[0].src.swizzley = flagValue;
+ pAsm->S[0].src.swizzlez = flagValue;
+ pAsm->S[0].src.swizzlew = flagValue;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+#endif
+
+ return GL_TRUE;
+}
+
+GLboolean testFlag(r700_AssemblerBase *pAsm)
+{
+ GLfloat fLiteral[2] = {0.1, 0.0};
+
+ //Test flag
+ GLuint tmp = gethelpr(pAsm);
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
+ pAsm->D.dst.math = 1;
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 0;
+ pAsm->D.dst.writez = 0;
+ pAsm->D.dst.writew = 0;
+ pAsm->D2.dst2.literal_slots = 1;
+ pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
+ pAsm->D.dst.predicated = 1;
+ pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
+
+ pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pAsm->flag_reg_index;
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[0].src));
+ pAsm->S[0].src.swizzlex = SQ_SEL_X;
+ pAsm->S[0].src.swizzley = SQ_SEL_Y;
+ pAsm->S[0].src.swizzlez = SQ_SEL_Z;
+ pAsm->S[0].src.swizzlew = SQ_SEL_W;
+#if 0
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ //pAsm->S[1].src.reg = 0;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[1].src));
+ pAsm->S[1].src.swizzlex = SQ_SEL_X;
+ pAsm->S[1].src.swizzley = SQ_SEL_Y;
+ pAsm->S[1].src.swizzlez = SQ_SEL_Z;
+ pAsm->S[1].src.swizzlew = SQ_SEL_W;
+
+ if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
+ {
+ return GL_FALSE;
+ }
+#else
+ pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
+ pAsm->S[1].src.reg = 0;
+ setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
+ noneg_PVSSRC(&(pAsm->S[1].src));
+ pAsm->S[1].src.swizzlex = SQ_SEL_1;
+ pAsm->S[1].src.swizzley = SQ_SEL_1;
+ pAsm->S[1].src.swizzlez = SQ_SEL_1;
+ pAsm->S[1].src.swizzlew = SQ_SEL_1;
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+#endif
+
+ checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
+
+ return GL_TRUE;
+}
+
+GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
+{
+ testFlag(pAsm);
+ jumpToOffest(pAsm, 1, 4);
+ setRetInLoopFlag(pAsm, SQ_SEL_0);
+ pops(pAsm, unIF + 1);
+ add_return_inst(pAsm);
+
+ return GL_TRUE;
+}
+
+GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
+{
+ testFlag(pAsm);
+
+ //break
+ if(GL_FALSE == add_cf_instruction(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+
+ pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
+ (void *)pAsm->fc_stack[unFCSP].mid,
+ sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
+ sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
+ pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
+ pAsm->fc_stack[unFCSP].unNumMid++;
+
+ pops(pAsm, 1);
+
+ return GL_TRUE;
+}
+
+GLboolean AssembleInstr(GLuint uiFirstInst,
+ GLuint uiIL_Shift,
+ GLuint uiNumberInsts,
struct prog_instruction *pILInst,
r700_AssemblerBase *pR700AsmCode)
{
GLuint i;
pR700AsmCode->pILInst = pILInst;
- for(i=0; i<uiNumberInsts; i++)
+ for(i=uiFirstInst; i<uiNumberInsts; i++)
{
pR700AsmCode->uiCurInst = i;
+#ifndef USE_CF_FOR_CONTINUE_BREAK
+ if(OPCODE_BRK == pILInst[i+1].Opcode)
+ {
+ switch(pILInst[i].Opcode)
+ {
+ case OPCODE_SLE:
+ pILInst[i].Opcode = OPCODE_SGT;
+ break;
+ case OPCODE_SLT:
+ pILInst[i].Opcode = OPCODE_SGE;
+ break;
+ case OPCODE_SGE:
+ pILInst[i].Opcode = OPCODE_SLT;
+ break;
+ case OPCODE_SGT:
+ pILInst[i].Opcode = OPCODE_SLE;
+ break;
+ case OPCODE_SEQ:
+ pILInst[i].Opcode = OPCODE_SNE;
+ break;
+ case OPCODE_SNE:
+ pILInst[i].Opcode = OPCODE_SEQ;
+ break;
+ default:
+ break;
+ }
+ }
+#endif
+ if(pILInst[i].CondUpdate == 1)
+ {
+ /* remember dest register used for cond evaluation */
+ /* XXX also handle PROGRAM_OUTPUT registers here? */
+ pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index;
+ }
+
switch (pILInst[i].Opcode)
{
case OPCODE_ABS:
@@ -3936,8 +5520,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
break;
case OPCODE_ARL:
- radeon_error("Not yet implemented instruction OPCODE_ARL \n");
- //if ( GL_FALSE == assemble_BAD("ARL") )
+ if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
return GL_FALSE;
break;
case OPCODE_ARR:
@@ -3951,7 +5534,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
case OPCODE_COS:
- if ( GL_FALSE == assemble_COS(pR700AsmCode) )
+ if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
return GL_FALSE;
break;
@@ -3972,16 +5555,16 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
case OPCODE_EXP:
- radeon_error("Not yet implemented instruction OPCODE_EXP \n");
- //if ( GL_FALSE == assemble_BAD("EXP") )
+ if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
return GL_FALSE;
- break; // approx of EX2
+ break;
case OPCODE_FLR:
if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
return GL_FALSE;
break;
- //case OP_FLR_INT:
+ //case OP_FLR_INT: ;
+
// if ( GL_FALSE == assemble_FLR_INT() )
// return GL_FALSE;
// break;
@@ -3992,7 +5575,8 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
break;
case OPCODE_KIL:
- if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
+ case OPCODE_KIL_NV:
+ if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
return GL_FALSE;
break;
case OPCODE_LG2:
@@ -4008,10 +5592,9 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
case OPCODE_LOG:
- radeon_error("Not yet implemented instruction OPCODE_LOG \n");
- //if ( GL_FALSE == assemble_BAD("LOG") )
+ if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
return GL_FALSE;
- break; // approx of LG2
+ break;
case OPCODE_MAD:
if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
@@ -4033,6 +5616,26 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
case OPCODE_MUL:
if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
return GL_FALSE;
+ break;
+
+ case OPCODE_NOISE1:
+ {
+ callPreSub(pR700AsmCode,
+ GLSL_NOISE1,
+ &noise1_presub,
+ pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number,
+ 1);
+ radeon_error("noise1: not yet supported shader instruction\n");
+ };
+ break;
+ case OPCODE_NOISE2:
+ radeon_error("noise2: not yet supported shader instruction\n");
+ break;
+ case OPCODE_NOISE3:
+ radeon_error("noise3: not yet supported shader instruction\n");
+ break;
+ case OPCODE_NOISE4:
+ radeon_error("noise4: not yet supported shader instruction\n");
break;
case OPCODE_POW:
@@ -4048,22 +5651,78 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
case OPCODE_SIN:
- if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
+ if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
return GL_FALSE;
break;
case OPCODE_SCS:
if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
return GL_FALSE;
- break;
+ break;
+
+ case OPCODE_SEQ:
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_SGT:
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
+ {
+ return GL_FALSE;
+ }
+ break;
case OPCODE_SGE:
if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
+ {
return GL_FALSE;
- break;
+ }
+ break;
+
+ /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
case OPCODE_SLT:
- if ( GL_FALSE == assemble_SLT(pR700AsmCode) )
+ {
+ struct prog_src_register SrcRegSave[2];
+ SrcRegSave[0] = pILInst[i].SrcReg[0];
+ SrcRegSave[1] = pILInst[i].SrcReg[1];
+ pILInst[i].SrcReg[0] = SrcRegSave[1];
+ pILInst[i].SrcReg[1] = SrcRegSave[0];
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ }
+ break;
+
+ case OPCODE_SLE:
+ {
+ struct prog_src_register SrcRegSave[2];
+ SrcRegSave[0] = pILInst[i].SrcReg[0];
+ SrcRegSave[1] = pILInst[i].SrcReg[1];
+ pILInst[i].SrcReg[0] = SrcRegSave[1];
+ pILInst[i].SrcReg[1] = SrcRegSave[0];
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
+ {
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ return GL_FALSE;
+ }
+ pILInst[i].SrcReg[0] = SrcRegSave[0];
+ pILInst[i].SrcReg[1] = SrcRegSave[1];
+ }
+ break;
+
+ case OPCODE_SNE:
+ if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
+ {
return GL_FALSE;
- break;
+ }
+ break;
//case OP_STP:
// if ( GL_FALSE == assemble_STP(pR700AsmCode) )
@@ -4089,7 +5748,8 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
}
}
break;
-
+ case OPCODE_DDX:
+ case OPCODE_DDY:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
@@ -4097,30 +5757,104 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_FALSE;
break;
+ case OPCODE_TRUNC:
+ if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
+ return GL_FALSE;
+ break;
+
case OPCODE_XPD:
if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
return GL_FALSE;
break;
- case OPCODE_IF :
- if ( GL_FALSE == assemble_IF(pR700AsmCode) )
- return GL_FALSE;
+ case OPCODE_IF:
+ {
+ GLboolean bHasElse = GL_FALSE;
+
+ if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE)
+ {
+ bHasElse = GL_TRUE;
+ }
+
+ if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
+ {
+ return GL_FALSE;
+ }
+ }
break;
+
case OPCODE_ELSE :
- radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
- //if ( GL_FALSE == assemble_BAD("ELSE") )
+ if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
return GL_FALSE;
break;
+
case OPCODE_ENDIF:
if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
return GL_FALSE;
break;
+ case OPCODE_BGNLOOP:
+ if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_BRK:
+ if( GL_FALSE == assemble_BRK(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_CONT:
+ if( GL_FALSE == assemble_CONT(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_ENDLOOP:
+ if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_BGNSUB:
+ if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_RET:
+ if( GL_FALSE == assemble_RET(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
+ case OPCODE_CAL:
+ if( GL_FALSE == assemble_CAL(pR700AsmCode,
+ pILInst[i].BranchTarget,
+ uiIL_Shift,
+ uiNumberInsts,
+ pILInst,
+ NULL) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
//case OPCODE_EXPORT:
// if ( GL_FALSE == assemble_EXPORT() )
// return GL_FALSE;
// break;
+ case OPCODE_ENDSUB:
+ return assemble_ENDSUB(pR700AsmCode);
+
case OPCODE_END:
//pR700AsmCode->uiCurInst = i;
//This is to remaind that if in later exoort there is depth/stencil
@@ -4137,6 +5871,417 @@ GLboolean AssembleInstr(GLuint uiNumberInsts,
return GL_TRUE;
}
+GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
+{
+ setRetInLoopFlag(pAsm, SQ_SEL_0);
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ return GL_TRUE;
+}
+
+GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
+{
+ GLuint i;
+ GLuint unCFoffset;
+ TypedShaderList * plstCFmain;
+ TypedShaderList * plstCFsub;
+
+ R700ShaderInstruction * pInst;
+ R700ControlFlowGenericClause * pCFInst;
+
+ R700ControlFlowALUClause * pCF_ALU;
+ R700ALUInstruction * pALU;
+ GLuint unConstOffset = 0;
+ GLuint unRegOffset;
+ GLuint unMinRegIndex;
+
+ plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
+
+ /* remove flags init if they are not used */
+ if((pAsm->unCFflags & HAS_LOOPRET) == 0)
+ {
+ R700ControlFlowALUClause * pCF_ALU;
+ pInst = plstCFmain->pHead;
+ while(pInst)
+ {
+ if(SIT_CF_ALU == pInst->m_ShaderInstType)
+ {
+ pCF_ALU = (R700ControlFlowALUClause *)pInst;
+ if(0 == pCF_ALU->m_Word1.f.count)
+ {
+ pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
+ }
+ else
+ {
+ R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
+
+ pALU->m_pLinkedALUClause = NULL;
+ pALU = (R700ALUInstruction *)(pALU->pNextInst);
+ pALU->m_pLinkedALUClause = pCF_ALU;
+ pCF_ALU->m_pLinkedALUInstruction = pALU;
+
+ pCF_ALU->m_Word1.f.count--;
+ }
+ break;
+ }
+ pInst = pInst->pNextInst;
+ };
+ }
+
+ if(pAsm->CALLSTACK[0].max > 0)
+ {
+ pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
+ }
+
+ if(0 == pAsm->unSubArrayPointer)
+ {
+ return GL_TRUE;
+ }
+
+ unCFoffset = plstCFmain->uNumOfNode;
+
+ if(NULL != pILProg->Parameters)
+ {
+ unConstOffset = pILProg->Parameters->NumParameters;
+ }
+
+ /* Reloc subs */
+ for(i=0; i<pAsm->unSubArrayPointer; i++)
+ {
+ pAsm->subs[i].unCFoffset = unCFoffset;
+ plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
+
+ pInst = plstCFsub->pHead;
+
+ /* reloc instructions */
+ while(pInst)
+ {
+ if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
+ {
+ pCFInst = (R700ControlFlowGenericClause *)pInst;
+
+ switch (pCFInst->m_Word1.f.cf_inst)
+ {
+ case SQ_CF_INST_POP:
+ case SQ_CF_INST_JUMP:
+ case SQ_CF_INST_ELSE:
+ case SQ_CF_INST_LOOP_END:
+ case SQ_CF_INST_LOOP_START:
+ case SQ_CF_INST_LOOP_START_NO_AL:
+ case SQ_CF_INST_LOOP_CONTINUE:
+ case SQ_CF_INST_LOOP_BREAK:
+ pCFInst->m_Word0.f.addr += unCFoffset;
+ break;
+ default:
+ break;
+ }
+ }
+
+ pInst->m_uIndex += unCFoffset;
+
+ pInst = pInst->pNextInst;
+ };
+
+ if(NULL != pAsm->subs[i].pPresubDesc)
+ {
+ GLuint uNumSrc;
+
+ unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
+ unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg;
+ unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
+
+ pInst = plstCFsub->pHead;
+ while(pInst)
+ {
+ if(SIT_CF_ALU == pInst->m_ShaderInstType)
+ {
+ pCF_ALU = (R700ControlFlowALUClause *)pInst;
+
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+
+ if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src0_sel += unConstOffset;
+ }
+
+ if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F)
+ >= SQ_OP3_INST_MUL_LIT )
+ { /* op3 : 3 srcs */
+ if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
+ }
+ if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src1_sel += unConstOffset;
+ }
+ }
+ else
+ {
+ if(pAsm->bR6xx)
+ {
+ uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
+ }
+ else
+ {
+ uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
+ }
+ if(2 == uNumSrc)
+ { /* 2 srcs */
+ if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
+ {
+ pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
+ }
+ else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
+ {
+ pALU->m_Word0.f.src1_sel += unConstOffset;
+ }
+ }
+ }
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ pInst = pInst->pNextInst;
+ };
+ }
+
+ /* Put sub into main */
+ plstCFmain->pTail->pNextInst = plstCFsub->pHead;
+ plstCFmain->pTail = plstCFsub->pTail;
+ plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
+
+ unCFoffset += plstCFsub->uNumOfNode;
+ }
+
+ /* reloc callers */
+ for(i=0; i<pAsm->unCallerArrayPointer; i++)
+ {
+ pAsm->callers[i].cf_ptr->m_Word0.f.addr
+ = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
+
+ if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
+ {
+ unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
+ unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
+
+ if(NULL != pAsm->callers[i].prelude_cf_ptr)
+ {
+ pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ if(NULL != pAsm->callers[i].finale_cf_ptr)
+ {
+ pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
+ pALU = pCF_ALU->m_pLinkedALUInstruction;
+ for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
+ {
+ pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
+ pALU = (R700ALUInstruction*)(pALU->pNextInst);
+ }
+ }
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean callPreSub(r700_AssemblerBase* pAsm,
+ LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
+ COMPILED_SUB * pCompiledSub,
+ GLshort uOutReg,
+ GLshort uNumValidSrc)
+{
+ /* save assemble context */
+ GLuint starting_temp_register_number_save;
+ GLuint number_used_registers_save;
+ GLuint uFirstHelpReg_save;
+ GLuint uHelpReg_save;
+ GLuint uiCurInst_save;
+ struct prog_instruction *pILInst_save;
+ PRESUB_DESC * pPresubDesc;
+ GLboolean bRet;
+ int i;
+
+ R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
+
+ /* copy srcs to presub inputs */
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ for(i=0; i<uNumValidSrc; i++)
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i];
+ pAsm->D.dst.writex = 1;
+ pAsm->D.dst.writey = 1;
+ pAsm->D.dst.writez = 1;
+ pAsm->D.dst.writew = 1;
+
+ if( GL_FALSE == assemble_src(pAsm, i, 0) )
+ {
+ return GL_FALSE;
+ }
+
+ next_ins(pAsm);
+ }
+ if(uNumValidSrc > 0)
+ {
+ prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr;
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ }
+
+ /* browse thro existing presubs. */
+ for(i=0; i<pAsm->unNumPresub; i++)
+ {
+ if(pAsm->presubs[i].sptSigniture == scriptSigniture)
+ {
+ break;
+ }
+ }
+
+ if(i == pAsm->unNumPresub)
+ { /* not loaded yet */
+ /* save assemble context */
+ number_used_registers_save = pAsm->number_used_registers;
+ uFirstHelpReg_save = pAsm->uFirstHelpReg;
+ uHelpReg_save = pAsm->uHelpReg;
+ starting_temp_register_number_save = pAsm->starting_temp_register_number;
+ pILInst_save = pAsm->pILInst;
+ uiCurInst_save = pAsm->uiCurInst;
+
+ /* alloc in presub */
+ if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
+ {
+ pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
+ sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
+ sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
+ if(NULL == pAsm->presubs)
+ {
+ radeon_error("No memeory to allocate built in shader function description structures. \n");
+ return GL_FALSE;
+ }
+ pAsm->unPresubArraySize += 4;
+ }
+
+ pPresubDesc = &(pAsm->presubs[i]);
+ pPresubDesc->sptSigniture = scriptSigniture;
+
+ /* constants offsets need to be final resolved at reloc. */
+ if(0 == pAsm->unNumPresub)
+ {
+ pPresubDesc->unConstantsStart = 0;
+ }
+ else
+ {
+ pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart
+ + pAsm->presubs[i-1].pCompiledSub->NumParameters;
+ }
+
+ pPresubDesc->pCompiledSub = pCompiledSub;
+
+ pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts;
+ pPresubDesc->maxStartReg = uFirstHelpReg_save;
+ pAsm->unCurNumILInsts += pCompiledSub->NumInstructions;
+
+ pAsm->unNumPresub++;
+
+ /* setup new assemble context */
+ pAsm->starting_temp_register_number = 0;
+ pAsm->number_used_registers = pCompiledSub->NumTemporaries;
+ pAsm->uFirstHelpReg = pAsm->number_used_registers;
+ pAsm->uHelpReg = pAsm->uFirstHelpReg;
+
+ bRet = assemble_CAL(pAsm,
+ 0,
+ pPresubDesc->subIL_Shift,
+ pCompiledSub->NumInstructions,
+ pCompiledSub->Instructions,
+ pPresubDesc);
+
+
+ pPresubDesc->number_used_registers = pAsm->number_used_registers;
+
+ /* restore assemble context */
+ pAsm->number_used_registers = number_used_registers_save;
+ pAsm->uFirstHelpReg = uFirstHelpReg_save;
+ pAsm->uHelpReg = uHelpReg_save;
+ pAsm->starting_temp_register_number = starting_temp_register_number_save;
+ pAsm->pILInst = pILInst_save;
+ pAsm->uiCurInst = uiCurInst_save;
+ }
+ else
+ { /* was loaded */
+ pPresubDesc = &(pAsm->presubs[i]);
+
+ bRet = assemble_CAL(pAsm,
+ 0,
+ pPresubDesc->subIL_Shift,
+ pCompiledSub->NumInstructions,
+ pCompiledSub->Instructions,
+ pPresubDesc);
+ }
+
+ if(GL_FALSE == bRet)
+ {
+ radeon_error("Shader presub assemble failed. \n");
+ }
+ else
+ {
+ /* copy presub output to real dst */
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = pCompiledSub->dstRegIndex;
+ pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
+ pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
+ pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
+ pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
+
+ next_ins(pAsm);
+
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr;
+ pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
+ pAsm->alu_x_opcode = SQ_CF_INST_ALU;
+ }
+
+ if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
+ {
+ pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
+ }
+ if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg)
+ {
+ pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
+ }
+
+ return bRet;
+}
+
GLboolean Process_Export(r700_AssemblerBase* pAsm,
GLuint type,
GLuint export_starting_index,
@@ -4282,6 +6427,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
GLbitfield OutputsWritten)
{
unsigned int unBit;
+ GLuint export_count = 0;
if(pR700AsmCode->depth_export_register_number >= 0)
{
@@ -4303,6 +6449,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
{
return GL_FALSE;
}
+ export_count++;
}
unBit = 1 << FRAG_RESULT_DEPTH;
if(OutputsWritten & unBit)
@@ -4316,8 +6463,15 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
{
return GL_FALSE;
}
+ export_count++;
}
-
+ /* Need to export something, otherwise we'll hang
+ * results are undefined anyway */
+ if(export_count == 0)
+ {
+ Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
+ }
+
if(pR700AsmCode->cf_last_export_ptr != NULL)
{
pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
@@ -4422,6 +6576,25 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
export_starting_index++;
}
}
+
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(OutputsWritten & unBit)
+ {
+ if( GL_FALSE == Process_Export(pR700AsmCode,
+ SQ_EXPORT_PARAM,
+ export_starting_index,
+ 1,
+ pR700AsmCode->ucVP_OutputMap[i],
+ GL_FALSE) )
+ {
+ return GL_FALSE;
+ }
+
+ export_starting_index++;
+ }
+ }
// At least one param should be exported
if (export_count)
@@ -4456,6 +6629,21 @@ GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
{
FREE(pR700AsmCode->pucOutMask);
FREE(pR700AsmCode->pInstDeps);
+
+ if(NULL != pR700AsmCode->subs)
+ {
+ FREE(pR700AsmCode->subs);
+ }
+ if(NULL != pR700AsmCode->callers)
+ {
+ FREE(pR700AsmCode->callers);
+ }
+
+ if(NULL != pR700AsmCode->presubs)
+ {
+ FREE(pR700AsmCode->presubs);
+ }
+
return GL_TRUE;
}