diff options
Diffstat (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c')
-rw-r--r-- | src/mesa/drivers/dri/r600/r700_assembler.c | 6668 |
1 files changed, 6668 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c new file mode 100644 index 0000000000..de5c5d89fe --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -0,0 +1,6668 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com> + */ + +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include "main/mtypes.h" +#include "main/imports.h" +#include "shader/prog_parameter.h" + +#include "radeon_debug.h" +#include "r600_context.h" + +#include "r700_assembler.h" + +#define USE_CF_FOR_CONTINUE_BREAK 1 +#define USE_CF_FOR_POP_AFTER 1 + +struct prog_instruction noise1_insts[12] = { + {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}, + {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0} +}; +float noise1_const[2][4] = { + {0.300000f, 0.900000f, 0.500000f, 0.300000f} +}; + +COMPILED_SUB noise1_presub = { + &(noise1_insts[0]), + 12, + 2, + 1, + 0, + &(noise1_const[0]), + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + SWIZZLE_X, + {0,0,0}, + 0 +}; + +BITS addrmode_PVSDST(PVSDST * pPVSDST) +{ + return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1); +} + +void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode) +{ + pPVSDST->addrmode0 = addrmode & 1; + pPVSDST->addrmode1 = (addrmode >> 1) & 1; +} + +void nomask_PVSDST(PVSDST * pPVSDST) +{ + pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1; +} + +BITS addrmode_PVSSRC(PVSSRC* pPVSSRC) +{ + return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1); +} + +void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode) +{ + pPVSSRC->addrmode0 = addrmode & 1; + pPVSSRC->addrmode1 = (addrmode >> 1) & 1; +} + + +void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz) +{ + pPVSSRC->swizzlex = + pPVSSRC->swizzley = + pPVSSRC->swizzlez = + pPVSSRC->swizzlew = swz; +} + +void noswizzle_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->swizzlex = SQ_SEL_X; + pPVSSRC->swizzley = SQ_SEL_Y; + pPVSSRC->swizzlez = SQ_SEL_Z; + pPVSSRC->swizzlew = SQ_SEL_W; +} + +void +swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w) +{ + switch (x) + { + case SQ_SEL_X: x = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: x = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: x = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: x = pPVSSRC->swizzlew; + break; + default:; + } + + switch (y) + { + case SQ_SEL_X: y = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: y = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: y = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: y = pPVSSRC->swizzlew; + break; + default:; + } + + switch (z) + { + case SQ_SEL_X: z = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: z = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: z = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: z = pPVSSRC->swizzlew; + break; + default:; + } + + switch (w) + { + case SQ_SEL_X: w = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: w = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: w = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: w = pPVSSRC->swizzlew; + break; + default:; + } + + pPVSSRC->swizzlex = x; + pPVSSRC->swizzley = y; + pPVSSRC->swizzlez = z; + pPVSSRC->swizzlew = w; +} + +void neg_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->negx = 1; + pPVSSRC->negy = 1; + pPVSSRC->negz = 1; + pPVSSRC->negw = 1; +} + +void noneg_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->negx = 0; + pPVSSRC->negy = 0; + pPVSSRC->negz = 0; + pPVSSRC->negw = 0; +} + +// negate argument (for SUB instead of ADD and alike) +void flipneg_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->negx = !pPVSSRC->negx; + pPVSSRC->negy = !pPVSSRC->negy; + pPVSSRC->negz = !pPVSSRC->negz; + pPVSSRC->negw = !pPVSSRC->negw; +} + +void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c) +{ + switch (c) + { + case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break; + case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break; + case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break; + case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break; + default:; + } +} + +void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c) +{ + switch (c) + { + case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break; + case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break; + case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break; + case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break; + default:; + } +} + +BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0) +{ + return (pOutVTXFmt0->point_size | + pOutVTXFmt0->edge_flag | + pOutVTXFmt0->rta_index | + pOutVTXFmt0->kill_flag | + pOutVTXFmt0->viewport_index); +} + +BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) +{ + return (pFPOutFmt->depth | + pFPOutFmt->stencil_ref | + pFPOutFmt->mask | + pFPOutFmt->coverage_to_mask); +} + +GLboolean is_reduction_opcode(PVSDWORD* dest) +{ + if (dest->dst.op3 == 0) + { + if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) ) + { + return GL_TRUE; + } + } + return GL_FALSE; +} + +GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) +{ + GLuint format = FMT_INVALID; + GLuint uiElemSize = 0; + + switch (eType) + { + case GL_BYTE: + case GL_UNSIGNED_BYTE: + uiElemSize = 1; + switch(nChannels) + { + case 1: + format = FMT_8; break; + case 2: + format = FMT_8_8; break; + case 3: + format = FMT_8_8_8; break; + case 4: + format = FMT_8_8_8_8; break; + default: + break; + } + break; + + case GL_UNSIGNED_SHORT: + case GL_SHORT: + uiElemSize = 2; + switch(nChannels) + { + case 1: + format = FMT_16; break; + case 2: + format = FMT_16_16; break; + case 3: + format = FMT_16_16_16; break; + case 4: + format = FMT_16_16_16_16; break; + default: + break; + } + break; + + case GL_UNSIGNED_INT: + case GL_INT: + uiElemSize = 4; + switch(nChannels) + { + case 1: + format = FMT_32; break; + case 2: + format = FMT_32_32; break; + case 3: + format = FMT_32_32_32; break; + case 4: + format = FMT_32_32_32_32; break; + default: + break; + } + break; + + case GL_FLOAT: + uiElemSize = 4; + switch(nChannels) + { + case 1: + format = FMT_32_FLOAT; break; + case 2: + format = FMT_32_32_FLOAT; break; + case 3: + format = FMT_32_32_32_FLOAT; break; + case 4: + format = FMT_32_32_32_32_FLOAT; break; + default: + break; + } + break; + case GL_DOUBLE: + uiElemSize = 8; + switch(nChannels) + { + case 1: + format = FMT_32_FLOAT; break; + case 2: + format = FMT_32_32_FLOAT; break; + case 3: + format = FMT_32_32_32_FLOAT; break; + case 4: + format = FMT_32_32_32_32_FLOAT; break; + default: + break; + } + break; + default: + ; + //GL_ASSERT_NO_CASE(); + } + + if(NULL != pClient_size) + { + *pClient_size = uiElemSize * nChannels; + } + + return(format); +} + +unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3) +{ + if(nIsOp3 > 0) + { + return 3; + } + + switch (opcode) + { + case SQ_OP2_INST_ADD: + case SQ_OP2_INST_KILLE: + case SQ_OP2_INST_KILLGT: + case SQ_OP2_INST_KILLGE: + case SQ_OP2_INST_KILLNE: + case SQ_OP2_INST_MUL: + case SQ_OP2_INST_MAX: + case SQ_OP2_INST_MIN: + //case SQ_OP2_INST_MAX_DX10: + //case SQ_OP2_INST_MIN_DX10: + case SQ_OP2_INST_SETE: + case SQ_OP2_INST_SETNE: + case SQ_OP2_INST_SETGT: + case SQ_OP2_INST_SETGE: + case SQ_OP2_INST_PRED_SETE: + case SQ_OP2_INST_PRED_SETGT: + case SQ_OP2_INST_PRED_SETGE: + case SQ_OP2_INST_PRED_SETNE: + case SQ_OP2_INST_DOT4: + case SQ_OP2_INST_DOT4_IEEE: + case SQ_OP2_INST_CUBE: + return 2; + + case SQ_OP2_INST_MOV: + case SQ_OP2_INST_MOVA_FLOOR: + case SQ_OP2_INST_FRACT: + case SQ_OP2_INST_FLOOR: + case SQ_OP2_INST_TRUNC: + case SQ_OP2_INST_EXP_IEEE: + case SQ_OP2_INST_LOG_CLAMPED: + case SQ_OP2_INST_LOG_IEEE: + case SQ_OP2_INST_RECIP_IEEE: + case SQ_OP2_INST_RECIPSQRT_IEEE: + case SQ_OP2_INST_FLT_TO_INT: + case SQ_OP2_INST_SIN: + case SQ_OP2_INST_COS: + return 1; + + default: radeon_error( + "Need instruction operand number for %x.\n", opcode); + }; + + return 3; +} + +int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader) +{ + GLuint i; + + Init_R700_Shader(pShader); + pAsm->pR700Shader = pShader; + pAsm->currentShaderType = spt; + + pAsm->cf_last_export_ptr = NULL; + + pAsm->cf_current_export_clause_ptr = NULL; + pAsm->cf_current_alu_clause_ptr = NULL; + pAsm->cf_current_tex_clause_ptr = NULL; + pAsm->cf_current_vtx_clause_ptr = NULL; + pAsm->cf_current_cf_clause_ptr = NULL; + + // No clause has been created yet + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + + pAsm->number_of_colorandz_exports = 0; + pAsm->number_of_exports = 0; + pAsm->number_of_export_opcodes = 0; + + pAsm->alu_x_opcode = 0; + + pAsm->D2.bits = 0; + + pAsm->D.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; + + pAsm->uLastPosUpdate = 0; + + *(BITS *) &pAsm->fp_stOutFmt0 = 0; + + pAsm->uIIns = 0; + pAsm->uOIns = 0; + pAsm->number_used_registers = 0; + pAsm->uUsedConsts = 256; + + + // Fragment programs + pAsm->uBoolConsts = 0; + pAsm->uIntConsts = 0; + pAsm->uInsts = 0; + pAsm->uConsts = 0; + + pAsm->FCSP = 0; + pAsm->fc_stack[0].type = FC_NONE; + + pAsm->aArgSubst[0] = + pAsm->aArgSubst[1] = + pAsm->aArgSubst[2] = + pAsm->aArgSubst[3] = (-1); + + pAsm->uOutputs = 0; + + for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++) + { + pAsm->color_export_register_number[i] = (-1); + } + + + pAsm->depth_export_register_number = (-1); + pAsm->stencil_export_register_number = (-1); + pAsm->coverage_to_mask_export_register_number = (-1); + pAsm->mask_export_register_number = (-1); + + pAsm->starting_export_register_number = 0; + pAsm->starting_vfetch_register_number = 0; + pAsm->starting_temp_register_number = 0; + pAsm->uFirstHelpReg = 0; + + pAsm->input_position_is_used = GL_FALSE; + pAsm->input_normal_is_used = GL_FALSE; + + for (i=0; i<NUMBER_OF_INPUT_COLORS; i++) + { + pAsm->input_color_is_used[ i ] = GL_FALSE; + } + + for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++) + { + pAsm->input_texture_unit_is_used[ i ] = GL_FALSE; + } + + for (i=0; i<VERT_ATTRIB_MAX; i++) + { + pAsm->vfetch_instruction_ptr_array[ i ] = NULL; + } + + pAsm->number_of_inputs = 0; + + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; + + pAsm->subs = NULL; + pAsm->unSubArraySize = 0; + pAsm->unSubArrayPointer = 0; + pAsm->callers = NULL; + pAsm->unCallerArraySize = 0; + pAsm->unCallerArrayPointer = 0; + + pAsm->CALLSP = 0; + pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0; + pAsm->CALLSTACK[0].plstCFInstructions_local + = &(pAsm->pR700Shader->lstCFInstructions); + + pAsm->CALLSTACK[0].max = 0; + pAsm->CALLSTACK[0].current = 0; + + SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local); + + pAsm->unCFflags = 0; + + pAsm->presubs = NULL; + pAsm->unPresubArraySize = 0; + pAsm->unNumPresub = 0; + pAsm->unCurNumILInsts = 0; + + pAsm->unVetTexBits = 0; + + return 0; +} + +GLboolean IsTex(gl_inst_opcode Opcode) +{ + if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) || + (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) ) + { + return GL_TRUE; + } + return GL_FALSE; +} + +GLboolean IsAlu(gl_inst_opcode Opcode) +{ + //TODO : more for fc and ex for higher spec. + if( IsTex(Opcode) ) + { + return GL_FALSE; + } + return GL_TRUE; +} + +int check_current_clause(r700_AssemblerBase* pAsm, + CF_CLAUSE_TYPE new_clause_type) +{ + if (pAsm->cf_current_clause_type != new_clause_type) + { //Close last open clause + switch (pAsm->cf_current_clause_type) + { + case CF_ALU_CLAUSE: + if ( pAsm->cf_current_alu_clause_ptr != NULL) + { + pAsm->cf_current_alu_clause_ptr = NULL; + } + break; + case CF_VTX_CLAUSE: + if ( pAsm->cf_current_vtx_clause_ptr != NULL) + { + pAsm->cf_current_vtx_clause_ptr = NULL; + } + break; + case CF_TEX_CLAUSE: + if ( pAsm->cf_current_tex_clause_ptr != NULL) + { + pAsm->cf_current_tex_clause_ptr = NULL; + } + break; + case CF_EXPORT_CLAUSE: + if ( pAsm->cf_current_export_clause_ptr != NULL) + { + pAsm->cf_current_export_clause_ptr = NULL; + } + break; + case CF_OTHER_CLAUSE: + if ( pAsm->cf_current_cf_clause_ptr != NULL) + { + pAsm->cf_current_cf_clause_ptr = NULL; + } + break; + case CF_EMPTY_CLAUSE: + break; + default: + radeon_error( + "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type); + return GL_FALSE; + } + + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + + // Create new clause + switch (new_clause_type) + { + case CF_ALU_CLAUSE: + pAsm->cf_current_clause_type = CF_ALU_CLAUSE; + break; + case CF_VTX_CLAUSE: + pAsm->cf_current_clause_type = CF_VTX_CLAUSE; + break; + case CF_TEX_CLAUSE: + pAsm->cf_current_clause_type = CF_TEX_CLAUSE; + break; + case CF_EXPORT_CLAUSE: + { + R700ControlFlowSXClause* pR700ControlFlowSXClause + = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause); + + // Add new export instruction to control flow program + if (pR700ControlFlowSXClause != 0) + { + pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause; + Init_R700ControlFlowSXClause(pR700ControlFlowSXClause); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pR700ControlFlowSXClause ); + } + else + { + radeon_error( + "Error allocating new EXPORT CF instruction in check_current_clause. \n"); + return GL_FALSE; + } + pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE; + } + break; + case CF_EMPTY_CLAUSE: + break; + case CF_OTHER_CLAUSE: + pAsm->cf_current_clause_type = CF_OTHER_CLAUSE; + break; + default: + radeon_error( + "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type); + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean add_cf_instruction(r700_AssemblerBase* pAsm) +{ + if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE)) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr = + (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause); + + if (pAsm->cf_current_cf_clause_ptr != NULL) + { + Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr ); + } + else + { + radeon_error("Could not allocate a new VFetch CF instruction.\n"); + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, + R700VertexInstruction* vertex_instruction_ptr) +{ + if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) ) + { + return GL_FALSE; + } + + if( pAsm->cf_current_vtx_clause_ptr == NULL || + ( (pAsm->cf_current_vtx_clause_ptr != NULL) && + (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1) + ) ) + { + // Create new Vfetch control flow instruction for this new clause + pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause); + + if (pAsm->cf_current_vtx_clause_ptr != NULL) + { + Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr ); + } + else + { + radeon_error("Could not allocate a new VFetch CF instruction.\n"); + return GL_FALSE; + } + + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1; + + LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr ); + } + else + { + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++; + } + + AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr); + + return GL_TRUE; +} + +GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, + R700TextureInstruction* tex_instruction_ptr) +{ + if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) ) + { + return GL_FALSE; + } + + if ( pAsm->cf_current_tex_clause_ptr == NULL || + ( (pAsm->cf_current_tex_clause_ptr != NULL) && + (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1) + ) ) + { + // new tex cf instruction for this new clause + pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause); + + if (pAsm->cf_current_tex_clause_ptr != NULL) + { + Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr ); + } + else + { + radeon_error("Could not allocate a new TEX CF instruction.\n"); + return GL_FALSE; + } + + pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1; + } + else + { + pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++; + } + + // If this clause constains any TEX instruction that is dependent on a previous instruction, + // set the barrier bit + if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE ) + { + pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1; + } + + if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction) + { + pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr; + tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr; + } + + AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr); + + return GL_TRUE; +} + +GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, + GLuint gl_client_id, + GLuint destination_register, + GLuint number_of_elements, + GLenum dataElementType, + VTX_FETCH_METHOD* pFetchMethod) +{ + GLuint client_size_inbyte; + GLuint data_format; + GLuint mega_fetch_count; + GLuint is_mega_fetch_flag; + + R700VertexGenericFetch* vfetch_instruction_ptr; + R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ]; + + if (assembled_vfetch_instruction_ptr == NULL) + { + vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch); + if (vfetch_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700VertexGenericFetch(vfetch_instruction_ptr); + } + else + { + vfetch_instruction_ptr = assembled_vfetch_instruction_ptr; + } + + data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte); + + if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here + { + //TODO : mini fetch + mega_fetch_count = 0; + is_mega_fetch_flag = 0; + } + else + { + mega_fetch_count = MEGA_FETCH_BYTES - 1; + is_mega_fetch_flag = 0x1; + pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte; + } + + vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH; + vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA; + vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id; + vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0; + vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; + vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; + + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W; + + vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; + + // Destination register + vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; + vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE; + + vfetch_instruction_ptr->m_Word2.f.offset = 0; + vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0; + + vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag; + + if (assembled_vfetch_instruction_ptr == NULL) + { + if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) + { + return GL_FALSE; + } + + if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL) + { + return GL_FALSE; + } + else + { + pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr; + } + } + + return GL_TRUE; +} + +GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm, + GLuint destination_register, + GLenum type, + GLint size, + GLubyte element, + GLuint _signed, + GLboolean normalize, + GLenum format, + VTX_FETCH_METHOD * pFetchMethod) +{ + GLuint client_size_inbyte; + GLuint data_format; + GLuint mega_fetch_count; + GLuint is_mega_fetch_flag; + + R700VertexGenericFetch* vfetch_instruction_ptr; + R700VertexGenericFetch* assembled_vfetch_instruction_ptr + = pAsm->vfetch_instruction_ptr_array[element]; + + if (assembled_vfetch_instruction_ptr == NULL) + { + vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch); + if (vfetch_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700VertexGenericFetch(vfetch_instruction_ptr); + } + else + { + vfetch_instruction_ptr = assembled_vfetch_instruction_ptr; + } + + data_format = GetSurfaceFormat(type, size, &client_size_inbyte); + + if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here + { + //TODO : mini fetch + mega_fetch_count = 0; + is_mega_fetch_flag = 0; + } + else + { + mega_fetch_count = MEGA_FETCH_BYTES - 1; + is_mega_fetch_flag = 0x1; + pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte; + } + + vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH; + vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA; + vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + vfetch_instruction_ptr->m_Word0.f.buffer_id = element; + vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0; + vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; + vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; + + if(format == GL_BGRA) + { + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + } + else + { + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W; + + } + + vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; + vfetch_instruction_ptr->m_Word1.f.data_format = data_format; + vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE; + + if(1 == _signed) + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED; + } + else + { + vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED; + } + + if(GL_TRUE == normalize) + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM; + } + else + { + vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT; + } + + // Destination register + vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; + vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE; + + vfetch_instruction_ptr->m_Word2.f.offset = 0; + vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0; + + vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag; + + if (assembled_vfetch_instruction_ptr == NULL) + { + if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) + { + return GL_FALSE; + } + + if (pAsm->vfetch_instruction_ptr_array[element] != NULL) + { + return GL_FALSE; + } + else + { + pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr; + } + } + + return GL_TRUE; +} + +GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm) +{ + GLint i; + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + pAsm->cf_current_vtx_clause_ptr = NULL; + + for (i=0; i<VERT_ATTRIB_MAX; i++) + { + pAsm->vfetch_instruction_ptr_array[ i ] = NULL; + } + + cleanup_vfetch_shaderinst(pAsm->pR700Shader); + + return GL_TRUE; +} + +GLuint gethelpr(r700_AssemblerBase* pAsm) +{ + GLuint r = pAsm->uHelpReg; + pAsm->uHelpReg++; + if (pAsm->uHelpReg > pAsm->number_used_registers) + { + pAsm->number_used_registers = pAsm->uHelpReg; + } + return r; +} +void resethelpr(r700_AssemblerBase* pAsm) +{ + pAsm->uHelpReg = pAsm->uFirstHelpReg; +} + +void checkop_init(r700_AssemblerBase* pAsm) +{ + resethelpr(pAsm); + pAsm->aArgSubst[0] = + pAsm->aArgSubst[1] = + pAsm->aArgSubst[2] = + pAsm->aArgSubst[3] = -1; +} + +static GLboolean next_ins(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if (GL_TRUE == pAsm->is_tex) + { + if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) + { + if (GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE)) + { + radeon_error("Error assembling TEX instruction\n"); + return GL_FALSE; + } + } + else + { + if (GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE)) + { + radeon_error("Error assembling TEX instruction\n"); + return GL_FALSE; + } + } + } + else + { //ALU + if (GL_FALSE == assemble_alu_instruction(pAsm)) + { + radeon_error("Error assembling ALU instruction\n"); + return GL_FALSE; + } + } + + if (pAsm->D.dst.rtype == DST_REG_OUT) + { + assert(pAsm->D.dst.reg >= pAsm->starting_export_register_number); + + if (pAsm->D.dst.op3) + { + // There is no mask for OP3 instructions, so all channels are written + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; + } + else + { + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] + |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; + } + } + + //reset for next inst. + pAsm->D.bits = 0; + pAsm->D2.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; + pAsm->is_tex = GL_FALSE; + pAsm->need_tex_barrier = GL_FALSE; + pAsm->D2.bits = 0; + pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0; + return GL_TRUE; +} + +GLboolean mov_temp(r700_AssemblerBase* pAsm, int src) +{ + GLuint tmp = gethelpr(pAsm); + + //mov src to temp helper gpr. + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, src, 0) ) + { + return GL_FALSE; + } + + noswizzle_PVSSRC(&(pAsm->S[0].src)); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->aArgSubst[1 + src] = tmp; + + return GL_TRUE; +} + +GLboolean checkop1(r700_AssemblerBase* pAsm) +{ + checkop_init(pAsm); + return GL_TRUE; +} + +GLboolean checkop2(r700_AssemblerBase* pAsm) +{ + GLboolean bSrcConst[2]; + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + checkop_init(pAsm); + + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[0] = GL_TRUE; + } + else + { + bSrcConst[0] = GL_FALSE; + } + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[1] = GL_TRUE; + } + else + { + bSrcConst[1] = GL_FALSE; + } + + if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) ) + { + if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index) + { + if( GL_FALSE == mov_temp(pAsm, 1) ) + { + return GL_FALSE; + } + } + } + + return GL_TRUE; +} + +GLboolean checkop3(r700_AssemblerBase* pAsm) +{ + GLboolean bSrcConst[3]; + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + checkop_init(pAsm); + + if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[0] = GL_TRUE; + } + else + { + bSrcConst[0] = GL_FALSE; + } + if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[1] = GL_TRUE; + } + else + { + bSrcConst[1] = GL_FALSE; + } + if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) || + (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[2] = GL_TRUE; + } + else + { + bSrcConst[2] = GL_FALSE; + } + + if( (GL_TRUE == bSrcConst[0]) && + (GL_TRUE == bSrcConst[1]) && + (GL_TRUE == bSrcConst[2]) ) + { + if( GL_FALSE == mov_temp(pAsm, 1) ) + { + return GL_FALSE; + } + if( GL_FALSE == mov_temp(pAsm, 2) ) + { + return GL_FALSE; + } + + return GL_TRUE; + } + else if( (GL_TRUE == bSrcConst[0]) && + (GL_TRUE == bSrcConst[1]) ) + { + if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index) + { + if( GL_FALSE == mov_temp(pAsm, 1) ) + { + return 1; + } + } + + return GL_TRUE; + } + else if ( (GL_TRUE == bSrcConst[0]) && + (GL_TRUE == bSrcConst[2]) ) + { + if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index) + { + if( GL_FALSE == mov_temp(pAsm, 2) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; + } + else if( (GL_TRUE == bSrcConst[1]) && + (GL_TRUE == bSrcConst[2]) ) + { + if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index) + { + if( GL_FALSE == mov_temp(pAsm, 2) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; + } + + return GL_TRUE; +} + +GLboolean assemble_src(r700_AssemblerBase *pAsm, + int src, + int fld) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if (fld == -1) + { + fld = src; + } + + if(pAsm->aArgSubst[1+src] >= 0) + { + assert(fld >= 0); + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src]; + } + else + { + switch (pILInst->SrcReg[src].File) + { + case PROGRAM_TEMPORARY: + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number; + break; + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + if (1 == pILInst->SrcReg[src].RelAddr) + { + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0); + } + else + { + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + } + + pAsm->S[fld].src.rtype = SRC_REG_CONSTANT; + if(pILInst->SrcReg[src].Index < 0) + { + WARN_ONCE("Negative register offsets not supported yet!\n"); + pAsm->S[fld].src.reg = 0; + } + else + { + pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; + } + break; + case PROGRAM_INPUT: + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + pAsm->S[fld].src.rtype = SRC_REG_INPUT; + switch (pAsm->currentShaderType) + { + case SPT_FP: + pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index]; + break; + case SPT_VP: + pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index]; + break; + } + break; + default: + radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File); + return GL_FALSE; + } + } + + pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7; + pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7; + pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7; + pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7; + + pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1; + pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1; + pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1; + pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1; + + return GL_TRUE; +} + +GLboolean assemble_dst(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + switch (pILInst->DstReg.File) + { + case PROGRAM_TEMPORARY: + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number; + break; + case PROGRAM_ADDRESS: + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_A0; + pAsm->D.dst.reg = 0; + break; + case PROGRAM_OUTPUT: + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_OUT; + switch (pAsm->currentShaderType) + { + case SPT_FP: + pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index]; + break; + case SPT_VP: + pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index]; + break; + } + break; + default: + radeon_error("Invalid destination output argument type\n"); + return GL_FALSE; + } + + pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1; + pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1; + pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; + pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + + if(pILInst->SaturateMode == SATURATE_ZERO_ONE) + { + pAsm->D2.dst2.SaturateMode = 1; + } + else + { + pAsm->D2.dst2.SaturateMode = 0; + } + + return GL_TRUE; +} + +GLboolean tex_dst(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if(PROGRAM_TEMPORARY == pILInst->DstReg.File) + { + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + } + else if(PROGRAM_OUTPUT == pILInst->DstReg.File) + { + pAsm->D.dst.rtype = DST_REG_OUT; + switch (pAsm->currentShaderType) + { + case SPT_FP: + pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index]; + break; + case SPT_VP: + pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index]; + break; + } + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + } + else + { + radeon_error("Invalid destination output argument type\n"); + return GL_FALSE; + } + + pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1; + pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1; + pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; + pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + + return GL_TRUE; +} + +GLboolean tex_src(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + GLboolean bValidTexCoord = GL_FALSE; + + if(pAsm->aArgSubst[1] >= 0) + { + bValidTexCoord = GL_TRUE; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->aArgSubst[1]; + } + else + { + switch (pILInst->SrcReg[0].File) { + case PROGRAM_UNIFORM: + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + break; + case PROGRAM_TEMPORARY: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + + pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + break; + case PROGRAM_INPUT: + if(SPT_VP == pAsm->currentShaderType) + { + switch (pILInst->SrcReg[0].Index) + { + case VERT_ATTRIB_TEX0: + case VERT_ATTRIB_TEX1: + case VERT_ATTRIB_TEX2: + case VERT_ATTRIB_TEX3: + case VERT_ATTRIB_TEX4: + case VERT_ATTRIB_TEX5: + case VERT_ATTRIB_TEX6: + case VERT_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + } + else + { + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_WPOS: + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_FOGC: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + case FRAG_ATTRIB_FACE: + fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n"); + break; + case FRAG_ATTRIB_PNTC: + fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n"); + break; + } + + if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) || + (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) ) + { + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + } + } + + break; + } + } + + if(GL_TRUE == bValidTexCoord) + { + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + } + else + { + radeon_error("Invalid source texcoord for TEX instruction\n"); + return GL_FALSE; + } + + pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7; + pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7; + pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7; + pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7; + + pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1; + pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1; + pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1; + pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1; + + return GL_TRUE; +} + +GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized) +{ + PVSSRC * texture_coordinate_source; + PVSSRC * texture_unit_source; + + R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction); + if (tex_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700TextureInstruction(tex_instruction_ptr); + + texture_coordinate_source = &(pAsm->S[0].src); + texture_unit_source = &(pAsm->S[1].src); + + tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode; + tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0; + tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + tex_instruction_ptr->m_Word0.f.alt_const = 0; + + if(SPT_VP == pAsm->currentShaderType) + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX; + pAsm->unVetTexBits |= 1 << texture_unit_source->reg; + } + else + { + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + } + + tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; + if (normalized) { + tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED; + } else { + /* XXX: UNNORMALIZED tex coords have limited wrap modes */ + tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED; + } + + tex_instruction_ptr->m_Word2.f.offset_x = 0x0; + tex_instruction_ptr->m_Word2.f.offset_y = 0x0; + tex_instruction_ptr->m_Word2.f.offset_z = 0x0; + tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg; + + // dst + if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || + (pAsm->D.dst.rtype == DST_REG_OUT) ) + { + tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg; + tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + + tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; + tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; + + tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK); + tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK); + tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK); + tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK); + + + tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex; + tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley; + tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez; + tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew; + } + else + { + radeon_error("Only temp destination registers supported for TEX dest regs.\n"); + return GL_FALSE; + } + + if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +void initialize(r700_AssemblerBase *pAsm) +{ + GLuint cycle, component; + + for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++) + { + for (component=0; component<NUMBER_OF_COMPONENTS; component++) + { + pAsm->hw_gpr[cycle][component] = (-1); + } + } + for (component=0; component<NUMBER_OF_COMPONENTS; component++) + { + pAsm->hw_cfile_addr[component] = (-1); + pAsm->hw_cfile_chan[component] = (-1); + } +} + +GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, + int source_index, + PVSSRC* pSource, + BITS scalar_channel_index) +{ + BITS src_sel; + BITS src_rel; + BITS src_chan; + BITS src_neg; + + //-------------------------------------------------------------------------- + // Source for operands src0, src1. + // Values [0,127] correspond to GPR[0..127]. + // Values [256,511] correspond to cfile constants c[0..255]. + + //-------------------------------------------------------------------------- + // Other special values are shown in the list below. + + // 248 SQ_ALU_SRC_0: special constant 0.0. + // 249 SQ_ALU_SRC_1: special constant 1.0 float. + + // 250 SQ_ALU_SRC_1_INT: special constant 1 integer. + // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. + + // 252 SQ_ALU_SRC_0_5: special constant 0.5 float. + // 253 SQ_ALU_SRC_LITERAL: literal constant. + + // 254 SQ_ALU_SRC_PV: previous vector result. + // 255 SQ_ALU_SRC_PS: previous scalar result. + //-------------------------------------------------------------------------- + + BITS channel_swizzle; + switch (scalar_channel_index) + { + case 0: channel_swizzle = pSource->swizzlex; break; + case 1: channel_swizzle = pSource->swizzley; break; + case 2: channel_swizzle = pSource->swizzlez; break; + case 3: channel_swizzle = pSource->swizzlew; break; + default: channel_swizzle = SQ_SEL_MASK; break; + } + + if(channel_swizzle == SQ_SEL_0) + { + src_sel = SQ_ALU_SRC_0; + } + else if (channel_swizzle == SQ_SEL_1) + { + src_sel = SQ_ALU_SRC_1; + } + else + { + if ( (pSource->rtype == SRC_REG_TEMPORARY) || + (pSource->rtype == SRC_REG_INPUT) + ) + { + src_sel = pSource->reg; + } + else if (pSource->rtype == SRC_REG_CONSTANT) + { + src_sel = pSource->reg + CFILE_REGISTER_OFFSET; + } + else if (pSource->rtype == SRC_REC_LITERAL) + { + src_sel = SQ_ALU_SRC_LITERAL; + } + else + { + radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n", + source_index, pSource->rtype); + return GL_FALSE; + } + } + + if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) ) + { + src_rel = SQ_ABSOLUTE; + } + else + { + src_rel = SQ_RELATIVE; + } + + switch (channel_swizzle) + { + case SQ_SEL_X: + src_chan = SQ_CHAN_X; + break; + case SQ_SEL_Y: + src_chan = SQ_CHAN_Y; + break; + case SQ_SEL_Z: + src_chan = SQ_CHAN_Z; + break; + case SQ_SEL_W: + src_chan = SQ_CHAN_W; + break; + case SQ_SEL_0: + case SQ_SEL_1: + // Does not matter since src_sel controls + src_chan = SQ_CHAN_X; + break; + default: + radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle); + return GL_FALSE; + break; + } + + switch (scalar_channel_index) + { + case 0: src_neg = pSource->negx; break; + case 1: src_neg = pSource->negy; break; + case 2: src_neg = pSource->negz; break; + case 3: src_neg = pSource->negw; break; + default: src_neg = 0; break; + } + + switch (source_index) + { + case 0: + assert(alu_instruction_ptr); + alu_instruction_ptr->m_Word0.f.src0_sel = src_sel; + alu_instruction_ptr->m_Word0.f.src0_rel = src_rel; + alu_instruction_ptr->m_Word0.f.src0_chan = src_chan; + alu_instruction_ptr->m_Word0.f.src0_neg = src_neg; + break; + case 1: + assert(alu_instruction_ptr); + alu_instruction_ptr->m_Word0.f.src1_sel = src_sel; + alu_instruction_ptr->m_Word0.f.src1_rel = src_rel; + alu_instruction_ptr->m_Word0.f.src1_chan = src_chan; + alu_instruction_ptr->m_Word0.f.src1_neg = src_neg; + break; + case 2: + assert(alu_instruction_ptr); + alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel; + alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel; + alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan; + alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg; + break; + default: + radeon_error("Only three sources allowed in ALU opcodes.\n"); + return GL_FALSE; + break; + } + + return GL_TRUE; +} + +GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr, + GLuint contiguous_slots_needed) +{ + if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) ) + { + return GL_FALSE; + } + + if ( pAsm->alu_x_opcode != 0 || + pAsm->cf_current_alu_clause_ptr == NULL || + ( (pAsm->cf_current_alu_clause_ptr != NULL) && + (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) ) + ) ) + { + + //new cf inst for this clause + pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause); + + // link the new cf to cf segment + if(NULL != pAsm->cf_current_alu_clause_ptr) + { + Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr ); + } + else + { + radeon_error("Could not allocate a new ALU CF instruction.\n"); + return GL_FALSE; + } + + pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP; + + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0; + + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0; + + if(pAsm->alu_x_opcode != 0) + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode; + pAsm->alu_x_opcode = 0; + } + else + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU; + } + + pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1; + } + else + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2); + } + + // If this clause constains any instruction that is forward dependent on a TEX instruction, + // set the whole_quad_mode for this clause + if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1; + } + + if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) ) + { + alu_instruction_ptr->m_Word0.f.last = 1; + } + + if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction) + { + pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr; + alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr; + } + + AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr); + + return GL_TRUE; +} + +void get_src_properties(R700ALUInstruction* alu_instruction_ptr, + int source_index, + BITS* psrc_sel, + BITS* psrc_rel, + BITS* psrc_chan, + BITS* psrc_neg) +{ + switch (source_index) + { + case 0: + *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ; + *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ; + *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan; + *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ; + break; + + case 1: + *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ; + *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ; + *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan; + *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ; + break; + + case 2: + *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel; + *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel; + *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan; + *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg; + break; + } +} + +int is_cfile(BITS sel) +{ + if (sel > 255 && sel < 512) + { + return 1; + } + return 0; +} + +int is_const(BITS sel) +{ + if (is_cfile(sel)) + { + return 1; + } + else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL) + { + return 1; + } + return 0; +} + +int is_gpr(BITS sel) +{ + if (sel >= 0 && sel < 128) + { + return 1; + } + return 0; +} + +const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000 + SQ_ALU_VEC_120, //001 + SQ_ALU_VEC_102, //010 + + SQ_ALU_VEC_201, //011 + SQ_ALU_VEC_012, //100 + SQ_ALU_VEC_021, //101 + + SQ_ALU_VEC_012, //110 + SQ_ALU_VEC_012}; //111 + +const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000 + SQ_ALU_SCL_122, //001 + SQ_ALU_SCL_122, //010 + + SQ_ALU_SCL_221, //011 + SQ_ALU_SCL_212, //100 + SQ_ALU_SCL_122, //101 + + SQ_ALU_SCL_122, //110 + SQ_ALU_SCL_122}; //111 + +GLboolean reserve_cfile(r700_AssemblerBase* pAsm, + GLuint sel, + GLuint chan) +{ + int res_match = (-1); + int res_empty = (-1); + + GLint res; + + for (res=3; res>=0; res--) + { + if(pAsm->hw_cfile_addr[ res] < 0) + { + res_empty = res; + } + else if( (pAsm->hw_cfile_addr[res] == (int)sel) + && + (pAsm->hw_cfile_chan[ res ] == (int) chan) ) + { + res_match = res; + } + } + + if(res_match >= 0) + { + // Read for this scalar component already reserved, nothing to do here. + ; + } + else if(res_empty >= 0) + { + pAsm->hw_cfile_addr[ res_empty ] = sel; + pAsm->hw_cfile_chan[ res_empty ] = chan; + } + else + { + radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n"); + return GL_FALSE; + } + return GL_TRUE; +} + +GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle) +{ + if(pAsm->hw_gpr[cycle][chan] < 0) + { + pAsm->hw_gpr[cycle][chan] = sel; + } + else if(pAsm->hw_gpr[cycle][chan] != (int)sel) + { + radeon_error("Another scalar operation has already used GPR read port for given channel\n"); + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle) +{ + switch (swiz) + { + case SQ_ALU_SCL_210: + { + int table[3] = {2, 1, 0}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + case SQ_ALU_SCL_122: + { + int table[3] = {1, 2, 2}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + case SQ_ALU_SCL_212: + { + int table[3] = {2, 1, 2}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + case SQ_ALU_SCL_221: + { + int table[3] = {2, 2, 1}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + default: + radeon_error("Bad Scalar bank swizzle value\n"); + break; + } + + return GL_FALSE; +} + +GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle) +{ + switch (swiz) + { + case SQ_ALU_VEC_012: + { + int table[3] = {0, 1, 2}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_021: + { + int table[3] = {0, 2, 1}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_120: + { + int table[3] = {1, 2, 0}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_102: + { + int table[3] = {1, 0, 2}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_201: + { + int table[3] = {2, 0, 1}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_210: + { + int table[3] = {2, 1, 0}; + *pCycle = table[sel]; + } + break; + default: + radeon_error("Bad Vec bank swizzle value\n"); + return GL_FALSE; + break; + } + + return GL_TRUE; +} + +GLboolean check_scalar(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr) +{ + GLuint cycle; + GLuint bank_swizzle; + GLuint const_count = 0; + + BITS sel; + BITS chan; + BITS rel; + BITS neg; + + GLuint src; + + BITS src_sel [3] = {0,0,0}; + BITS src_chan[3] = {0,0,0}; + BITS src_rel [3] = {0,0,0}; + BITS src_neg [3] = {0,0,0}; + + GLuint swizzle_key; + + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + + for (src=0; src<number_of_operands; src++) + { + get_src_properties(alu_instruction_ptr, + src, + &(src_sel[src]), + &(src_rel[src]), + &(src_chan[src]), + &(src_neg[src]) ); + } + + + swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) + + (is_const( src_sel[1] ) ? 2 : 0) + + (is_const( src_sel[2] ) ? 1 : 0) ); + + alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ]; + + for (src=0; src<number_of_operands; src++) + { + sel = src_sel [src]; + chan = src_chan[src]; + rel = src_rel [src]; + neg = src_neg [src]; + + if (is_const( sel )) + { + // Any constant, including literal and inline constants + const_count++; + + if (is_cfile( sel )) + { + reserve_cfile(pAsm, sel, chan); + } + + } + } + + for (src=0; src<number_of_operands; src++) + { + sel = src_sel [src]; + chan = src_chan[src]; + rel = src_rel [src]; + neg = src_neg [src]; + + if( is_gpr(sel) ) + { + bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle; + + if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) ) + { + return GL_FALSE; + } + + if(cycle < const_count) + { + if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) ) + { + return GL_FALSE; + } + } + } + } + + return GL_TRUE; +} + +GLboolean check_vector(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr) +{ + GLuint cycle; + GLuint bank_swizzle; + GLuint const_count = 0; + + GLuint src; + + BITS sel; + BITS chan; + BITS rel; + BITS neg; + + BITS src_sel [3] = {0,0,0}; + BITS src_chan[3] = {0,0,0}; + BITS src_rel [3] = {0,0,0}; + BITS src_neg [3] = {0,0,0}; + + GLuint swizzle_key; + + GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + + for (src=0; src<number_of_operands; src++) + { + get_src_properties(alu_instruction_ptr, + src, + &(src_sel[src]), + &(src_rel[src]), + &(src_chan[src]), + &(src_neg[src]) ); + } + + + swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) + + (is_const( src_sel[1] ) ? 2 : 0) + + (is_const( src_sel[2] ) ? 1 : 0) + ); + + alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key]; + + for (src=0; src<number_of_operands; src++) + { + sel = src_sel [src]; + chan = src_chan[src]; + rel = src_rel [src]; + neg = src_neg [src]; + + + bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle; + + if( is_gpr(sel) ) + { + if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) ) + { + return GL_FALSE; + } + + if ( (src == 1) && + (sel == src_sel[0]) && + (chan == src_chan[0]) ) + { + } + else + { + if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) ) + { + return GL_FALSE; + } + } + } + else if( is_const(sel) ) + { + const_count++; + + if( is_cfile(sel) ) + { + if( GL_FALSE == reserve_cfile(pAsm, sel, chan) ) + { + return GL_FALSE; + } + } + } + } + + return GL_TRUE; +} + +GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) +{ + R700ALUInstruction * alu_instruction_ptr = NULL; + R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl; + R700ALUInstructionFullLiteral * alu_instruction_ptr_fl; + + GLuint number_of_scalar_operations; + GLboolean is_single_scalar_operation; + GLuint scalar_channel_index; + + PVSSRC * pcurrent_source; + int current_source_index; + GLuint contiguous_slots_needed; + + GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3); + //GLuint channel_swizzle, j; + //GLuint chan_counter[4] = {0, 0, 0, 0}; + //PVSSRC * pSource[3]; + GLboolean bSplitInst = GL_FALSE; + + if (1 == pAsm->D.dst.math) + { + is_single_scalar_operation = GL_TRUE; + number_of_scalar_operations = 1; + } + else + { + is_single_scalar_operation = GL_FALSE; + number_of_scalar_operations = 4; + +/* current assembler doesn't do more than 1 register per source */ +#if 0 + /* check read port, only very preliminary algorithm, not count in + src0/1 same comp case and prev slot repeat case; also not count relative + addressing. TODO: improve performance. */ + for(j=0; j<uNumSrc; j++) + { + pSource[j] = &(pAsm->S[j].src); + } + for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++) + { + for(j=0; j<uNumSrc; j++) + { + switch (scalar_channel_index) + { + case 0: channel_swizzle = pSource[j]->swizzlex; break; + case 1: channel_swizzle = pSource[j]->swizzley; break; + case 2: channel_swizzle = pSource[j]->swizzlez; break; + case 3: channel_swizzle = pSource[j]->swizzlew; break; + default: channel_swizzle = SQ_SEL_MASK; break; + } + if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) || + (pSource[j]->rtype == SRC_REG_INPUT)) + && (channel_swizzle <= SQ_SEL_W) ) + { + chan_counter[channel_swizzle]++; + } + } + } + if( (chan_counter[SQ_SEL_X] > 3) + || (chan_counter[SQ_SEL_Y] > 3) + || (chan_counter[SQ_SEL_Z] > 3) + || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */ + { + bSplitInst = GL_TRUE; + } +#endif + } + + contiguous_slots_needed = 0; + + if(!is_single_scalar_operation) + { + contiguous_slots_needed = 4; + } + + contiguous_slots_needed += pAsm->D2.dst2.literal_slots; + + initialize(pAsm); + + for (scalar_channel_index=0; + scalar_channel_index < number_of_scalar_operations; + scalar_channel_index++) + { + if(scalar_channel_index == (number_of_scalar_operations-1)) + { + switch(pAsm->D2.dst2.literal_slots) + { + case 0: + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + break; + case 1: + alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral); + Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl; + break; + case 2: + alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral); + Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f); + alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl; + break; + }; + } + else + { + alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + Init_R700ALUInstruction(alu_instruction_ptr); + } + + //src 0 + current_source_index = 0; + pcurrent_source = &(pAsm->S[0].src); + + if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + + if (uNumSrc > 1) + { + // Process source 1 + current_source_index = 1; + pcurrent_source = &(pAsm->S[current_source_index].src); + + if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + } + + //other bits + alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode; + + if( (is_single_scalar_operation == GL_TRUE) + || (GL_TRUE == bSplitInst) ) + { + alu_instruction_ptr->m_Word0.f.last = 1; + } + else + { + alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; + } + + alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0; + if(1 == pAsm->D.dst.predicated) + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + } + + // dst + if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || + (pAsm->D.dst.rtype == DST_REG_OUT) ) + { + alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; + } + else + { + radeon_error("Only temp destination registers supported for ALU dest regs.\n"); + return GL_FALSE; + } + + alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype + + if ( is_single_scalar_operation == GL_TRUE ) + { + // Override scalar_channel_index since only one scalar value will be written + if(pAsm->D.dst.writex) + { + scalar_channel_index = 0; + } + else if(pAsm->D.dst.writey) + { + scalar_channel_index = 1; + } + else if(pAsm->D.dst.writez) + { + scalar_channel_index = 2; + } + else if(pAsm->D.dst.writew) + { + scalar_channel_index = 3; + } + } + + alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; + + alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode; + + if (pAsm->D.dst.op3) + { + //op3 + + alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; + + //There's 3rd src for op3 + current_source_index = 2; + pcurrent_source = &(pAsm->S[current_source_index].src); + + if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + } + else + { + //op2 + if (pAsm->bR6xx) + { + alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; + + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs; + + //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; + //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; + switch (scalar_channel_index) + { + case 0: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; + break; + case 1: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; + break; + case 2: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; + break; + case 3: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; + break; + default: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK; + break; + } + alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; + + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs; + + //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + switch (scalar_channel_index) + { + case 0: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; + break; + case 1: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; + break; + case 2: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; + break; + case 3: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; + break; + default: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK; + break; + } + alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; + } + } + + if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) + { + return GL_FALSE; + } + + /* + * Judge the type of current instruction, is it vector or scalar + * instruction. + */ + if (is_single_scalar_operation) + { + if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) + { + return GL_FALSE; + } + } + else + { + if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) + { + return GL_FALSE; + } + } + + contiguous_slots_needed -= 1; + } + + return GL_TRUE; +} + +GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // opcode tmp.x, a.x + // MOV dst, tmp.x + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // Now replicate result to all necessary channels in destination + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_ABS(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_ADD(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB) + { + flipneg_PVSSRC(&(pAsm->S[1].src)); + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_ARL(r700_AssemblerBase *pAsm) +{ /* TODO: ar values dont' persist between clauses */ + if( GL_FALSE == checkop1(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_BAD(char *opcode_str) +{ + radeon_error("Not yet implemented instruction (%s)\n", opcode_str); + return GL_FALSE; +} + +GLboolean assemble_CMP(r700_AssemblerBase *pAsm) +{ + int tmp; + + if( GL_FALSE == checkop3(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE; + pAsm->D.dst.op3 = 1; + + tmp = (-1); + + if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + //OP3 has no support for write mask + tmp = gethelpr(pAsm); + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + } + else + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 2, 1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, 2) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + //tmp for source + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + noneg_PVSSRC(&(pAsm->S[0].src)); + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) +{ + int tmp; + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + assemble_src(pAsm, 0, -1); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; + next_ins(pAsm); + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + assemble_dst(pAsm); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + + //TODO - replicate if more channels set in WriteMask + return GL_TRUE; + +} + +GLboolean assemble_DOT(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_DOT4; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode) + { + zerocomp_PVSSRC(&(pAsm->S[0].src), 3); + zerocomp_PVSSRC(&(pAsm->S[1].src), 3); + } + else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH) + { + onecomp_PVSSRC(&(pAsm->S[0].src), 3); + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_DST(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + onecomp_PVSSRC(&(pAsm->S[0].src), 0); + onecomp_PVSSRC(&(pAsm->S[0].src), 3); + + onecomp_PVSSRC(&(pAsm->S[1].src), 0); + onecomp_PVSSRC(&(pAsm->S[1].src), 2); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_EX2(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE); +} + +GLboolean assemble_EXP(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // FLOOR tmp.x, a.x + // EX2 dst.x tmp.x + + if (pAsm->pILInst->DstReg.WriteMask & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // FRACT dst.y a.x + + if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // EX2 dst.z, a.x + + if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + // MOV dst.w 1.0 + + if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) { + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean assemble_FLR(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + + if ( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT); +} + +GLboolean assemble_FRC(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + if ( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if(pILInst->Opcode == OPCODE_KIL) + checkop1(pAsm); + + pAsm->D.dst.opcode = opcode; + //pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if(pILInst->Opcode == OPCODE_KIL_NV) + { + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1); + neg_PVSSRC(&(pAsm->S[1].src)); + } + else + { + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* Doc says KILL has to be last(end) ALU clause */ + pAsm->pR700Shader->killIsUsed = GL_TRUE; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + return GL_TRUE; +} + +GLboolean assemble_LG2(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE); +} + +GLboolean assemble_LRP(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + if( GL_FALSE == checkop3(pAsm) ) + { + return GL_FALSE; + } + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + nomask_PVSDST(&(pAsm->D.dst)); + + + if( GL_FALSE == assemble_src(pAsm, 1, 0) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == assemble_src(pAsm, 2, 1) ) + { + return GL_FALSE; + } + + neg_PVSSRC(&(pAsm->S[1].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 2, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_LOG(r700_AssemblerBase *pAsm) +{ + BITS tmp1, tmp2, tmp3; + + checkop1(pAsm); + + tmp1 = gethelpr(pAsm); + tmp2 = gethelpr(pAsm); + tmp3 = gethelpr(pAsm); + + // FIXME: The hardware can do fabs() directly on input + // elements, but the compiler doesn't have the + // capability to use that. + + // MAX tmp1.x, a.x, -a.x (fabs(a.x)) + + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // Entire algo: + // + // LG2 tmp2.x, tmp1.x + // FLOOR tmp3.x, tmp2.x + // MOV dst.x, tmp3.x + // ADD tmp3.x, tmp2.x, -tmp3.x + // EX2 dst.y, tmp3.x + // MOV dst.z, tmp2.x + // MOV dst.w, 1.0 + + // LG2 tmp2.x, tmp1.x + // FLOOR tmp3.x, tmp2.x + + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp3; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.x, tmp3.x + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // ADD tmp3.x, tmp2.x, -tmp3.x + // EX2 dst.y, tmp3.x + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp3; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = DST_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + neg_PVSSRC(&(pAsm->S[1].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp3; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.z, tmp2.x + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.w 1.0 + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm) +{ + int tmp, ii; + GLboolean bReplaceDst = GL_FALSE; + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if( GL_FALSE == checkop3(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + tmp = (-1); + + if(PROGRAM_TEMPORARY == pILInst->DstReg.File) + { /* TODO : more investigation on MAD src and dst using same register */ + for(ii=0; ii<3; ii++) + { + if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File) + && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) ) + { + bReplaceDst = GL_TRUE; + break; + } + } + } + if(0xF != pILInst->DstReg.WriteMask) + { /* OP3 has no support for write mask */ + bReplaceDst = GL_TRUE; + } + + if(GL_TRUE == bReplaceDst) + { + tmp = gethelpr(pAsm); + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + } + else + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 2, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + if (GL_TRUE == bReplaceDst) + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + //tmp for source + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + noneg_PVSSRC(&(pAsm->S[0].src)); + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +/* LIT dst, src */ +GLboolean assemble_LIT(r700_AssemblerBase *pAsm) +{ + unsigned int dstReg; + unsigned int dstType; + unsigned int srcReg; + unsigned int srcType; + checkop1(pAsm); + int tmp = gethelpr(pAsm); + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + dstReg = pAsm->D.dst.reg; + dstType = pAsm->D.dst.rtype; + srcReg = pAsm->S[0].src.reg; + srcType = pAsm->S[0].src.rtype; + + /* dst.xw, <- 1.0 */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 1; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_1; + pAsm->S[0].src.swizzley = SQ_SEL_1; + pAsm->S[0].src.swizzlez = SQ_SEL_1; + pAsm->S[0].src.swizzlew = SQ_SEL_1; + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + /* dst.y = max(src.x, 0.0) */ + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->S[0].src.rtype = srcType; + pAsm->S[0].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_0; + pAsm->S[1].src.swizzley = SQ_SEL_0; + pAsm->S[1].src.swizzlez = SQ_SEL_0; + pAsm->S[1].src.swizzlew = SQ_SEL_0; + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y); + + /* dst.z = log(src.y) */ + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; + pAsm->D.dst.math = 1; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 0; + pAsm->S[0].src.rtype = srcType; + pAsm->S[0].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 2) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + + swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X); + + /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */ + pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; + pAsm->D.dst.math = 1; + pAsm->D.dst.op3 = 1; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + pAsm->S[0].src.rtype = srcType; + pAsm->S[0].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = dstReg; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_Z; + pAsm->S[1].src.swizzley = SQ_SEL_Z; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_Z; + + pAsm->S[2].src.rtype = srcType; + pAsm->S[2].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* dst.z = exp(tmp.x) */ + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 0; + + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_X; + pAsm->S[0].src.swizzlew = SQ_SEL_X; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MAX(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MIN(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MIN; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MOV(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if (GL_FALSE == assemble_dst(pAsm)) + { + return GL_FALSE; + } + + if (GL_FALSE == assemble_src(pAsm, 0, -1)) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MUL(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_POW(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // LG2 tmp.x, a.swizzle + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MUL tmp.x, tmp.x, b.swizzle + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // EX2 dst.mask, tmp.x + // EX2 tmp.x, tmp.x + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // Now replicate result to all necessary channels in destination + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_RCP(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE); +} + +GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE); +} + +GLboolean assemble_SCS(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + /* tmp.x = src /2*PI */ + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + assemble_src(pAsm, 0, -1); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1/(3.1415926535 * 2); + pAsm->C[1].f = 0.0F; + + next_ins(pAsm); + + // COS dst.x, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.math = 1; + + assemble_dst(pAsm); + /* mask y */ + pAsm->D.dst.writey = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // SIN dst.y, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + pAsm->D.dst.math = 1; + + assemble_dst(pAsm); + /* mask x */ + pAsm->D.dst.writex = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = opcode; + //pAsm->D.dst.math = 1; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + pAsm->D.dst.predicated = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->uHelpReg; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number; + pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7; + noneg_PVSSRC(&(pAsm->S[0].src)); + + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = pAsm->uHelpReg; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_0; + pAsm->S[1].src.swizzley = SQ_SEL_0; + pAsm->S[1].src.swizzlez = SQ_SEL_0; + pAsm->S[1].src.swizzlew = SQ_SEL_0; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_SGE(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_SETGE; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_SLT(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_SETGT; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, 0) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_STP(r700_AssemblerBase *pAsm) +{ + return GL_TRUE; +} + +GLboolean assemble_TEX(r700_AssemblerBase *pAsm) +{ + GLboolean src_const; + GLboolean need_barrier = GL_FALSE; + + checkop1(pAsm); + + switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) + { + case PROGRAM_UNIFORM: + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + src_const = GL_TRUE; + break; + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + default: + src_const = GL_FALSE; + break; + } + + if (GL_TRUE == src_const) + { + if ( GL_FALSE == mov_temp(pAsm, 0) ) + return GL_FALSE; + need_barrier = GL_TRUE; + } + + if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) + { + GLuint tmp = gethelpr(pAsm); + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W); + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->aArgSubst[1] = tmp; + need_barrier = GL_TRUE; + } + + if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX ) + { + GLuint tmp1 = gethelpr(pAsm); + GLuint tmp2 = gethelpr(pAsm); + + /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ + pAsm->D.dst.opcode = SQ_OP2_INST_CUBE; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* tmp1.z = RCP_e(|tmp1.z|) */ + pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE; + pAsm->D.dst.math = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writez = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + pAsm->S[0].src.swizzlex = SQ_SEL_Z; + pAsm->S[0].src.abs = 1; + + next_ins(pAsm); + + /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x + * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x + * muladd has no writemask, have to use another temp + */ + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp1; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z); + setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); + /* immediate c 1.5 */ + pAsm->D2.dst2.literal_slots = 1; + pAsm->C[0].f = 1.5F; + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + pAsm->S[2].src.reg = tmp1; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X); + + next_ins(pAsm); + + /* tmp1.xy = temp2.xy */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + next_ins(pAsm); + pAsm->aArgSubst[1] = tmp1; + need_barrier = GL_TRUE; + + } + + switch(pAsm->pILInst[pAsm->uiCurInst].Opcode) + { + case OPCODE_DDX: + /* will these need WQM(1) on CF inst ? */ + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H; + break; + case OPCODE_DDY: + pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V; + break; + case OPCODE_TXB: + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L; + break; + default: + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C; + else + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + } + + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + + pAsm->is_tex = GL_TRUE; + if ( GL_TRUE == need_barrier ) + { + pAsm->need_tex_barrier = GL_TRUE; + } + // Set src1 to tex unit id + pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + + //No sw info from mesa compiler, so hard code here. + pAsm->S[1].src.swizzlex = SQ_SEL_X; + pAsm->S[1].src.swizzley = SQ_SEL_Y; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == tex_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == tex_src(pAsm) ) + { + return GL_FALSE; + } + + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP) + { + /* hopefully did swizzles before */ + noswizzle_PVSSRC(&(pAsm->S[0].src)); + } + + if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX) + { + /* SAMPLE dst, tmp.yxwy, CUBE */ + pAsm->S[0].src.swizzlex = SQ_SEL_Y; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_W; + pAsm->S[0].src.swizzlew = SQ_SEL_Y; + } + + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + { + /* compare value goes to w chan ? */ + pAsm->S[0].src.swizzlew = SQ_SEL_Z; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* add ARB shadow ambient but clamp to 0..1 */ + if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1) + { + /* ADD_SAT dst, dst, ambient[texunit] */ + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + pAsm->D2.dst2.SaturateMode = 1; + + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->D.dst.reg; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + noneg_PVSSRC(&(pAsm->S[0].src)); + + pAsm->S[1].src.rtype = SRC_REG_CONSTANT; + pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit]; + noswizzle_PVSSRC(&(pAsm->S[1].src)); + noneg_PVSSRC(&(pAsm->S[1].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + } + + return GL_TRUE; +} + +GLboolean assemble_XPD(r700_AssemblerBase *pAsm) +{ + BITS tmp1; + BITS tmp2 = 0; + + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + tmp1 = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp1; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + tmp2 = gethelpr(pAsm); + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp2; + + nomask_PVSDST(&(pAsm->D.dst)); + } + else + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0); + + // result1 + (neg) result0 + setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE); + pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[2].src.reg = tmp1; + + neg_PVSSRC(&(pAsm->S[2].src)); + noswizzle_PVSSRC(&(pAsm->S[2].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + + if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + // Use tmp as source + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp2; + + noneg_PVSSRC(&(pAsm->S[0].src)); + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) +{ + return GL_TRUE; +} + +static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason) +{ + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].current--; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].current -= 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should -= 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].current -= 1; + break; + }; +} + +static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly) +{ + if(GL_TRUE == bCheckMaxOnly) + { + switch (uReason) + { + case FC_PUSH_VPM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 1; + } + break; + case FC_PUSH_WQM: + if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4) + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current + 4; + } + break; + } + return; + } + + switch (uReason) + { + case FC_PUSH_VPM: + pAsm->CALLSTACK[pAsm->CALLSP].current++; + break; + case FC_PUSH_WQM: + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; + break; + case FC_LOOP: + pAsm->CALLSTACK[pAsm->CALLSP].current += 4; + break; + case FC_REP: + /* TODO : for 16 vp asic, should += 2; */ + pAsm->CALLSTACK[pAsm->CALLSP].current += 1; + break; + }; + + if(pAsm->CALLSTACK[pAsm->CALLSP].current + > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = + pAsm->CALLSTACK[pAsm->CALLSP].current; + } +} + +GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset; + + return GL_TRUE; +} + +GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + return GL_TRUE; +} + +GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse) +{ + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + if(GL_TRUE != bHasElse) + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + } + else + { + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + } + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_IF; + pAsm->fc_stack[pAsm->FCSP].mid = NULL; + pAsm->fc_stack[pAsm->FCSP].midLen= 0; + pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; + +#ifndef USE_CF_FOR_POP_AFTER + if(GL_TRUE != bHasElse) + { + pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; + } +#endif /* USE_CF_FOR_POP_AFTER */ + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE); + + return GL_TRUE; +} + +GLboolean assemble_ELSE(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; /// + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid, + 0, + sizeof(R700ControlFlowGenericClause *) ); + pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr; + //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1; + +#ifndef USE_CF_FOR_POP_AFTER + pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER; +#endif /* USE_CF_FOR_POP_AFTER */ + + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1; + + return GL_TRUE; +} + +GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) +{ +#ifdef USE_CF_FOR_POP_AFTER + pops(pAsm, 1); +#endif /* USE_CF_FOR_POP_AFTER */ + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + if(NULL == pAsm->fc_stack[pAsm->FCSP].mid) + { + /* no else in between */ + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode; + } + else + { + pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode; + } + + if(NULL != pAsm->fc_stack[pAsm->FCSP].mid) + { + FREE(pAsm->fc_stack[pAsm->FCSP].mid); + } + + if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF) + { + radeon_error("if/endif in shader code are not paired. \n"); + return GL_FALSE; + } + + pAsm->FCSP--; + + decreaseCurrent(pAsm, FC_PUSH_VPM); + + return GL_TRUE; +} + +GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP; + pAsm->fc_stack[pAsm->FCSP].mid = NULL; + pAsm->fc_stack[pAsm->FCSP].unNumMid = 0; + pAsm->fc_stack[pAsm->FCSP].midLen = 0; + pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr; + + checkStackDepth(pAsm, FC_LOOP, GL_FALSE); + + return GL_TRUE; +} + +GLboolean assemble_BRK(r700_AssemblerBase *pAsm) +{ +#ifdef USE_CF_FOR_CONTINUE_BREAK + + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + unsigned int unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + break; + } + } + if(0 == FC_LOOP) + { + radeon_error("Break is not inside loop/endloop pair.\n"); + return GL_FALSE; + } + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + +#endif //USE_CF_FOR_CONTINUE_BREAK + return GL_TRUE; +} + +GLboolean assemble_CONT(r700_AssemblerBase *pAsm) +{ +#ifdef USE_CF_FOR_CONTINUE_BREAK + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE); + + unsigned int unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + break; + } + } + if(0 == FC_LOOP) + { + radeon_error("Continue is not inside loop/endloop pair.\n"); + return GL_FALSE; + } + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + +#endif /* USE_CF_FOR_CONTINUE_BREAK */ + + return GL_TRUE; +} + +GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm) +{ + GLuint i; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1; + pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1; + +#ifdef USE_CF_FOR_CONTINUE_BREAK + for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++) + { + pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex; + } + if(NULL != pAsm->fc_stack[pAsm->FCSP].mid) + { + FREE(pAsm->fc_stack[pAsm->FCSP].mid); + } +#endif + + if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP) + { + radeon_error("loop/endloop in shader code are not paired. \n"); + return GL_FALSE; + } + + GLuint unFCSP; + GLuint unIF = 0; + if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0) + { + for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + breakLoopOnFlag(pAsm, unFCSP); + break; + } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } + } + if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry) + { +#ifdef USE_CF_FOR_POP_AFTER + returnOnFlag(pAsm, unIF); +#else + returnOnFlag(pAsm, 0); +#endif /* USE_CF_FOR_POP_AFTER */ + pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET; + } + } + + pAsm->FCSP--; + + decreaseCurrent(pAsm, FC_LOOP); + + return GL_TRUE; +} + +void add_return_inst(r700_AssemblerBase *pAsm) +{ + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return; + } + //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; +} + +GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift) +{ + /* Put in sub */ + if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize ) + { + pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs, + sizeof(SUB_OFFSET) * pAsm->unSubArraySize, + sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) ); + if(NULL == pAsm->subs) + { + return GL_FALSE; + } + pAsm->unSubArraySize += 10; + } + + pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL; + pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0; + + pAsm->CALLSP++; + pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer; + pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP; + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local + = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local); + pAsm->CALLSTACK[pAsm->CALLSP].max = 0; + pAsm->CALLSTACK[pAsm->CALLSP].current = 0; + SetActiveCFlist(pAsm->pR700Shader, + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); + + pAsm->unSubArrayPointer++; + + /* start sub */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + pAsm->FCSP++; + pAsm->fc_stack[pAsm->FCSP].type = FC_REP; + + checkStackDepth(pAsm, FC_REP, GL_FALSE); + + return GL_TRUE; +} + +GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm) +{ + if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP) + { + radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n"); + return GL_FALSE; + } + + /* copy max to sub structure */ + pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax + = pAsm->CALLSTACK[pAsm->CALLSP].max; + + decreaseCurrent(pAsm, FC_REP); + + pAsm->CALLSP--; + SetActiveCFlist(pAsm->pR700Shader, + pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local); + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + pAsm->FCSP--; + + return GL_TRUE; +} + +GLboolean assemble_RET(r700_AssemblerBase *pAsm) +{ + GLuint unIF = 0; + + if(pAsm->CALLSP > 0) + { /* in sub */ + GLuint unFCSP; + for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--) + { + if(FC_LOOP == pAsm->fc_stack[unFCSP].type) + { + setRetInLoopFlag(pAsm, SQ_SEL_1); + breakLoopOnFlag(pAsm, unFCSP); + pAsm->unCFflags |= LOOPRET_FLAGS; + + return GL_TRUE; + } + else if(FC_IF == pAsm->fc_stack[unFCSP].type) + { + unIF++; + } + } + } + +#ifdef USE_CF_FOR_POP_AFTER + if(unIF > 0) + { + pops(pAsm, unIF); + } +#endif /* USE_CF_FOR_POP_AFTER */ + + add_return_inst(pAsm); + + return GL_TRUE; +} + +GLboolean assemble_CAL(r700_AssemblerBase *pAsm, + GLint nILindex, + GLuint uiIL_Shift, + GLuint uiNumberInsts, + struct prog_instruction *pILInst, + PRESUB_DESC * pPresubDesc) +{ + GLint uiIL_Offset; + + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + /* Put in caller */ + if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize ) + { + pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers, + sizeof(CALLER_POINTER) * pAsm->unCallerArraySize, + sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) ); + if(NULL == pAsm->callers) + { + return GL_FALSE; + } + pAsm->unCallerArraySize += 10; + } + + uiIL_Offset = nILindex + uiIL_Shift; + pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset; + pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr; + + pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL; + pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL; + + pAsm->unCallerArrayPointer++; + + int j; + GLuint max; + GLuint unSubID; + GLboolean bRet; + for(j=0; j<pAsm->unSubArrayPointer; j++) + { + if(uiIL_Offset == pAsm->subs[j].subIL_Offset) + { /* compiled before */ + + max = pAsm->subs[j].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j; + return GL_TRUE; + } + } + + pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer; + unSubID = pAsm->unSubArrayPointer; + + bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm); + + if(GL_TRUE == bRet) + { + max = pAsm->subs[unSubID].unStackDepthMax + + pAsm->CALLSTACK[pAsm->CALLSP].current; + if(max > pAsm->CALLSTACK[pAsm->CALLSP].max) + { + pAsm->CALLSTACK[pAsm->CALLSP].max = max; + } + + pAsm->subs[unSubID].pPresubDesc = pPresubDesc; + } + + return bRet; +} + +GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue) +{ + /*GLfloat fLiteral[2] = {0.1, 0.0};*/ + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + pAsm->D.dst.op3 = 0; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->flag_reg_index; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->D2.dst2.literal_slots = 1; + pAsm->D2.dst2.SaturateMode = SATURATE_OFF; + pAsm->D.dst.predicated = 0; + /* in reloc where dislink flag init inst, only one slot alu inst is handled. */ + pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */ + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ +#if 0 + pAsm->S[0].src.rtype = SRC_REC_LITERAL; + //pAsm->S[0].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Z; + pAsm->S[0].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) ) + { + return GL_FALSE; + } +#else + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = flagValue; + pAsm->S[0].src.swizzley = flagValue; + pAsm->S[0].src.swizzlez = flagValue; + pAsm->S[0].src.swizzlew = flagValue; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } +#endif + + return GL_TRUE; +} + +GLboolean testFlag(r700_AssemblerBase *pAsm) +{ + /*GLfloat fLiteral[2] = {0.1, 0.0};*/ + + //Test flag + GLuint tmp = gethelpr(pAsm); + pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE; + + pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE; + pAsm->D.dst.math = 1; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->D2.dst2.literal_slots = 1; + pAsm->D2.dst2.SaturateMode = SATURATE_OFF; + pAsm->D.dst.predicated = 1; + pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */ + + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->flag_reg_index; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Z; + pAsm->S[0].src.swizzlew = SQ_SEL_W; +#if 0 + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + //pAsm->S[1].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_X; + pAsm->S[1].src.swizzley = SQ_SEL_Y; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) ) + { + return GL_FALSE; + } +#else + pAsm->S[1].src.rtype = DST_REG_TEMPORARY; + pAsm->S[1].src.reg = 0; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_1; + pAsm->S[1].src.swizzley = SQ_SEL_1; + pAsm->S[1].src.swizzlez = SQ_SEL_1; + pAsm->S[1].src.swizzlew = SQ_SEL_1; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } +#endif + + checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE); + + return GL_TRUE; +} + +GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF) +{ + testFlag(pAsm); + jumpToOffest(pAsm, 1, 4); + setRetInLoopFlag(pAsm, SQ_SEL_0); + pops(pAsm, unIF + 1); + add_return_inst(pAsm); + + return GL_TRUE; +} + +GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP) +{ + testFlag(pAsm); + + //break + if(GL_FALSE == add_cf_instruction(pAsm) ) + { + return GL_FALSE; + } + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK; + pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1; + + pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( + (void *)pAsm->fc_stack[unFCSP].mid, + sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid, + sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) ); + pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr; + pAsm->fc_stack[unFCSP].unNumMid++; + + pops(pAsm, 1); + + return GL_TRUE; +} + +GLboolean AssembleInstr(GLuint uiFirstInst, + GLuint uiIL_Shift, + GLuint uiNumberInsts, + struct prog_instruction *pILInst, + r700_AssemblerBase *pR700AsmCode) +{ + GLuint i; + + pR700AsmCode->pILInst = pILInst; + for(i=uiFirstInst; i<uiNumberInsts; i++) + { + pR700AsmCode->uiCurInst = i; + +#ifndef USE_CF_FOR_CONTINUE_BREAK + if(OPCODE_BRK == pILInst[i+1].Opcode) + { + switch(pILInst[i].Opcode) + { + case OPCODE_SLE: + pILInst[i].Opcode = OPCODE_SGT; + break; + case OPCODE_SLT: + pILInst[i].Opcode = OPCODE_SGE; + break; + case OPCODE_SGE: + pILInst[i].Opcode = OPCODE_SLT; + break; + case OPCODE_SGT: + pILInst[i].Opcode = OPCODE_SLE; + break; + case OPCODE_SEQ: + pILInst[i].Opcode = OPCODE_SNE; + break; + case OPCODE_SNE: + pILInst[i].Opcode = OPCODE_SEQ; + break; + default: + break; + } + } +#endif + if(pILInst[i].CondUpdate == 1) + { + /* remember dest register used for cond evaluation */ + /* XXX also handle PROGRAM_OUTPUT registers here? */ + pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index; + } + + switch (pILInst[i].Opcode) + { + case OPCODE_ABS: + if ( GL_FALSE == assemble_ABS(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_ADD: + case OPCODE_SUB: + if ( GL_FALSE == assemble_ADD(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_ARL: + if ( GL_FALSE == assemble_ARL(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_ARR: + radeon_error("Not yet implemented instruction OPCODE_ARR \n"); + //if ( GL_FALSE == assemble_BAD("ARR") ) + return GL_FALSE; + break; + + case OPCODE_CMP: + if ( GL_FALSE == assemble_CMP(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_COS: + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) ) + return GL_FALSE; + break; + + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_DPH: + if ( GL_FALSE == assemble_DOT(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_DST: + if ( GL_FALSE == assemble_DST(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_EX2: + if ( GL_FALSE == assemble_EX2(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_EXP: + if ( GL_FALSE == assemble_EXP(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_FLR: + if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) + return GL_FALSE; + break; + //case OP_FLR_INT: ; + + // if ( GL_FALSE == assemble_FLR_INT() ) + // return GL_FALSE; + // break; + + case OPCODE_FRC: + if ( GL_FALSE == assemble_FRC(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_KIL: + case OPCODE_KIL_NV: + if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) ) + return GL_FALSE; + break; + case OPCODE_LG2: + if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LIT: + if ( GL_FALSE == assemble_LIT(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LRP: + if ( GL_FALSE == assemble_LRP(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LOG: + if ( GL_FALSE == assemble_LOG(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_MAD: + if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MAX: + if ( GL_FALSE == assemble_MAX(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MIN: + if ( GL_FALSE == assemble_MIN(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_MOV: + if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MUL: + if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_NOISE1: + { + callPreSub(pR700AsmCode, + GLSL_NOISE1, + &noise1_presub, + pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number, + 1); + radeon_error("noise1: not yet supported shader instruction\n"); + }; + break; + case OPCODE_NOISE2: + radeon_error("noise2: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE3: + radeon_error("noise3: not yet supported shader instruction\n"); + break; + case OPCODE_NOISE4: + radeon_error("noise4: not yet supported shader instruction\n"); + break; + + case OPCODE_POW: + if ( GL_FALSE == assemble_POW(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_RCP: + if ( GL_FALSE == assemble_RCP(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_RSQ: + if ( GL_FALSE == assemble_RSQ(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_SIN: + if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) ) + return GL_FALSE; + break; + case OPCODE_SCS: + if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_SEQ: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) ) + { + return GL_FALSE; + } + break; + + case OPCODE_SGT: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) + { + return GL_FALSE; + } + break; + + case OPCODE_SGE: + if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/ + case OPCODE_SLT: + { + struct prog_src_register SrcRegSave[2]; + SrcRegSave[0] = pILInst[i].SrcReg[0]; + SrcRegSave[1] = pILInst[i].SrcReg[1]; + pILInst[i].SrcReg[0] = SrcRegSave[1]; + pILInst[i].SrcReg[1] = SrcRegSave[0]; + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + } + break; + + case OPCODE_SLE: + { + struct prog_src_register SrcRegSave[2]; + SrcRegSave[0] = pILInst[i].SrcReg[0]; + SrcRegSave[1] = pILInst[i].SrcReg[1]; + pILInst[i].SrcReg[0] = SrcRegSave[1]; + pILInst[i].SrcReg[1] = SrcRegSave[0]; + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) ) + { + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + return GL_FALSE; + } + pILInst[i].SrcReg[0] = SrcRegSave[0]; + pILInst[i].SrcReg[1] = SrcRegSave[1]; + } + break; + + case OPCODE_SNE: + if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) ) + { + return GL_FALSE; + } + break; + + //case OP_STP: + // if ( GL_FALSE == assemble_STP(pR700AsmCode) ) + // return GL_FALSE; + // break; + + case OPCODE_SWZ: + if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) + { + return GL_FALSE; + } + else + { + if( (i+1)<uiNumberInsts ) + { + if(OPCODE_END != pILInst[i+1].Opcode) + { + if( GL_TRUE == IsTex(pILInst[i+1].Opcode) ) + { + pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1? + } + } + } + } + break; + case OPCODE_DDX: + case OPCODE_DDY: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + if ( GL_FALSE == assemble_TEX(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_TRUNC: + if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) ) + return GL_FALSE; + break; + + case OPCODE_XPD: + if ( GL_FALSE == assemble_XPD(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_IF: + { + GLboolean bHasElse = GL_FALSE; + + if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE) + { + bHasElse = GL_TRUE; + } + + if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) ) + { + return GL_FALSE; + } + } + break; + + case OPCODE_ELSE : + if ( GL_FALSE == assemble_ELSE(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_ENDIF: + if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_BGNLOOP: + if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_BRK: + if( GL_FALSE == assemble_BRK(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_CONT: + if( GL_FALSE == assemble_CONT(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_ENDLOOP: + if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_BGNSUB: + if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) ) + { + return GL_FALSE; + } + break; + + case OPCODE_RET: + if( GL_FALSE == assemble_RET(pR700AsmCode) ) + { + return GL_FALSE; + } + break; + + case OPCODE_CAL: + if( GL_FALSE == assemble_CAL(pR700AsmCode, + pILInst[i].BranchTarget, + uiIL_Shift, + uiNumberInsts, + pILInst, + NULL) ) + { + return GL_FALSE; + } + break; + + //case OPCODE_EXPORT: + // if ( GL_FALSE == assemble_EXPORT() ) + // return GL_FALSE; + // break; + + case OPCODE_ENDSUB: + return assemble_ENDSUB(pR700AsmCode); + + case OPCODE_END: + //pR700AsmCode->uiCurInst = i; + //This is to remaind that if in later exoort there is depth/stencil + //export, we need a mov to re-arrange DST channel, where using a + //psuedo inst, we will use this end inst to do it. + return GL_TRUE; + + default: + radeon_error("internal: unknown instruction\n"); + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean InitShaderProgram(r700_AssemblerBase * pAsm) +{ + setRetInLoopFlag(pAsm, SQ_SEL_0); + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + return GL_TRUE; +} + +GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg) +{ + GLuint i; + GLuint unCFoffset; + TypedShaderList * plstCFmain; + TypedShaderList * plstCFsub; + + R700ShaderInstruction * pInst; + R700ControlFlowGenericClause * pCFInst; + + R700ControlFlowALUClause * pCF_ALU; + R700ALUInstruction * pALU; + GLuint unConstOffset = 0; + GLuint unRegOffset; + GLuint unMinRegIndex; + + plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local; + + /* remove flags init if they are not used */ + if((pAsm->unCFflags & HAS_LOOPRET) == 0) + { + R700ControlFlowALUClause * pCF_ALU; + pInst = plstCFmain->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + if(0 == pCF_ALU->m_Word1.f.count) + { + pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP; + } + else + { + R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction; + + pALU->m_pLinkedALUClause = NULL; + pALU = (R700ALUInstruction *)(pALU->pNextInst); + pALU->m_pLinkedALUClause = pCF_ALU; + pCF_ALU->m_pLinkedALUInstruction = pALU; + + pCF_ALU->m_Word1.f.count--; + } + break; + } + pInst = pInst->pNextInst; + }; + } + + if(pAsm->CALLSTACK[0].max > 0) + { + pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2; + } + + if(0 == pAsm->unSubArrayPointer) + { + return GL_TRUE; + } + + unCFoffset = plstCFmain->uNumOfNode; + + if(NULL != pILProg->Parameters) + { + unConstOffset = pILProg->Parameters->NumParameters; + } + + /* Reloc subs */ + for(i=0; i<pAsm->unSubArrayPointer; i++) + { + pAsm->subs[i].unCFoffset = unCFoffset; + plstCFsub = &(pAsm->subs[i].lstCFInstructions_local); + + pInst = plstCFsub->pHead; + + /* reloc instructions */ + while(pInst) + { + if(SIT_CF_GENERIC == pInst->m_ShaderInstType) + { + pCFInst = (R700ControlFlowGenericClause *)pInst; + + switch (pCFInst->m_Word1.f.cf_inst) + { + case SQ_CF_INST_POP: + case SQ_CF_INST_JUMP: + case SQ_CF_INST_ELSE: + case SQ_CF_INST_LOOP_END: + case SQ_CF_INST_LOOP_START: + case SQ_CF_INST_LOOP_START_NO_AL: + case SQ_CF_INST_LOOP_CONTINUE: + case SQ_CF_INST_LOOP_BREAK: + pCFInst->m_Word0.f.addr += unCFoffset; + break; + default: + break; + } + } + + pInst->m_uIndex += unCFoffset; + + pInst = pInst->pNextInst; + }; + + if(NULL != pAsm->subs[i].pPresubDesc) + { + GLuint uNumSrc; + + unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg; + unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart; + + pInst = plstCFsub->pHead; + while(pInst) + { + if(SIT_CF_ALU == pInst->m_ShaderInstType) + { + pCF_ALU = (R700ControlFlowALUClause *)pInst; + + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + + if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src0_sel += unConstOffset; + } + + if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F) + >= SQ_OP3_INST_MUL_LIT ) + { /* op3 : 3 srcs */ + if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word1_OP3.f.src2_sel += unConstOffset; + } + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + else + { + if(pAsm->bR6xx) + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0); + } + else + { + uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0); + } + if(2 == uNumSrc) + { /* 2 srcs */ + if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE) + { + pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex; + } + else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE) + { + pALU->m_Word0.f.src1_sel += unConstOffset; + } + } + } + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + pInst = pInst->pNextInst; + }; + } + + /* Put sub into main */ + plstCFmain->pTail->pNextInst = plstCFsub->pHead; + plstCFmain->pTail = plstCFsub->pTail; + plstCFmain->uNumOfNode += plstCFsub->uNumOfNode; + + unCFoffset += plstCFsub->uNumOfNode; + } + + /* reloc callers */ + for(i=0; i<pAsm->unCallerArrayPointer; i++) + { + pAsm->callers[i].cf_ptr->m_Word0.f.addr + = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset; + + if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc) + { + unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex; + unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg; + + if(NULL != pAsm->callers[i].prelude_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + if(NULL != pAsm->callers[i].finale_cf_ptr) + { + pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr); + pALU = pCF_ALU->m_pLinkedALUInstruction; + for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++) + { + pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex; + pALU = (R700ALUInstruction*)(pALU->pNextInst); + } + } + } + } + + return GL_TRUE; +} + +GLboolean callPreSub(r700_AssemblerBase* pAsm, + LOADABLE_SCRIPT_SIGNITURE scriptSigniture, + COMPILED_SUB * pCompiledSub, + GLshort uOutReg, + GLshort uNumValidSrc) +{ + /* save assemble context */ + GLuint starting_temp_register_number_save; + GLuint number_used_registers_save; + GLuint uFirstHelpReg_save; + GLuint uHelpReg_save; + GLuint uiCurInst_save; + struct prog_instruction *pILInst_save; + PRESUB_DESC * pPresubDesc; + GLboolean bRet; + int i; + + R700ControlFlowGenericClause* prelude_cf_ptr = NULL; + + /* copy srcs to presub inputs */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + for(i=0; i<uNumValidSrc; i++) + { + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i]; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 1; + + if( GL_FALSE == assemble_src(pAsm, i, 0) ) + { + return GL_FALSE; + } + + next_ins(pAsm); + } + if(uNumValidSrc > 0) + { + prelude_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + /* browse thro existing presubs. */ + for(i=0; i<pAsm->unNumPresub; i++) + { + if(pAsm->presubs[i].sptSigniture == scriptSigniture) + { + break; + } + } + + if(i == pAsm->unNumPresub) + { /* not loaded yet */ + /* save assemble context */ + number_used_registers_save = pAsm->number_used_registers; + uFirstHelpReg_save = pAsm->uFirstHelpReg; + uHelpReg_save = pAsm->uHelpReg; + starting_temp_register_number_save = pAsm->starting_temp_register_number; + pILInst_save = pAsm->pILInst; + uiCurInst_save = pAsm->uiCurInst; + + /* alloc in presub */ + if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize ) + { + pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs, + sizeof(PRESUB_DESC) * pAsm->unPresubArraySize, + sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) ); + if(NULL == pAsm->presubs) + { + radeon_error("No memeory to allocate built in shader function description structures. \n"); + return GL_FALSE; + } + pAsm->unPresubArraySize += 4; + } + + pPresubDesc = &(pAsm->presubs[i]); + pPresubDesc->sptSigniture = scriptSigniture; + + /* constants offsets need to be final resolved at reloc. */ + if(0 == pAsm->unNumPresub) + { + pPresubDesc->unConstantsStart = 0; + } + else + { + pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart + + pAsm->presubs[i-1].pCompiledSub->NumParameters; + } + + pPresubDesc->pCompiledSub = pCompiledSub; + + pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts; + pPresubDesc->maxStartReg = uFirstHelpReg_save; + pAsm->unCurNumILInsts += pCompiledSub->NumInstructions; + + pAsm->unNumPresub++; + + /* setup new assemble context */ + pAsm->starting_temp_register_number = 0; + pAsm->number_used_registers = pCompiledSub->NumTemporaries; + pAsm->uFirstHelpReg = pAsm->number_used_registers; + pAsm->uHelpReg = pAsm->uFirstHelpReg; + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + + + pPresubDesc->number_used_registers = pAsm->number_used_registers; + + /* restore assemble context */ + pAsm->number_used_registers = number_used_registers_save; + pAsm->uFirstHelpReg = uFirstHelpReg_save; + pAsm->uHelpReg = uHelpReg_save; + pAsm->starting_temp_register_number = starting_temp_register_number_save; + pAsm->pILInst = pILInst_save; + pAsm->uiCurInst = uiCurInst_save; + } + else + { /* was loaded */ + pPresubDesc = &(pAsm->presubs[i]); + + bRet = assemble_CAL(pAsm, + 0, + pPresubDesc->subIL_Shift, + pCompiledSub->NumInstructions, + pCompiledSub->Instructions, + pPresubDesc); + } + + if(GL_FALSE == bRet) + { + radeon_error("Shader presub assemble failed. \n"); + } + else + { + /* copy presub output to real dst */ + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pCompiledSub->dstRegIndex; + pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX; + pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY; + pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ; + pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW; + + next_ins(pAsm); + + pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr; + pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr; + pAsm->alu_x_opcode = SQ_CF_INST_ALU; + } + + if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers ) + { + pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg; + } + if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg) + { + pPresubDesc->maxStartReg = pAsm->uFirstHelpReg; + } + + return bRet; +} + +GLboolean Process_Export(r700_AssemblerBase* pAsm, + GLuint type, + GLuint export_starting_index, + GLuint export_count, + GLuint starting_register_number, + GLboolean is_depth_export) +{ + unsigned char ucWriteMask; + + check_current_clause(pAsm, CF_EMPTY_CLAUSE); + check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr + + pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type; + + switch (type) + { + case SQ_EXPORT_PIXEL: + if(GL_TRUE == is_depth_export) + { + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index; + } + break; + + case SQ_EXPORT_POS: + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index; + break; + + case SQ_EXPORT_PARAM: + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index; + break; + + default: + radeon_error("Unknown export type: %d\n", type); + return GL_FALSE; + break; + } + + pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number; + + pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE; + pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3; + + pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1); + pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE + pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1; + + if (export_count == 1) + { + assert(starting_register_number >= pAsm->starting_export_register_number); + + ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number]; + /* exports Z as a float into Red channel */ + if (GL_TRUE == is_depth_export) + ucWriteMask = 0x1; + + if( (ucWriteMask & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK; + } + if( ((ucWriteMask>>1) & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK; + } + if( ((ucWriteMask>>2) & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK; + } + if( ((ucWriteMask>>3) & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK; + } + } + else + { + // This should only be used if all components for all registers have been written + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X; + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y; + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z; + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W; + } + + pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr; + + return GL_TRUE; +} + +GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select) +{ + gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END + pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV; + + // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->depth_export_register_number; + + pAsm->D.dst.writex = 1; // depth goes in R channel for HW + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->depth_export_register_number; + + setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select); + + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save; + + return GL_TRUE; +} + +GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, + GLbitfield OutputsWritten) +{ + unsigned int unBit; + GLuint export_count = 0; + unsigned int i; + + if(pR700AsmCode->depth_export_register_number >= 0) + { + if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth + { + return GL_FALSE; + } + } + + for (i = 0; i < FRAG_RESULT_MAX; ++i) + { + unBit = 1 << i; + + if (OutputsWritten & unBit) + { + GLboolean is_depth = i == FRAG_RESULT_DEPTH ? GL_TRUE : GL_FALSE; + if (!Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->uiFP_OutputMap[i], is_depth)) + return GL_FALSE; + ++export_count; + } + } + + /* Need to export something, otherwise we'll hang + * results are undefined anyway */ + if(export_count == 0) + { + Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE); + } + + if(pR700AsmCode->cf_last_export_ptr != NULL) + { + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1; + } + + return GL_TRUE; +} + +GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, + GLbitfield OutputsWritten) +{ + unsigned int unBit; + unsigned int i; + + GLuint export_starting_index = 0; + GLuint export_count = pR700AsmCode->number_of_exports; + + unBit = 1 << VERT_RESULT_HPOS; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_POS, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS], + GL_FALSE) ) + { + return GL_FALSE; + } + export_starting_index++; + export_count--; + } + + unBit = 1 << VERT_RESULT_PSIZ; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_POS, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_PSIZ], + GL_FALSE) ) + { + return GL_FALSE; + } + export_count--; + } + + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + + + pR700AsmCode->number_of_exports = export_count; + export_starting_index = 0; + + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + + for(i=0; i<8; i++) + { + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + } + + for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++) + { + unBit = 1 << i; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[i], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + } + + // At least one param should be exported + if (export_count) + { + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + } + else + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + 0, + 1, + pR700AsmCode->starting_export_register_number, + GL_FALSE) ) + { + return GL_FALSE; + } + + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0; + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0; + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0; + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1; + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + } + + pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1; + + return GL_TRUE; +} + +GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode) +{ + FREE(pR700AsmCode->pucOutMask); + FREE(pR700AsmCode->pInstDeps); + + if(NULL != pR700AsmCode->subs) + { + FREE(pR700AsmCode->subs); + } + if(NULL != pR700AsmCode->callers) + { + FREE(pR700AsmCode->callers); + } + + if(NULL != pR700AsmCode->presubs) + { + FREE(pR700AsmCode->presubs); + } + + return GL_TRUE; +} + |