From 27c07b6b28cb5d9f0b2ba446846670a234e48228 Mon Sep 17 00:00:00 2001 From: Richard Li Date: Tue, 21 Apr 2009 12:54:20 -0400 Subject: Initial pull of code from r6xx-r7xx-support branch Not functional yet. --- src/mesa/drivers/dri/r600/r700_assembler.c | 4068 ++++++++++++++++++++++++++++ 1 file changed, 4068 insertions(+) create mode 100644 src/mesa/drivers/dri/r600/r700_assembler.c (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c new file mode 100644 index 0000000000..5ceb3fdd9b --- /dev/null +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -0,0 +1,4068 @@ +/* + * Copyright (C) 2008-2009 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * Authors: + * Richard Li , + */ + +#include +#include +#include +#include +#include + +#include "main/mtypes.h" +#include "main/imports.h" + +#include "r600_context.h" +#include "r700_chip.h" +#include "r700_debug.h" + +#include "r700_assembler.h" + +BITS addrmode_PVSDST(PVSDST * pPVSDST) +{ + return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1); +} + +void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode) +{ + pPVSDST->addrmode0 = addrmode & 1; + pPVSDST->addrmode1 = (addrmode >> 1) & 1; +} + +void nomask_PVSDST(PVSDST * pPVSDST) +{ + pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1; +} + +BITS addrmode_PVSSRC(PVSSRC* pPVSSRC) +{ + return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1); +} + +void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode) +{ + pPVSSRC->addrmode0 = addrmode & 1; + pPVSSRC->addrmode1 = (addrmode >> 1) & 1; +} + + +void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz) +{ + pPVSSRC->swizzlex = + pPVSSRC->swizzley = + pPVSSRC->swizzlez = + pPVSSRC->swizzlew = swz; +} + +void noswizzle_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->swizzlex = SQ_SEL_X; + pPVSSRC->swizzley = SQ_SEL_Y; + pPVSSRC->swizzlez = SQ_SEL_Z; + pPVSSRC->swizzlew = SQ_SEL_W; +} + +void +swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w) +{ + switch (x) + { + case SQ_SEL_X: x = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: x = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: x = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: x = pPVSSRC->swizzlew; + break; + default:; + } + + switch (y) + { + case SQ_SEL_X: y = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: y = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: y = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: y = pPVSSRC->swizzlew; + break; + default:; + } + + switch (z) + { + case SQ_SEL_X: z = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: z = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: z = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: z = pPVSSRC->swizzlew; + break; + default:; + } + + switch (w) + { + case SQ_SEL_X: w = pPVSSRC->swizzlex; + break; + case SQ_SEL_Y: w = pPVSSRC->swizzley; + break; + case SQ_SEL_Z: w = pPVSSRC->swizzlez; + break; + case SQ_SEL_W: w = pPVSSRC->swizzlew; + break; + default:; + } + + pPVSSRC->swizzlex = x; + pPVSSRC->swizzley = y; + pPVSSRC->swizzlez = z; + pPVSSRC->swizzlew = w; +} + +void neg_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->negx = 1; + pPVSSRC->negy = 1; + pPVSSRC->negz = 1; + pPVSSRC->negw = 1; +} + +void noneg_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->negx = 0; + pPVSSRC->negy = 0; + pPVSSRC->negz = 0; + pPVSSRC->negw = 0; +} + +// negate argument (for SUB instead of ADD and alike) +void flipneg_PVSSRC(PVSSRC* pPVSSRC) +{ + pPVSSRC->negx = !pPVSSRC->negx; + pPVSSRC->negy = !pPVSSRC->negy; + pPVSSRC->negz = !pPVSSRC->negz; + pPVSSRC->negw = !pPVSSRC->negw; +} + +void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c) +{ + switch (c) + { + case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break; + case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break; + case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break; + case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break; + default:; + } +} + +void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c) +{ + switch (c) + { + case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break; + case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break; + case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break; + case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break; + default:; + } +} + +BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0) +{ + return (pOutVTXFmt0->point_size | + pOutVTXFmt0->edge_flag | + pOutVTXFmt0->rta_index | + pOutVTXFmt0->kill_flag | + pOutVTXFmt0->viewport_index); +} + +BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt) +{ + return (pFPOutFmt->depth | + pFPOutFmt->stencil_ref | + pFPOutFmt->mask | + pFPOutFmt->coverage_to_mask); +} + +GLboolean is_reduction_opcode(PVSDWORD* dest) +{ + if (dest->dst.op3 == 0) + { + if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE) ) + { + return GL_TRUE; + } + } + return GL_FALSE; +} + +GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) +{ + GLuint format = FMT_INVALID; + GLuint uiElemSize = 0; + + switch (eType) + { + case GL_BYTE: + case GL_UNSIGNED_BYTE: + uiElemSize = 1; + switch(nChannels) + { + case 1: + format = FMT_8; break; + case 2: + format = FMT_8_8; break; + case 3: + format = FMT_8_8_8; break; + case 4: + format = FMT_8_8_8_8; break; + default: + break; + } + break; + + case GL_UNSIGNED_SHORT: + case GL_SHORT: + uiElemSize = 2; + switch(nChannels) + { + case 1: + format = FMT_16; break; + case 2: + format = FMT_16_16; break; + case 3: + format = FMT_16_16_16; break; + case 4: + format = FMT_16_16_16_16; break; + default: + break; + } + break; + + case GL_UNSIGNED_INT: + case GL_INT: + uiElemSize = 4; + switch(nChannels) + { + case 1: + format = FMT_32; break; + case 2: + format = FMT_32_32; break; + case 3: + format = FMT_32_32_32; break; + case 4: + format = FMT_32_32_32_32; break; + default: + break; + } + break; + + case GL_FLOAT: + uiElemSize = 4; + switch(nChannels) + { + case 1: + format = FMT_32_FLOAT; break; + case 2: + format = FMT_32_32_FLOAT; break; + case 3: + format = FMT_32_32_32_FLOAT; break; + case 4: + format = FMT_32_32_32_32_FLOAT; break; + default: + break; + } + break; + case GL_DOUBLE: + uiElemSize = 8; + switch(nChannels) + { + case 1: + format = FMT_32_FLOAT; break; + case 2: + format = FMT_32_32_FLOAT; break; + case 3: + format = FMT_32_32_32_FLOAT; break; + case 4: + format = FMT_32_32_32_32_FLOAT; break; + default: + break; + } + break; + default: + ; + //GL_ASSERT_NO_CASE(); + } + + if(NULL != pClient_size) + { + *pClient_size = uiElemSize * nChannels; + } + + return(format); +} + +unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) +{ + if(pAsm->D.dst.op3) + { + return 3; + } + + switch (pAsm->D.dst.opcode) + { + case SQ_OP2_INST_ADD: + case SQ_OP2_INST_MUL: + case SQ_OP2_INST_MAX: + case SQ_OP2_INST_MIN: + //case SQ_OP2_INST_MAX_DX10: + //case SQ_OP2_INST_MIN_DX10: + case SQ_OP2_INST_SETGT: + case SQ_OP2_INST_SETGE: + case SQ_OP2_INST_PRED_SETE: + case SQ_OP2_INST_PRED_SETGT: + case SQ_OP2_INST_PRED_SETGE: + case SQ_OP2_INST_PRED_SETNE: + case SQ_OP2_INST_DOT4: + case SQ_OP2_INST_DOT4_IEEE: + return 2; + + case SQ_OP2_INST_MOV: + case SQ_OP2_INST_FRACT: + case SQ_OP2_INST_FLOOR: + case SQ_OP2_INST_KILLGT: + case SQ_OP2_INST_EXP_IEEE: + case SQ_OP2_INST_LOG_CLAMPED: + case SQ_OP2_INST_LOG_IEEE: + case SQ_OP2_INST_RECIP_IEEE: + case SQ_OP2_INST_RECIPSQRT_IEEE: + case SQ_OP2_INST_FLT_TO_INT: + case SQ_OP2_INST_SIN: + case SQ_OP2_INST_COS: + return 1; + + default: r700_error(TODO_ASM_NEEDIMPINST, + "Need instruction operand number. \n");; + }; + + return 3; +} + +int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader) +{ + GLuint i; + + Init_R700_Shader(pShader); + pAsm->pR700Shader = pShader; + pAsm->currentShaderType = spt; + + pAsm->cf_last_export_ptr = NULL; + + pAsm->cf_current_export_clause_ptr = NULL; + pAsm->cf_current_alu_clause_ptr = NULL; + pAsm->cf_current_tex_clause_ptr = NULL; + pAsm->cf_current_vtx_clause_ptr = NULL; + pAsm->cf_current_cf_clause_ptr = NULL; + + // No clause has been created yet + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + + pAsm->number_of_colorandz_exports = 0; + pAsm->number_of_exports = 0; + pAsm->number_of_export_opcodes = 0; + + + pAsm->D.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; + + pAsm->uLastPosUpdate = 0; + + *(BITS *) &pAsm->fp_stOutFmt0 = 0; + + pAsm->uIIns = 0; + pAsm->uOIns = 0; + pAsm->number_used_registers = 0; + pAsm->uUsedConsts = 256; + + + // Fragment programs + pAsm->uBoolConsts = 0; + pAsm->uIntConsts = 0; + pAsm->uInsts = 0; + pAsm->uConsts = 0; + + pAsm->FCSP = 0; + pAsm->fc_stack[0].type = FC_NONE; + + pAsm->branch_depth = 0; + pAsm->max_branch_depth = 0; + + pAsm->aArgSubst[0] = + pAsm->aArgSubst[1] = + pAsm->aArgSubst[2] = + pAsm->aArgSubst[3] = (-1); + + pAsm->uOutputs = 0; + + for (i=0; icolor_export_register_number[i] = (-1); + } + + + pAsm->depth_export_register_number = (-1); + pAsm->stencil_export_register_number = (-1); + pAsm->coverage_to_mask_export_register_number = (-1); + pAsm->mask_export_register_number = (-1); + + pAsm->starting_export_register_number = 0; + pAsm->starting_vfetch_register_number = 0; + pAsm->starting_temp_register_number = 0; + pAsm->uFirstHelpReg = 0; + + + pAsm->input_position_is_used = GL_FALSE; + pAsm->input_normal_is_used = GL_FALSE; + + + for (i=0; iinput_color_is_used[ i ] = GL_FALSE; + } + + for (i=0; iinput_texture_unit_is_used[ i ] = GL_FALSE; + } + + for (i=0; ivfetch_instruction_ptr_array[ i ] = NULL; + } + + pAsm->number_of_inputs = 0; + + return 0; +} + +GLboolean IsTex(gl_inst_opcode Opcode) +{ + if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ) + { + return GL_TRUE; + } + return GL_FALSE; +} + +GLboolean IsAlu(gl_inst_opcode Opcode) +{ + //TODO : more for fc and ex for higher spec. + if( IsTex(Opcode) ) + { + return GL_FALSE; + } + return GL_TRUE; +} + +int check_current_clause(r700_AssemblerBase* pAsm, + CF_CLAUSE_TYPE new_clause_type) +{ + if (pAsm->cf_current_clause_type != new_clause_type) + { //Close last open clause + switch (pAsm->cf_current_clause_type) + { + case CF_ALU_CLAUSE: + if ( pAsm->cf_current_alu_clause_ptr != NULL) + { + pAsm->cf_current_alu_clause_ptr = NULL; + } + break; + case CF_VTX_CLAUSE: + if ( pAsm->cf_current_vtx_clause_ptr != NULL) + { + pAsm->cf_current_vtx_clause_ptr = NULL; + } + break; + case CF_TEX_CLAUSE: + if ( pAsm->cf_current_tex_clause_ptr != NULL) + { + pAsm->cf_current_tex_clause_ptr = NULL; + } + break; + case CF_EXPORT_CLAUSE: + if ( pAsm->cf_current_export_clause_ptr != NULL) + { + pAsm->cf_current_export_clause_ptr = NULL; + } + break; + case CF_OTHER_CLAUSE: + if ( pAsm->cf_current_cf_clause_ptr != NULL) + { + pAsm->cf_current_cf_clause_ptr = NULL; + } + break; + case CF_EMPTY_CLAUSE: + break; + default: + r700_error(ERROR_ASM_VTX_CLAUSE, + "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type); + return GL_FALSE; + } + + pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE; + + // Create new clause + switch (new_clause_type) + { + case CF_ALU_CLAUSE: + pAsm->cf_current_clause_type = CF_ALU_CLAUSE; + break; + case CF_VTX_CLAUSE: + pAsm->cf_current_clause_type = CF_VTX_CLAUSE; + break; + case CF_TEX_CLAUSE: + pAsm->cf_current_clause_type = CF_TEX_CLAUSE; + break; + case CF_EXPORT_CLAUSE: + { + R700ControlFlowSXClause* pR700ControlFlowSXClause + = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause); + + // Add new export instruction to control flow program + if (pR700ControlFlowSXClause != 0) + { + pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause; + Init_R700ControlFlowSXClause(pR700ControlFlowSXClause); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pR700ControlFlowSXClause ); + } + else + { + r700_error(ERROR_ASM_ALLOCEXPORTCF, + "Error allocating new EXPORT CF instruction in check_current_clause. \n"); + return GL_FALSE; + } + pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE; + } + break; + case CF_EMPTY_CLAUSE: + break; + case CF_OTHER_CLAUSE: + pAsm->cf_current_clause_type = CF_OTHER_CLAUSE; + break; + default: + r700_error(ERROR_ASM_UNKOWNCLAUSE, + "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type); + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, + R700VertexInstruction* vertex_instruction_ptr) +{ + if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) ) + { + return GL_FALSE; + } + + if( pAsm->cf_current_vtx_clause_ptr == NULL || + ( (pAsm->cf_current_vtx_clause_ptr != NULL) && + (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1) + ) ) + { + // Create new Vfetch control flow instruction for this new clause + pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause); + + if (pAsm->cf_current_vtx_clause_ptr != NULL) + { + Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr ); + } + else + { + r700_error(ERROR_ASM_ALLOCVTXCF, "Could not allocate a new VFetch CF instruction."); + return GL_FALSE; + } + + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1; + + LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr ); + } + else + { + pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++; + } + + AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr); + + return GL_TRUE; +} + +GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, + R700TextureInstruction* tex_instruction_ptr) +{ + if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) ) + { + return GL_FALSE; + } + + if ( pAsm->cf_current_tex_clause_ptr == NULL || + ( (pAsm->cf_current_tex_clause_ptr != NULL) && + (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1) + ) ) + { + // new tex cf instruction for this new clause + pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause); + + if (pAsm->cf_current_tex_clause_ptr != NULL) + { + Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr ); + } + else + { + r700_error(ERROR_ASM_ALLOCTEXCF, "Could not allocate a new TEX CF instruction."); + return GL_FALSE; + } + + pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE; + + pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1; + } + else + { + pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++; + } + + // If this clause constains any TEX instruction that is dependent on a previous instruction, + // set the barrier bit + if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) + { + pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1; + } + + if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction) + { + pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr; + tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr; + } + + AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr); + + return GL_TRUE; +} + +GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm, + GLuint gl_client_id, + GLuint destination_register, + GLuint number_of_elements, + GLenum dataElementType, + VTX_FETCH_METHOD* pFetchMethod) +{ + GLuint client_size_inbyte; + GLuint data_format; + GLuint mega_fetch_count; + GLuint is_mega_fetch_flag; + + R700VertexGenericFetch* vfetch_instruction_ptr; + R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ]; + + if (assembled_vfetch_instruction_ptr == NULL) + { + vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch); + if (vfetch_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700VertexGenericFetch(vfetch_instruction_ptr); + } + else + { + vfetch_instruction_ptr = assembled_vfetch_instruction_ptr; + } + + data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte); + + if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here + { + //TODO : mini fetch + } + else + { + mega_fetch_count = MEGA_FETCH_BYTES - 1; + is_mega_fetch_flag = 0x1; + pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte; + } + + vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH; + vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA; + vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id; + vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0; + vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X; + vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count; + + vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X; + vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y; + vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z; + vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W; + + vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1; + + // Destination register + vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register; + vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE; + + vfetch_instruction_ptr->m_Word2.f.offset = 0; + vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0; + + vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag; + + if (assembled_vfetch_instruction_ptr == NULL) + { + if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) ) + { + return GL_FALSE; + } + + if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL) + { + return GL_FALSE; + } + else + { + pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr; + } + } + + return GL_TRUE; +} + +GLuint gethelpr(r700_AssemblerBase* pAsm) +{ + GLuint r = pAsm->uHelpReg; + pAsm->uHelpReg++; + if (pAsm->uHelpReg > pAsm->number_used_registers) + { + pAsm->number_used_registers = pAsm->uHelpReg; + } + return r; +} +void resethelpr(r700_AssemblerBase* pAsm) +{ + pAsm->uHelpReg = pAsm->uFirstHelpReg; +} + +void checkop_init(r700_AssemblerBase* pAsm) +{ + resethelpr(pAsm); + pAsm->aArgSubst[0] = + pAsm->aArgSubst[1] = + pAsm->aArgSubst[2] = + pAsm->aArgSubst[3] = -1; +} + +GLboolean mov_temp(r700_AssemblerBase* pAsm, int src) +{ + GLuint tmp = gethelpr(pAsm); + + //mov src to temp helper gpr. + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, src, 0) ) + { + return GL_FALSE; + } + + noswizzle_PVSSRC(&(pAsm->S[0].src)); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->aArgSubst[1 + src] = tmp; + + return GL_TRUE; +} + +GLboolean checkop1(r700_AssemblerBase* pAsm) +{ + checkop_init(pAsm); + return GL_TRUE; +} + +GLboolean checkop2(r700_AssemblerBase* pAsm) +{ + GLboolean bSrcConst[2]; + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + checkop_init(pAsm); + + if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[0] = GL_TRUE; + } + else + { + bSrcConst[0] = GL_FALSE; + } + if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[1] = GL_TRUE; + } + else + { + bSrcConst[1] = GL_FALSE; + } + + if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) ) + { + if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index) + { + if( GL_FALSE == mov_temp(pAsm, 1) ) + { + return GL_FALSE; + } + } + } + + return GL_TRUE; +} + +GLboolean checkop3(r700_AssemblerBase* pAsm) +{ + GLboolean bSrcConst[3]; + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + checkop_init(pAsm); + + if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[0] = GL_TRUE; + } + else + { + bSrcConst[0] = GL_FALSE; + } + if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[1] = GL_TRUE; + } + else + { + bSrcConst[1] = GL_FALSE; + } + if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) || + (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) || + (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) || + (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) ) + { + bSrcConst[2] = GL_TRUE; + } + else + { + bSrcConst[2] = GL_FALSE; + } + + if( (GL_TRUE == bSrcConst[0]) && + (GL_TRUE == bSrcConst[1]) && + (GL_TRUE == bSrcConst[2]) ) + { + if( GL_FALSE == mov_temp(pAsm, 1) ) + { + return GL_FALSE; + } + if( GL_FALSE == mov_temp(pAsm, 2) ) + { + return GL_FALSE; + } + + return GL_TRUE; + } + else if( (GL_TRUE == bSrcConst[0]) && + (GL_TRUE == bSrcConst[1]) ) + { + if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index) + { + if( GL_FALSE == mov_temp(pAsm, 1) ) + { + return 1; + } + } + + return GL_TRUE; + } + else if ( (GL_TRUE == bSrcConst[0]) && + (GL_TRUE == bSrcConst[2]) ) + { + if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index) + { + if( GL_FALSE == mov_temp(pAsm, 2) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; + } + else if( (GL_TRUE == bSrcConst[1]) && + (GL_TRUE == bSrcConst[2]) ) + { + if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index) + { + if( GL_FALSE == mov_temp(pAsm, 2) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; + } + + return GL_TRUE; +} + +GLboolean assemble_src(r700_AssemblerBase *pAsm, + int src, + int fld) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if (fld == -1) + { + fld = src; + } + + if(pAsm->aArgSubst[1+src] >= 0) + { + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src]; + } + else + { + switch (pILInst->SrcReg[src].File) + { + case PROGRAM_TEMPORARY: + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number; + break; + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + if (1 == pILInst->SrcReg[src].RelAddr) + { + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0); + } + else + { + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + } + + pAsm->S[fld].src.rtype = SRC_REG_CONSTANT; + pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index; + break; + case PROGRAM_INPUT: + setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE); + pAsm->S[fld].src.rtype = SRC_REG_INPUT; + switch (pAsm->currentShaderType) + { + case SPT_FP: + pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index]; + break; + case SPT_VP: + pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index]; + break; + } + break; + default: + r700_error(ERROR_ASM_SRCARGUMENT, "Invalid source argument type"); + return GL_FALSE; + } + } + + pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7; + pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7; + pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7; + pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7; + + pAsm->S[fld].src.negx = pILInst->SrcReg[src].NegateBase & 0x1; + pAsm->S[fld].src.negy = (pILInst->SrcReg[src].NegateBase >> 1) & 0x1; + pAsm->S[fld].src.negz = (pILInst->SrcReg[src].NegateBase >> 2) & 0x1; + pAsm->S[fld].src.negw = (pILInst->SrcReg[src].NegateBase >> 3) & 0x1; + + return GL_TRUE; +} + +GLboolean assemble_dst(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + switch (pILInst->DstReg.File) + { + case PROGRAM_TEMPORARY: + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number; + break; + case PROGRAM_ADDRESS: + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_A0; + pAsm->D.dst.reg = 0; + break; + case PROGRAM_OUTPUT: + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_OUT; + switch (pAsm->currentShaderType) + { + case SPT_FP: + pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index]; + break; + case SPT_VP: + pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index]; + break; + } + break; + default: + r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type"); + return GL_FALSE; + } + + pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1; + pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1; + pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; + pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + + return GL_TRUE; +} + +GLboolean tex_dst(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if(PROGRAM_TEMPORARY == pILInst->DstReg.File) + { + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + } + else if(PROGRAM_OUTPUT == pILInst->DstReg.File) + { + pAsm->D.dst.rtype = DST_REG_OUT; + switch (pAsm->currentShaderType) + { + case SPT_FP: + pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index]; + break; + case SPT_VP: + pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index]; + break; + } + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + } + else + { + r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type"); + return GL_FALSE; + } + + pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1; + pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1; + pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1; + pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1; + + return GL_TRUE; +} + +GLboolean tex_src(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + GLboolean bValidTexCoord = GL_FALSE; + + switch (pILInst->SrcReg[0].File) + { + case PROGRAM_TEMPORARY: + bValidTexCoord = GL_TRUE; + + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + + break; + case PROGRAM_INPUT: + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + + pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + } + break; + } + + if(GL_TRUE == bValidTexCoord) + { + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + } + else + { + r700_error(ERROR_ASM_BADTEXSRC, "Invalid source texcoord for TEX instruction"); + return GL_FALSE; + } + + pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7; + pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7; + pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7; + pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7; + + pAsm->S[0].src.negx = pILInst->SrcReg[0].NegateBase & 0x1; + pAsm->S[0].src.negy = (pILInst->SrcReg[0].NegateBase >> 1) & 0x1; + pAsm->S[0].src.negz = (pILInst->SrcReg[0].NegateBase >> 2) & 0x1; + pAsm->S[0].src.negw = (pILInst->SrcReg[0].NegateBase >> 3) & 0x1; + + return GL_TRUE; +} + +GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm) +{ + PVSSRC * texture_coordinate_source; + PVSSRC * texture_unit_source; + + R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction); + if (tex_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700TextureInstruction(tex_instruction_ptr); + + texture_coordinate_source = &(pAsm->S[0].src); + texture_unit_source = &(pAsm->S[1].src); + + tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode; + tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0; + tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0; + + tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; + + tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; + tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED; + + tex_instruction_ptr->m_Word2.f.offset_x = 0x0; + tex_instruction_ptr->m_Word2.f.offset_y = 0x0; + tex_instruction_ptr->m_Word2.f.offset_z = 0x0; + + tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg; + + // dst + if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || + (pAsm->D.dst.rtype == DST_REG_OUT) ) + { + tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg; + tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE; + + tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; + tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; + + tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK); + tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK); + tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK); + tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK); + + + tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex; + tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley; + tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez; + tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew; + } + else + { + r700_error(ERROR_ASM_TEXDSTBADTYPE, "Only temp destination registers supported for TEX dest regs."); + return GL_FALSE; + } + + if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +void initialize(r700_AssemblerBase *pAsm) +{ + GLuint cycle, component; + + for (cycle=0; cyclehw_gpr[cycle][component] = (-1); + } + } + for (component=0; componenthw_cfile_addr[component] = (-1); + pAsm->hw_cfile_chan[component] = (-1); + } +} + +GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, + int source_index, + PVSSRC* pSource, + BITS scalar_channel_index) +{ + BITS src_sel; + BITS src_rel; + BITS src_chan; + BITS src_neg; + + //-------------------------------------------------------------------------- + // Source for operands src0, src1. + // Values [0,127] correspond to GPR[0..127]. + // Values [256,511] correspond to cfile constants c[0..255]. + + //-------------------------------------------------------------------------- + // Other special values are shown in the list below. + + // 248 SQ_ALU_SRC_0: special constant 0.0. + // 249 SQ_ALU_SRC_1: special constant 1.0 float. + + // 250 SQ_ALU_SRC_1_INT: special constant 1 integer. + // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer. + + // 252 SQ_ALU_SRC_0_5: special constant 0.5 float. + // 253 SQ_ALU_SRC_LITERAL: literal constant. + + // 254 SQ_ALU_SRC_PV: previous vector result. + // 255 SQ_ALU_SRC_PS: previous scalar result. + //-------------------------------------------------------------------------- + + BITS channel_swizzle; + switch (scalar_channel_index) + { + case 0: channel_swizzle = pSource->swizzlex; break; + case 1: channel_swizzle = pSource->swizzley; break; + case 2: channel_swizzle = pSource->swizzlez; break; + case 3: channel_swizzle = pSource->swizzlew; break; + default: channel_swizzle = SQ_SEL_MASK; break; + } + + if(channel_swizzle == SQ_SEL_0) + { + src_sel = SQ_ALU_SRC_0; + } + else if (channel_swizzle == SQ_SEL_1) + { + src_sel = SQ_ALU_SRC_1; + } + else + { + if ( (pSource->rtype == SRC_REG_TEMPORARY) || + (pSource->rtype == SRC_REG_INPUT) + ) + { + src_sel = pSource->reg; + } + else if (pSource->rtype == SRC_REG_CONSTANT) + { + src_sel = pSource->reg + CFILE_REGISTER_OFFSET; + } + else + { + r700_error(ERROR_ASM_ALUSRCBADTYPE, "Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.", + source_index, pSource->rtype); + return GL_FALSE; + } + } + + if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) ) + { + src_rel = SQ_ABSOLUTE; + } + else + { + src_rel = SQ_RELATIVE; + } + + switch (channel_swizzle) + { + case SQ_SEL_X: + src_chan = SQ_CHAN_X; + break; + case SQ_SEL_Y: + src_chan = SQ_CHAN_Y; + break; + case SQ_SEL_Z: + src_chan = SQ_CHAN_Z; + break; + case SQ_SEL_W: + src_chan = SQ_CHAN_W; + break; + case SQ_SEL_0: + case SQ_SEL_1: + // Does not matter since src_sel controls + src_chan = SQ_CHAN_X; + break; + default: + r700_error(ERROR_ASM_ALUSRCSELECT, "Unknown source select value (%d) in assemble_alu_src()."); + return GL_FALSE; + break; + } + + switch (scalar_channel_index) + { + case 0: src_neg = pSource->negx; break; + case 1: src_neg = pSource->negy; break; + case 2: src_neg = pSource->negz; break; + case 3: src_neg = pSource->negw; break; + default: src_neg = 0; break; + } + + switch (source_index) + { + case 0: + alu_instruction_ptr->m_Word0.f.src0_sel = src_sel; + alu_instruction_ptr->m_Word0.f.src0_rel = src_rel; + alu_instruction_ptr->m_Word0.f.src0_chan = src_chan; + alu_instruction_ptr->m_Word0.f.src0_neg = src_neg; + break; + case 1: + alu_instruction_ptr->m_Word0.f.src1_sel = src_sel; + alu_instruction_ptr->m_Word0.f.src1_rel = src_rel; + alu_instruction_ptr->m_Word0.f.src1_chan = src_chan; + alu_instruction_ptr->m_Word0.f.src1_neg = src_neg; + break; + case 2: + alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel; + alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel; + alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan; + alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg; + break; + default: + r700_error(ERROR_ASM_ALUSRCNUMBER, "Only three sources allowed in ALU opcodes."); + return GL_FALSE; + break; + } + + return GL_TRUE; +} + +GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr, + GLuint contiguous_slots_needed) +{ + if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) ) + { + return GL_FALSE; + } + + if ( pAsm->cf_current_alu_clause_ptr == NULL || + ( (pAsm->cf_current_alu_clause_ptr != NULL) && + (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) ) + ) ) + { + + //new cf inst for this clause + pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause); + + // link the new cf to cf segment + if(NULL != pAsm->cf_current_alu_clause_ptr) + { + Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr); + AddCFInstruction( pAsm->pR700Shader, + (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr ); + } + else + { + r700_error(ERROR_ASM_ALLOCALUCF, "Could not allocate a new ALU CF instruction."); + return GL_FALSE; + } + + pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP; + + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0; + + //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0; + pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU; + + pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + + pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1; + } + else + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++; + } + + // If this clause constains any instruction that is forward dependent on a TEX instruction, + // set the whole_quad_mode for this clause + if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) ) + { + pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1; + } + + if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) ) + { + alu_instruction_ptr->m_Word0.f.last = 1; + } + + if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction) + { + pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr; + alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr; + } + + AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr); + + return GL_TRUE; +} + +void get_src_properties(R700ALUInstruction* alu_instruction_ptr, + int source_index, + BITS* psrc_sel, + BITS* psrc_rel, + BITS* psrc_chan, + BITS* psrc_neg) +{ + switch (source_index) + { + case 0: + *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ; + *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ; + *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan; + *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ; + break; + + case 1: + *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ; + *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ; + *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan; + *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ; + break; + + case 2: + *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel; + *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel; + *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan; + *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg; + break; + } +} + +int is_cfile(BITS sel) +{ + if (sel > 255 && sel < 512) + { + return 1; + } + return 0; +} + +int is_const(BITS sel) +{ + if (is_cfile(sel)) + { + return 1; + } + else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL) + { + return 1; + } + return 0; +} + +int is_gpr(BITS sel) +{ + if (sel >= 0 && sel < 128) + { + return 1; + } + return 0; +} + +const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000 + SQ_ALU_VEC_120, //001 + SQ_ALU_VEC_102, //010 + + SQ_ALU_VEC_201, //011 + SQ_ALU_VEC_012, //100 + SQ_ALU_VEC_021, //101 + + SQ_ALU_VEC_012, //110 + SQ_ALU_VEC_012}; //111 + +const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000 + SQ_ALU_SCL_122, //001 + SQ_ALU_SCL_122, //010 + + SQ_ALU_SCL_221, //011 + SQ_ALU_SCL_212, //100 + SQ_ALU_SCL_122, //101 + + SQ_ALU_SCL_122, //110 + SQ_ALU_SCL_122}; //111 + +GLboolean reserve_cfile(r700_AssemblerBase* pAsm, + GLuint sel, + GLuint chan) +{ + int res_match = (-1); + int res_empty = (-1); + + GLint res; + + for (res=3; res>=0; res--) + { + if(pAsm->hw_cfile_addr[ res] < 0) + { + res_empty = res; + } + else if( (pAsm->hw_cfile_addr[res] == (int)sel) + && + (pAsm->hw_cfile_chan[ res ] == (int) chan) ) + { + res_match = res; + } + } + + if(res_match >= 0) + { + // Read for this scalar component already reserved, nothing to do here. + ; + } + else if(res_empty >= 0) + { + pAsm->hw_cfile_addr[ res_empty ] = sel; + pAsm->hw_cfile_chan[ res_empty ] = chan; + } + else + { + r700_error(ERROR_ASM_CONSTCHANNEL, "All cfile read ports are used, cannot reference C$sel, channel $chan."); + return GL_FALSE; + } + return GL_TRUE; +} + +GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle) +{ + if(pAsm->hw_gpr[cycle][chan] < 0) + { + pAsm->hw_gpr[cycle][chan] = sel; + } + else if(pAsm->hw_gpr[cycle][chan] != (int)sel) + { + r700_error(ERROR_ASM_BADGPRRESERVE, "Another scalar operation has already used GPR read port for given channel"); + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle) +{ + switch (swiz) + { + case SQ_ALU_SCL_210: + { + int table[3] = {2, 1, 0}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + case SQ_ALU_SCL_122: + { + int table[3] = {1, 2, 2}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + case SQ_ALU_SCL_212: + { + int table[3] = {2, 1, 2}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + case SQ_ALU_SCL_221: + { + int table[3] = {2, 2, 1}; + *pCycle = table[sel]; + return GL_TRUE; + } + break; + default: + r700_error(ERROR_ASM_BADSCALARBZ, "Bad Scalar bank swizzle value"); + break; + } + + return GL_FALSE; +} + +GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle) +{ + switch (swiz) + { + case SQ_ALU_VEC_012: + { + int table[3] = {0, 1, 2}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_021: + { + int table[3] = {0, 2, 1}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_120: + { + int table[3] = {1, 2, 0}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_102: + { + int table[3] = {1, 0, 2}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_201: + { + int table[3] = {2, 0, 1}; + *pCycle = table[sel]; + } + break; + case SQ_ALU_VEC_210: + { + int table[3] = {2, 1, 0}; + *pCycle = table[sel]; + } + break; + default: + r700_error(ERROR_ASM_BADVECTORBZ, "Bad Vec bank swizzle value"); + return GL_FALSE; + break; + } + + return GL_TRUE; +} + +GLboolean check_scalar(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr) +{ + GLuint cycle; + GLuint bank_swizzle; + GLuint const_count = 0; + + BITS sel; + BITS chan; + BITS rel; + BITS neg; + + GLuint src; + + BITS src_sel [3] = {0,0,0}; + BITS src_chan[3] = {0,0,0}; + BITS src_rel [3] = {0,0,0}; + BITS src_neg [3] = {0,0,0}; + + GLuint swizzle_key; + + GLuint number_of_operands = r700GetNumOperands(pAsm); + + for (src=0; srcm_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ]; + + for (src=0; srcm_Word1.f.bank_swizzle; + + if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) ) + { + return GL_FALSE; + } + + if(cycle < const_count) + { + if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) ) + { + return GL_FALSE; + } + } + } + } + + return GL_TRUE; +} + +GLboolean check_vector(r700_AssemblerBase* pAsm, + R700ALUInstruction* alu_instruction_ptr) +{ + GLuint cycle; + GLuint bank_swizzle; + GLuint const_count = 0; + + GLuint src; + + BITS sel; + BITS chan; + BITS rel; + BITS neg; + + BITS src_sel [3] = {0,0,0}; + BITS src_chan[3] = {0,0,0}; + BITS src_rel [3] = {0,0,0}; + BITS src_neg [3] = {0,0,0}; + + GLuint swizzle_key; + + GLuint number_of_operands = r700GetNumOperands(pAsm); + + for (src=0; srcm_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key]; + + for (src=0; srcm_Word1.f.bank_swizzle; + + if( is_gpr(sel) ) + { + if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) ) + { + return GL_FALSE; + } + + if ( (src == 1) && + (sel == src_sel[0]) && + (chan == src_chan[0]) ) + { + } + else + { + if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) ) + { + return GL_FALSE; + } + } + } + else if( is_const(sel) ) + { + const_count++; + + if( is_cfile(sel) ) + { + if( GL_FALSE == reserve_cfile(pAsm, sel, chan) ) + { + return GL_FALSE; + } + } + } + } + + return GL_TRUE; +} + +GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) +{ + GLuint number_of_scalar_operations; + GLboolean is_single_scalar_operation; + GLuint scalar_channel_index; + + PVSSRC * pcurrent_source; + int current_source_index; + GLuint contiguous_slots_needed; + + GLuint uNumSrc = r700GetNumOperands(pAsm); + GLuint channel_swizzle, j; + GLuint chan_counter[4] = {0, 0, 0, 0}; + PVSSRC * pSource[3]; + GLboolean bSplitInst = GL_FALSE; + + if (1 == pAsm->D.dst.math) + { + is_single_scalar_operation = GL_TRUE; + number_of_scalar_operations = 1; + } + else + { + is_single_scalar_operation = GL_FALSE; + number_of_scalar_operations = 4; + + /* check read port, only very preliminary algorithm, not count in + src0/1 same comp case and prev slot repeat case; also not count relative + addressing. TODO: improve performance. */ + for(j=0; jS[j].src); + } + for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++) + { + for(j=0; jswizzlex; break; + case 1: channel_swizzle = pSource[j]->swizzley; break; + case 2: channel_swizzle = pSource[j]->swizzlez; break; + case 3: channel_swizzle = pSource[j]->swizzlew; break; + default: channel_swizzle = SQ_SEL_MASK; break; + } + if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) || + (pSource[j]->rtype == SRC_REG_INPUT)) + && (channel_swizzle <= SQ_SEL_W) ) + { + chan_counter[channel_swizzle]++; + } + } + } + if( (chan_counter[SQ_SEL_X] > 3) + || (chan_counter[SQ_SEL_Y] > 3) + || (chan_counter[SQ_SEL_Z] > 3) + || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */ + { + bSplitInst = GL_TRUE; + } + } + + contiguous_slots_needed = 0; + + if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) ) + { + contiguous_slots_needed = 4; + } + + initialize(pAsm); + + for (scalar_channel_index=0; + scalar_channel_index < number_of_scalar_operations; + scalar_channel_index++) + { + R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction); + if (alu_instruction_ptr == NULL) + { + return GL_FALSE; + } + Init_R700ALUInstruction(alu_instruction_ptr); + + //src 0 + current_source_index = 0; + pcurrent_source = &(pAsm->S[0].src); + + if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + + if (pAsm->D.dst.math == 0) + { + // Process source 1 + current_source_index = 1; + pcurrent_source = &(pAsm->S[current_source_index].src); + + if (GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + } + + //other bits + alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP; + + if( (is_single_scalar_operation == GL_TRUE) + || (GL_TRUE == bSplitInst) ) + { + alu_instruction_ptr->m_Word0.f.last = 1; + } + else + { + alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0; + } + + alu_instruction_ptr->m_Word0.f.pred_sel = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + + // dst + if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) || + (pAsm->D.dst.rtype == DST_REG_OUT) ) + { + alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg; + } + else + { + r700_error(ERROR_ASM_ALUDSTBADTYPE, "Only temp destination registers supported for ALU dest regs."); + return GL_FALSE; + } + + alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype + + if ( is_single_scalar_operation == GL_TRUE ) + { + // Override scalar_channel_index since only one scalar value will be written + if(pAsm->D.dst.writex) + { + scalar_channel_index = 0; + } + else if(pAsm->D.dst.writey) + { + scalar_channel_index = 1; + } + else if(pAsm->D.dst.writez) + { + scalar_channel_index = 2; + } + else if(pAsm->D.dst.writew) + { + scalar_channel_index = 3; + } + } + + alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index; + + alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode; + + if (pAsm->D.dst.op3) + { + //op3 + + alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode; + + //There's 3rd src for op3 + current_source_index = 2; + pcurrent_source = &(pAsm->S[current_source_index].src); + + if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr, + current_source_index, + pcurrent_source, + scalar_channel_index) ) + { + return GL_FALSE; + } + } + else + { + //op2 + if (pAsm->bR6xx) + { + alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode; + + alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0; + + //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0; + //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0; + switch (scalar_channel_index) + { + case 0: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex; + break; + case 1: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey; + break; + case 2: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez; + break; + case 3: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; + break; + default: + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = SQ_SEL_MASK; + break; + } + alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; + } + else + { + alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode; + + alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0; + alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0; + + //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0; + //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0; + switch (scalar_channel_index) + { + case 0: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex; + break; + case 1: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey; + break; + case 2: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez; + break; + case 3: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; + break; + default: + alu_instruction_ptr->m_Word1_OP2.f.write_mask = SQ_SEL_MASK; + break; + } + alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; + } + } + + if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) ) + { + return GL_FALSE; + } + + /* + * Judge the type of current instruction, is it vector or scalar + * instruction. + */ + if (is_single_scalar_operation) + { + if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) ) + { + return GL_FALSE; + } + } + else + { + if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) ) + { + return 1; + } + } + + contiguous_slots_needed = 0; + } + + return GL_TRUE; +} + +GLboolean next_ins(r700_AssemblerBase *pAsm) +{ + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if( GL_TRUE == IsTex(pILInst->Opcode) ) + { + if( GL_FALSE == assemble_tex_instruction(pAsm) ) + { + r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction"); + return GL_FALSE; + } + } + else + { //ALU + if( GL_FALSE == assemble_alu_instruction(pAsm) ) + { + r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling ALU instruction"); + return GL_FALSE; + } + } + + if(pAsm->D.dst.rtype == DST_REG_OUT) + { + if(pAsm->D.dst.op3) + { + // There is no mask for OP3 instructions, so all channels are written + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; + } + else + { + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] + |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; + } + } + + //reset for next inst. + pAsm->D.bits = 0; + pAsm->S[0].bits = 0; + pAsm->S[1].bits = 0; + pAsm->S[2].bits = 0; + + return GL_TRUE; +} + +GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // opcode tmp.x, a.x + // MOV dst, tmp.x + + pAsm->D.dst.opcode = opcode; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // Now replicate result to all necessary channels in destination + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_ABS(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + pAsm->S[1].bits = pAsm->S[0].bits; + flipneg_PVSSRC(&(pAsm->S[1].src)); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_ADD(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB) + { + flipneg_PVSSRC(&(pAsm->S[1].src)); + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_BAD(char *opcode_str) +{ + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction (%s)", opcode_str); + return GL_FALSE; +} + +GLboolean assemble_CMP(r700_AssemblerBase *pAsm) +{ + int tmp; + + if( GL_FALSE == checkop3(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE; + pAsm->D.dst.op3 = 1; + + tmp = (-1); + + if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + //OP3 has no support for write mask + tmp = gethelpr(pAsm); + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + } + else + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 2, 1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, 2) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + //tmp for source + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + noneg_PVSSRC(&(pAsm->S[0].src)); + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean assemble_COS(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_COS); +} + +GLboolean assemble_DOT(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_DOT4; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode) + { + zerocomp_PVSSRC(&(pAsm->S[0].src), 3); + zerocomp_PVSSRC(&(pAsm->S[1].src), 3); + } + else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH) + { + onecomp_PVSSRC(&(pAsm->S[1].src), 3); + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_DST(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + onecomp_PVSSRC(&(pAsm->S[0].src), 0); + onecomp_PVSSRC(&(pAsm->S[0].src), 3); + + onecomp_PVSSRC(&(pAsm->S[1].src), 0); + onecomp_PVSSRC(&(pAsm->S[1].src), 2); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_EX2(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE); +} + +GLboolean assemble_FLR(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR; + + if ( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT); +} + +GLboolean assemble_FRC(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + if ( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_KIL(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT; + + if ( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = 0; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0); + noneg_PVSSRC(&(pAsm->S[0].src)); + + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + + if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File) + { + pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number; + } + else + { //PROGRAM_OUTPUT + pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index]; + } + + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noswizzle_PVSSRC(&(pAsm->S[1].src)); + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->pR700Shader->killIsUsed = GL_TRUE; + + return GL_TRUE; +} + +GLboolean assemble_LG2(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE); +} + +GLboolean assemble_LRP(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + if( GL_FALSE == checkop3(pAsm) ) + { + return GL_FALSE; + } + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_ADD; + + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + nomask_PVSDST(&(pAsm->D.dst)); + + + if( GL_FALSE == assemble_src(pAsm, 1, 0) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == assemble_src(pAsm, 2, 1) ) + { + return GL_FALSE; + } + + neg_PVSSRC(&(pAsm->S[1].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + if( GL_FALSE == assemble_src(pAsm, 2, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm) +{ + int tmp, ii; + GLboolean bReplaceDst = GL_FALSE; + struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + if( GL_FALSE == checkop3(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + tmp = (-1); + + if(PROGRAM_TEMPORARY == pILInst->DstReg.File) + { /* TODO : more investigation on MAD src and dst using same register */ + for(ii=0; ii<3; ii++) + { + if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File) + && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) ) + { + bReplaceDst = GL_TRUE; + break; + } + } + } + if(0xF != pILInst->DstReg.WriteMask) + { /* OP3 has no support for write mask */ + bReplaceDst = GL_TRUE; + } + + if(GL_TRUE == bReplaceDst) + { + tmp = gethelpr(pAsm); + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + } + else + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 2, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + if (GL_TRUE == bReplaceDst) + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + //tmp for source + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + noneg_PVSSRC(&(pAsm->S[0].src)); + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +/* LIT dst, src */ +GLboolean assemble_LIT(r700_AssemblerBase *pAsm) +{ + unsigned int dstReg; + unsigned int dstType; + unsigned int srcReg; + unsigned int srcType; + checkop1(pAsm); + int tmp = gethelpr(pAsm); + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + dstReg = pAsm->D.dst.reg; + dstType = pAsm->D.dst.rtype; + srcReg = pAsm->S[0].src.reg; + srcType = pAsm->S[0].src.rtype; + + /* dst.xw, <- 1.0 */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 1; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_1; + pAsm->S[0].src.swizzley = SQ_SEL_1; + pAsm->S[0].src.swizzlez = SQ_SEL_1; + pAsm->S[0].src.swizzlew = SQ_SEL_1; + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* dst.y = max(src.x, 0.0) */ + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 1; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + pAsm->S[0].src.rtype = srcType; + pAsm->S[0].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_X; + pAsm->S[0].src.swizzlew = SQ_SEL_X; + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = tmp; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_0; + pAsm->S[1].src.swizzley = SQ_SEL_0; + pAsm->S[1].src.swizzlez = SQ_SEL_0; + pAsm->S[1].src.swizzlew = SQ_SEL_0; + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* dst.w = log(src.y) */ + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; + pAsm->D.dst.math = 1; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 1; + pAsm->S[0].src.rtype = srcType; + pAsm->S[0].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_Y; + pAsm->S[0].src.swizzley = SQ_SEL_Y; + pAsm->S[0].src.swizzlez = SQ_SEL_Y; + pAsm->S[0].src.swizzlew = SQ_SEL_Y; + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */ + pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; + pAsm->D.dst.op3 = 1; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + pAsm->S[0].src.rtype = srcType; + pAsm->S[0].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_W; + pAsm->S[0].src.swizzley = SQ_SEL_W; + pAsm->S[0].src.swizzlez = SQ_SEL_W; + pAsm->S[0].src.swizzlew = SQ_SEL_W; + + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[1].src.reg = dstReg; + setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[1].src)); + pAsm->S[1].src.swizzlex = SQ_SEL_W; + pAsm->S[1].src.swizzley = SQ_SEL_W; + pAsm->S[1].src.swizzlez = SQ_SEL_W; + pAsm->S[1].src.swizzlew = SQ_SEL_W; + + pAsm->S[2].src.rtype = srcType; + pAsm->S[2].src.reg = srcReg; + setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[2].src)); + pAsm->S[2].src.swizzlex = SQ_SEL_X; + pAsm->S[2].src.swizzley = SQ_SEL_X; + pAsm->S[2].src.swizzlez = SQ_SEL_X; + pAsm->S[2].src.swizzlew = SQ_SEL_X; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* dst.z = exp(tmp.x) */ + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 0; + + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + noneg_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_X; + pAsm->S[0].src.swizzlew = SQ_SEL_X; + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MAX(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MAX; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MIN(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MIN; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MOV(r700_AssemblerBase *pAsm) +{ + checkop1(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if (GL_FALSE == assemble_dst(pAsm)) + { + return GL_FALSE; + } + + if (GL_FALSE == assemble_src(pAsm, 0, -1)) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_MUL(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_POW(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // LG2 tmp.x, a.swizzle + pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MUL tmp.x, tmp.x, b.swizzle + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // EX2 dst.mask, tmp.x + // EX2 tmp.x, tmp.x + pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // Now replicate result to all necessary channels in destination + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_RCP(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE); +} + +GLboolean assemble_RSQ(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE); +} + +GLboolean assemble_SIN(r700_AssemblerBase *pAsm) +{ + return assemble_math_function(pAsm, SQ_OP2_INST_SIN); +} + +GLboolean assemble_SCS(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + checkop1(pAsm); + + tmp = gethelpr(pAsm); + + // COS tmp.x, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_COS; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // SIN tmp.y, a.x + pAsm->D.dst.opcode = SQ_OP2_INST_SIN; + pAsm->D.dst.math = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writey = 1; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + // MOV dst.mask, tmp + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + noswizzle_PVSSRC(&(pAsm->S[0].src)); + pAsm->S[0].src.swizzlez = SQ_SEL_0; + pAsm->S[0].src.swizzlew = SQ_SEL_0; + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_SGE(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_SETGE; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_SLT(r700_AssemblerBase *pAsm) +{ + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_SETGT; + + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 0, 1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, 0) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_STP(r700_AssemblerBase *pAsm) +{ + return GL_TRUE; +} + +GLboolean assemble_TEX(r700_AssemblerBase *pAsm) +{ + GLboolean src_const; + + switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) + { + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + src_const = GL_TRUE; + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + src_const = GL_FALSE; + } + + if (GL_TRUE == src_const) + { + r700_error(TODO_ASM_CONSTTEXADDR, "TODO: Texture coordinates from a constant register not supported."); + return GL_FALSE; + } + + switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) + { + case OPCODE_TEX: + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + break; + case OPCODE_TXB: + r700_error(TODO_ASM_TXB, "do not support TXB yet"); + return GL_FALSE; + break; + case OPCODE_TXP: + /* TODO : tex proj version : divid first 3 components by 4th */ + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + break; + default: + r700_error(ERROR_ASM_BADTEXINST, "Internal error: bad texture op (not TEX)"); + return GL_FALSE; + break; + } + + // Set src1 to tex unit id + pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit; + pAsm->S[1].src.rtype = SRC_REG_TEMPORARY; + + //No sw info from mesa compiler, so hard code here. + pAsm->S[1].src.swizzlex = SQ_SEL_X; + pAsm->S[1].src.swizzley = SQ_SEL_Y; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_W; + + if( GL_FALSE == tex_dst(pAsm) ) + { + return GL_FALSE; + } + + if( GL_FALSE == tex_src(pAsm) ) + { + return GL_FALSE; + } + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + return GL_TRUE; +} + +GLboolean assemble_XPD(r700_AssemblerBase *pAsm) +{ + BITS tmp; + + if( GL_FALSE == checkop2(pAsm) ) + { + return GL_FALSE; + } + + tmp = gethelpr(pAsm); + + pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + nomask_PVSDST(&(pAsm->D.dst)); + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + tmp = gethelpr(pAsm); + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + nomask_PVSDST(&(pAsm->D.dst)); + } + else + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + } + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == assemble_src(pAsm, 1, -1) ) + { + return GL_FALSE; + } + + swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0); + swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0); + + // result1 + (neg) result0 + setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE); + pAsm->S[2].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[2].src.reg = tmp; + + neg_PVSSRC(&(pAsm->S[2].src)); + noswizzle_PVSSRC(&(pAsm->S[2].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + + if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask) + { + if( GL_FALSE == assemble_dst(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + // Use tmp as source + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + + noneg_PVSSRC(&(pAsm->S[0].src)); + noswizzle_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm) +{ + return GL_TRUE; +} + +GLboolean assemble_IF(r700_AssemblerBase *pAsm) +{ + return GL_TRUE; +} + +GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm) +{ + return GL_TRUE; +} + +GLboolean AssembleInstr(GLuint uiNumberInsts, + struct prog_instruction *pILInst, + r700_AssemblerBase *pR700AsmCode) +{ + GLuint i; + + pR700AsmCode->pILInst = pILInst; + for(i=0; iuiCurInst = i; + + switch (pILInst[i].Opcode) + { + case OPCODE_ABS: + if ( GL_FALSE == assemble_ABS(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_ADD: + case OPCODE_SUB: + if ( GL_FALSE == assemble_ADD(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_ARL: + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARL "); + //if ( GL_FALSE == assemble_BAD("ARL") ) + return GL_FALSE; + break; + case OPCODE_ARR: + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARR "); + //if ( GL_FALSE == assemble_BAD("ARR") ) + return GL_FALSE; + break; + + case OPCODE_CMP: + if ( GL_FALSE == assemble_CMP(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_COS: + if ( GL_FALSE == assemble_COS(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_DPH: + if ( GL_FALSE == assemble_DOT(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_DST: + if ( GL_FALSE == assemble_DST(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_EX2: + if ( GL_FALSE == assemble_EX2(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_EXP: + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_EXP "); + //if ( GL_FALSE == assemble_BAD("EXP") ) + return GL_FALSE; + break; // approx of EX2 + + case OPCODE_FLR: + if ( GL_FALSE == assemble_FLR(pR700AsmCode) ) + return GL_FALSE; + break; + //case OP_FLR_INT: + // if ( GL_FALSE == assemble_FLR_INT() ) + // return GL_FALSE; + // break; + + case OPCODE_FRC: + if ( GL_FALSE == assemble_FRC(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_KIL: + if ( GL_FALSE == assemble_KIL(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LG2: + if ( GL_FALSE == assemble_LG2(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LIT: + if ( GL_FALSE == assemble_LIT(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LRP: + if ( GL_FALSE == assemble_LRP(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_LOG: + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_LOG "); + //if ( GL_FALSE == assemble_BAD("LOG") ) + return GL_FALSE; + break; // approx of LG2 + + case OPCODE_MAD: + if ( GL_FALSE == assemble_MAD(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MAX: + if ( GL_FALSE == assemble_MAX(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MIN: + if ( GL_FALSE == assemble_MIN(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_MOV: + if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_MUL: + if ( GL_FALSE == assemble_MUL(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_POW: + if ( GL_FALSE == assemble_POW(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_RCP: + if ( GL_FALSE == assemble_RCP(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_RSQ: + if ( GL_FALSE == assemble_RSQ(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_SIN: + if ( GL_FALSE == assemble_SIN(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_SCS: + if ( GL_FALSE == assemble_SCS(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_SGE: + if ( GL_FALSE == assemble_SGE(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_SLT: + if ( GL_FALSE == assemble_SLT(pR700AsmCode) ) + return GL_FALSE; + break; + + //case OP_STP: + // if ( GL_FALSE == assemble_STP(pR700AsmCode) ) + // return GL_FALSE; + // break; + + case OPCODE_SWZ: + if ( GL_FALSE == assemble_MOV(pR700AsmCode) ) + { + return GL_FALSE; + } + else + { + if( (i+1)pInstDeps[i+1].nDstDep = i+1; //=1? + } + } + } + } + break; + + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + if ( GL_FALSE == assemble_TEX(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_XPD: + if ( GL_FALSE == assemble_XPD(pR700AsmCode) ) + return GL_FALSE; + break; + + case OPCODE_IF : + if ( GL_FALSE == assemble_IF(pR700AsmCode) ) + return GL_FALSE; + break; + case OPCODE_ELSE : + r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ELSE "); + //if ( GL_FALSE == assemble_BAD("ELSE") ) + return GL_FALSE; + break; + case OPCODE_ENDIF: + if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) ) + return GL_FALSE; + break; + + //case OPCODE_EXPORT: + // if ( GL_FALSE == assemble_EXPORT() ) + // return GL_FALSE; + // break; + + case OPCODE_END: + //pR700AsmCode->uiCurInst = i; + //This is to remaind that if in later exoort there is depth/stencil + //export, we need a mov to re-arrange DST channel, where using a + //psuedo inst, we will use this end inst to do it. + return GL_TRUE; + + default: + r700_error(ERROR_ASM_UNKNOWNILINST, "internal: unknown instruction"); + return GL_FALSE; + } + } + + return GL_TRUE; +} + +GLboolean Process_Export(r700_AssemblerBase* pAsm, + GLuint type, + GLuint export_starting_index, + GLuint export_count, + GLuint starting_register_number, + GLboolean is_depth_export) +{ + unsigned char ucWriteMask; + + check_current_clause(pAsm, CF_EMPTY_CLAUSE); + check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr + + pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type; + + switch (type) + { + case SQ_EXPORT_PIXEL: + if(GL_TRUE == is_depth_export) + { + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index; + } + break; + + case SQ_EXPORT_POS: + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index; + break; + + case SQ_EXPORT_PARAM: + pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index; + break; + + default: + r700_error(ERROR_ASM_BADEXPORTTYPE, "Unknown export type: %d", type); + return GL_FALSE; + break; + } + + pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number; + + pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE; + pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3; + + pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1); + pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE + pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0; + pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1; + + if (export_count == 1) + { + ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number]; + + if( (ucWriteMask & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK; + } + if( ((ucWriteMask>>1) & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK; + } + if( ((ucWriteMask>>2) & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK; + } + if( ((ucWriteMask>>3) & 0x1) != 0) + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W; + } + else + { + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK; + } + } + else + { + // This should only be used if all components for all registers have been written + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X; + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y; + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z; + pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W; + } + + pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr; + + return GL_TRUE; +} + +GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select) +{ + gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END + pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV; + + // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select + + pAsm->D.dst.opcode = SQ_OP2_INST_MOV; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = pAsm->depth_export_register_number; + + pAsm->D.dst.writex = 1; // depth goes in R channel for HW + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = DST_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->depth_export_register_number; + + setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select); + + noneg_PVSSRC(&(pAsm->S[0].src)); + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save; + + return GL_TRUE; +} + +GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, + GLbitfield OutputsWritten) +{ + unsigned int unBit; + + if(pR700AsmCode->depth_export_register_number >= 0) + { + if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth + { + return GL_FALSE; + } + } + + unBit = 1 << FRAG_RESULT_COLOR; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PIXEL, + 0, + 1, + pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR], + GL_FALSE) ) + { + return GL_FALSE; + } + } + unBit = 1 << FRAG_RESULT_DEPTH; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PIXEL, + 0, + 1, + pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH], + GL_TRUE)) + { + return GL_FALSE; + } + } + + if(pR700AsmCode->cf_last_export_ptr != NULL) + { + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1; + } + + return GL_TRUE; +} + +GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, + GLbitfield OutputsWritten) +{ + unsigned int unBit; + unsigned int i; + + GLuint export_starting_index = 0; + GLuint export_count = pR700AsmCode->number_of_exports; + + unBit = 1 << VERT_RESULT_HPOS; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_POS, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_count--; + + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + } + + pR700AsmCode->number_of_exports = export_count; + + unBit = 1 << VERT_RESULT_COL0; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + + unBit = 1 << VERT_RESULT_COL1; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + + for(i=0; i<8; i++) + { + unBit = 1 << (VERT_RESULT_TEX0 + i); + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + } + + // At least one param should be exported + if (export_count) + { + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + } + else + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + 0, + 1, + 0, + GL_FALSE) ) + { + return GL_FALSE; + } + + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0; + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0; + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0; + pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1; + pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE; + } + + pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1; + + return GL_TRUE; +} + +GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode) +{ + FREE(pR700AsmCode->pucOutMask); + FREE(pR700AsmCode->pInstDeps); + return GL_TRUE; +} + -- cgit v1.2.3 From b52cdb81e2a0f7109e754daa4a02aced22aa995f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 21 May 2009 17:40:27 -0400 Subject: r6xx/r7xx: fix segfault in vertex shader setup --- src/mesa/drivers/dri/r600/r700_assembler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 5ceb3fdd9b..1610020ee8 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -4041,7 +4041,7 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, SQ_EXPORT_PARAM, 0, 1, - 0, + pR700AsmCode->starting_export_register_number, GL_FALSE) ) { return GL_FALSE; -- cgit v1.2.3 From d8694cd3f69dfc40509544293aa35f297571e878 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 29 May 2009 15:37:04 -0400 Subject: get rid of chip_object struct --- src/mesa/drivers/dri/r600/r600_context.c | 6 +----- src/mesa/drivers/dri/r600/r600_context.h | 30 ++++++++++++++++-------------- src/mesa/drivers/dri/r600/r700_assembler.c | 1 - src/mesa/drivers/dri/r600/r700_chip.c | 26 +------------------------- src/mesa/drivers/dri/r600/r700_chip.h | 12 ------------ src/mesa/drivers/dri/r600/r700_clear.c | 1 - src/mesa/drivers/dri/r600/r700_fragprog.c | 3 +-- src/mesa/drivers/dri/r600/r700_ioctl.c | 1 - src/mesa/drivers/dri/r600/r700_oglprog.c | 1 - src/mesa/drivers/dri/r600/r700_render.c | 7 +++---- src/mesa/drivers/dri/r600/r700_shader.c | 1 - src/mesa/drivers/dri/r600/r700_state.c | 20 +++++++------------- src/mesa/drivers/dri/r600/r700_tex.c | 1 - src/mesa/drivers/dri/r600/r700_vertprog.c | 3 +-- 14 files changed, 30 insertions(+), 83 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index c54dae0c53..526d02ed57 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -63,7 +63,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r600_emit.h" #include "radeon_bocs_wrapper.h" -#include "r700_chip.h" #include "r700_state.h" #include "r700_ioctl.h" @@ -369,9 +368,6 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc"); } - r600->disable_lowimpact_fallback = - driQueryOptionb(&r600->radeon.optionCache, - "disable_lowimpact_fallback"); radeon_fbo_init(&r600->radeon); radeonInitSpanFuncs( ctx ); @@ -410,7 +406,7 @@ r600DestroyContext (__DRIcontextPrivate * driContextPriv) context_t *context = ctx ? R700_CONTEXT(ctx) : NULL; if (context) - (context->chipobj.DestroyChipObj)(context->chipobj.pvChipObj); + FREE(context->hw.pStateList); } diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h index 321b05d930..52512aacc0 100644 --- a/src/mesa/drivers/dri/r600/r600_context.h +++ b/src/mesa/drivers/dri/r600/r600_context.h @@ -48,6 +48,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/mtypes.h" #include "main/colormac.h" +#include "r700_chip.h" + struct r600_context; typedef struct r600_context context_t; @@ -127,32 +129,20 @@ typedef struct offset_modifiers GLuint mask; } offset_modifiers; -typedef struct chip_object -{ - void *pvChipObj; - - /* ------------ OUT ------------------- */ - GLboolean (*DestroyChipObj)(GLcontext * ctx); - -} chip_object; - /** * \brief R600 context structure. */ struct r600_context { struct radeon_context radeon; /* parent class, must be first */ - /* ------ */ - chip_object chipobj; + /* ------ */ + R700_CHIP_CONTEXT hw; /* Vertex buffers */ GLvector4f dummy_attrib[_TNL_ATTRIB_MAX]; GLvector4f *temp_attrib[_TNL_ATTRIB_MAX]; - GLboolean disable_lowimpact_fallback; - - GLboolean vap_flush_needed; }; #define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx)) @@ -163,6 +153,18 @@ extern GLboolean r600CreateContext(const __GLcontextModes * glVisual, __DRIcontextPrivate * driContextPriv, void *sharedContextPrivate); +#define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(&context->hw)) + +extern GLboolean r700InitChipObject(context_t *context); +extern GLboolean r700SendContextStates(context_t *context); +extern int r700SetupStreams(GLcontext * ctx); +extern void r700SetupVTXConstants(GLcontext * ctx, + unsigned int nStreamID, + void * pAos, + unsigned int size, /* number of elements in vector */ + unsigned int stride, + unsigned int Count); /* number of vectors in stream */ + #define RADEON_D_CAPTURE 0 #define RADEON_D_PLAYBACK 1 #define RADEON_D_PLAYBACK_RAW 2 diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 1610020ee8..a564d6f099 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -34,7 +34,6 @@ #include "main/imports.h" #include "r600_context.h" -#include "r700_chip.h" #include "r700_debug.h" #include "r700_assembler.h" diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 9b30249231..5ae70c99c7 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -31,7 +31,6 @@ #include "r600_context.h" #include "r600_cmdbuf.h" -#include "r700_chip.h" #include "r700_state.h" #include "r700_tex.h" #include "r700_oglprog.h" @@ -39,25 +38,6 @@ #include "r700_vertprog.h" #include "r700_ioctl.h" -static GLboolean r700DestroyChipObj(GLcontext * ctx) -{ - context_t * context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700; - - if(NULL == context->chipobj.pvChipObj) - { - return GL_TRUE; - } - - r700 = (R700_CHIP_CONTEXT *)(context->chipobj.pvChipObj); - - FREE(r700->pStateList); - - FREE(r700); - - return GL_TRUE; -} - #define LINK_STATES(reg) \ do \ { \ @@ -71,11 +51,7 @@ GLboolean r700InitChipObject(context_t *context) { ContextState * pStateListWork; - R700_CHIP_CONTEXT *r700 = CALLOC( sizeof(R700_CHIP_CONTEXT) ); - - context->chipobj.pvChipObj = (void*)r700; - - context->chipobj.DestroyChipObj = r700DestroyChipObj; + R700_CHIP_CONTEXT *r700 = &context->hw; /* init state list */ r700->pStateList = (ContextState*) MALLOC (sizeof(ContextState)*sizeof(R700_CHIP_CONTEXT)/sizeof(unsigned int)); diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h index 88b5075479..918cb222d4 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.h +++ b/src/mesa/drivers/dri/r600/r700_chip.h @@ -451,17 +451,5 @@ typedef struct _R700_CHIP_CONTEXT } R700_CHIP_CONTEXT; -#define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(context->chipobj.pvChipObj)) - -extern GLboolean r700InitChipObject(context_t *context); -extern GLboolean r700SendContextStates(context_t *context); -extern int r700SetupStreams(GLcontext * ctx); -extern void r700SetupVTXConstants(GLcontext * ctx, - unsigned int nStreamID, - void * pAos, - unsigned int size, /* number of elements in vector */ - unsigned int stride, - unsigned int Count); /* number of vectors in stream */ - #endif /* _R700_CHIP_H_ */ diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index e610514e01..e1164a09d7 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -33,7 +33,6 @@ #include "main/enums.h" #include "r600_context.h" -#include "r700_chip.h" #include "r700_shaderinst.h" #include "r600_emit.h" diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index c46094dcd7..cebb82d853 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -38,7 +38,6 @@ #include "r600_context.h" #include "r600_cmdbuf.h" -#include "r700_chip.h" #include "r700_fragprog.h" #include "r700_debug.h" @@ -259,7 +258,7 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx) context_t *context = R700_CONTEXT(ctx); BATCH_LOCALS(&context->radeon); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); struct r700_fragment_program *fp = (struct r700_fragment_program *) (ctx->FragmentProgram._Current); diff --git a/src/mesa/drivers/dri/r600/r700_ioctl.c b/src/mesa/drivers/dri/r600/r700_ioctl.c index f8c3359a47..c479532001 100644 --- a/src/mesa/drivers/dri/r600/r700_ioctl.c +++ b/src/mesa/drivers/dri/r600/r700_ioctl.c @@ -37,7 +37,6 @@ #include "radeon_lock.h" #include "r600_context.h" -#include "r700_chip.h" #include "r700_ioctl.h" #include "r700_clear.h" diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c index ba49367dc4..36de143b1a 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.c +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -34,7 +34,6 @@ #include "r600_context.h" -#include "r700_chip.h" #include "r700_oglprog.h" #include "r700_fragprog.h" #include "r700_vertprog.h" diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index ded5f51a6a..cc90e8dfb7 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -47,7 +47,6 @@ #include "r600_context.h" #include "r600_cmdbuf.h" -#include "r700_chip.h" #include "r700_tex.h" #include "r700_vertprog.h" @@ -109,7 +108,7 @@ static GLboolean r700SetupShaders(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); GLuint exportCount; @@ -133,7 +132,7 @@ GLboolean r700SendTextureState(context_t *context) { unsigned int i; - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); #if 0 /* to be enabled */ for(i=0; ichipobj.pvChipObj); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); #if 1 BATCH_LOCALS(&context->radeon); diff --git a/src/mesa/drivers/dri/r600/r700_shader.c b/src/mesa/drivers/dri/r600/r700_shader.c index 446f9f959b..b4fd51c137 100644 --- a/src/mesa/drivers/dri/r600/r700_shader.c +++ b/src/mesa/drivers/dri/r600/r700_shader.c @@ -35,7 +35,6 @@ #include "main/glheader.h" #include "r600_context.h" -#include "r700_chip.h" #include "r700_debug.h" #include "r700_shader.h" diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index e3e78df16c..768e184623 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -49,7 +49,6 @@ #include "r600_context.h" -#include "r700_chip.h" #include "r700_state.h" #include "r700_fragprog.h" @@ -190,7 +189,7 @@ static void r700InvalidateState(GLcontext * ctx, GLuint new_state) //----------- { context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); _swrast_InvalidateState(ctx, new_state); _swsetup_InvalidateState(ctx, new_state); @@ -239,7 +238,7 @@ static void r700SetDepthState(GLcontext * ctx) { context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); if (ctx->Depth.Test) { @@ -322,7 +321,7 @@ static void r700BlendFuncSeparate(GLcontext * ctx, static void r700UpdateCulling(GLcontext * ctx) { - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(R700_CONTEXT(ctx)->chipobj.pvChipObj); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&R700_CONTEXT(ctx)->hw); CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); CLEARbit(r700->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit); @@ -490,7 +489,7 @@ static void r700Viewport(GLcontext * ctx, { context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); __DRIdrawablePrivate *dPriv = context->radeon.dri.drawable; @@ -524,12 +523,7 @@ static void r700Viewport(GLcontext * ctx, tz = v[MAT_TZ] * scale; /* TODO : Need DMA flush as well. */ -#if 0 /* to be enabled */ - if(context->cmdbuf.count_used > 0) - { - (context->chipobj.FlushCmdBuffer)(context); - } -#endif /* to be enabled */ + r700->PA_CL_VPORT_XSCALE.u32All = *((unsigned int*)(&sx)); r700->PA_CL_VPORT_XOFFSET.u32All = *((unsigned int*)(&tx)); @@ -591,7 +585,7 @@ static void r700Scissor(GLcontext* ctx, GLint x, GLint y, GLsizei w, GLsizei h) void r700SetRenderTarget(context_t *context) { - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); struct radeon_renderbuffer *rrb; unsigned int nPitchInPixel; @@ -774,7 +768,7 @@ void r700InitState(GLcontext * ctx) //------------------- { context_t *context = R700_CONTEXT(ctx); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); /* Turn off vgt reuse */ r700->VGT_REUSE_OFF.u32All = 0; diff --git a/src/mesa/drivers/dri/r600/r700_tex.c b/src/mesa/drivers/dri/r600/r700_tex.c index e2de8faf77..21c9379729 100644 --- a/src/mesa/drivers/dri/r600/r700_tex.c +++ b/src/mesa/drivers/dri/r600/r700_tex.c @@ -40,7 +40,6 @@ #include "radeon_common.h" #include "r600_context.h" -#include "r700_chip.h" #include "r700_state.h" diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 2861918c42..a827de197b 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -41,7 +41,6 @@ #include "r600_context.h" #include "r600_cmdbuf.h" -#include "r700_chip.h" #include "r700_debug.h" #include "r700_vertprog.h" @@ -339,7 +338,7 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx) BATCH_LOCALS(&context->radeon); - R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(context->chipobj.pvChipObj); + R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); struct r700_vertex_program *vp = (struct r700_vertex_program *)ctx->VertexProgram._Current; -- cgit v1.2.3 From 8e28d64e9c7cdc4d07ffe4a79c9ef9fff4a27df6 Mon Sep 17 00:00:00 2001 From: Cooper Yuan Date: Sun, 31 May 2009 16:51:07 +0800 Subject: R6xx/r7xx: Fix blinn's lighting shader --- src/mesa/drivers/dri/r600/r700_assembler.c | 38 +++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index a564d6f099..9f1d1a3a44 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2883,15 +2883,15 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* dst.w = log(src.y) */ + /* dst.z = log(src.y) */ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; pAsm->D.dst.reg = dstReg; pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; - pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 1; + pAsm->D.dst.writez = 1; + pAsm->D.dst.writew = 0; pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); @@ -2905,15 +2905,15 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */ + /* dst.w = MUL_LIT(src.w, dst.z, src.x ) */ pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; pAsm->D.dst.op3 = 1; - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; + pAsm->D.dst.rtype = dstType; + pAsm->D.dst.reg = dstReg; + pAsm->D.dst.writex = 0; pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 0; + pAsm->D.dst.writew = 1; pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; @@ -2928,10 +2928,10 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[1].src.reg = dstReg; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); noneg_PVSSRC(&(pAsm->S[1].src)); - pAsm->S[1].src.swizzlex = SQ_SEL_W; - pAsm->S[1].src.swizzley = SQ_SEL_W; - pAsm->S[1].src.swizzlez = SQ_SEL_W; - pAsm->S[1].src.swizzlew = SQ_SEL_W; + pAsm->S[1].src.swizzlex = SQ_SEL_Z; + pAsm->S[1].src.swizzley = SQ_SEL_Z; + pAsm->S[1].src.swizzlez = SQ_SEL_Z; + pAsm->S[1].src.swizzlew = SQ_SEL_Z; pAsm->S[2].src.rtype = srcType; pAsm->S[2].src.reg = srcReg; @@ -2947,7 +2947,7 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* dst.z = exp(tmp.x) */ + /* dst.z = exp(dst.z) */ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; @@ -2957,14 +2957,14 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->D.dst.writez = 1; pAsm->D.dst.writew = 0; - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - pAsm->S[0].src.reg = tmp; + pAsm->S[0].src.rtype = dstType; + pAsm->S[0].src.reg = dstReg; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_X; - pAsm->S[0].src.swizzley = SQ_SEL_X; - pAsm->S[0].src.swizzlez = SQ_SEL_X; - pAsm->S[0].src.swizzlew = SQ_SEL_X; + pAsm->S[0].src.swizzlex = SQ_SEL_Z; + pAsm->S[0].src.swizzley = SQ_SEL_Z; + pAsm->S[0].src.swizzlez = SQ_SEL_Z; + pAsm->S[0].src.swizzlew = SQ_SEL_Z; if( GL_FALSE == next_ins(pAsm) ) { -- cgit v1.2.3 From f4646f3247b721d08a2e01da4b2c8a808663d765 Mon Sep 17 00:00:00 2001 From: Cooper Yuan Date: Tue, 14 Jul 2009 10:39:07 +0800 Subject: R6xx/r7xx: Fix specular lighting issue --- src/mesa/drivers/dri/r600/r700_assembler.c | 42 ++++++++++++++++-------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 4666518824..1d41c5cf78 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2883,14 +2883,17 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* dst.z = log(src.y) */ + /* before: dst.w = log(src.y) + * after : dst.x = log(src.y) + * why change dest register is that dst.w has been initialized as 1 before + */ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED; pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; pAsm->D.dst.reg = dstReg; - pAsm->D.dst.writex = 0; + pAsm->D.dst.writex = 1; pAsm->D.dst.writey = 0; - pAsm->D.dst.writez = 1; + pAsm->D.dst.writez = 0; pAsm->D.dst.writew = 0; pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; @@ -2905,15 +2908,16 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* dst.w = MUL_LIT(src.w, dst.z, src.x ) */ + /* before: tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */ + /* after : tmp.x = amd MUL_LIT(src.w, dst.x, src.x ) */ pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT; pAsm->D.dst.op3 = 1; - pAsm->D.dst.rtype = dstType; - pAsm->D.dst.reg = dstReg; - pAsm->D.dst.writex = 0; + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; pAsm->D.dst.writey = 0; pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 1; + pAsm->D.dst.writew = 0; pAsm->S[0].src.rtype = srcType; pAsm->S[0].src.reg = srcReg; @@ -2928,10 +2932,10 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->S[1].src.reg = dstReg; setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE); noneg_PVSSRC(&(pAsm->S[1].src)); - pAsm->S[1].src.swizzlex = SQ_SEL_Z; - pAsm->S[1].src.swizzley = SQ_SEL_Z; - pAsm->S[1].src.swizzlez = SQ_SEL_Z; - pAsm->S[1].src.swizzlew = SQ_SEL_Z; + pAsm->S[1].src.swizzlex = SQ_SEL_X; + pAsm->S[1].src.swizzley = SQ_SEL_X; + pAsm->S[1].src.swizzlez = SQ_SEL_X; + pAsm->S[1].src.swizzlew = SQ_SEL_X; pAsm->S[2].src.rtype = srcType; pAsm->S[2].src.reg = srcReg; @@ -2947,7 +2951,7 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) return GL_FALSE; } - /* dst.z = exp(dst.z) */ + /* dst.z = exp(tmp.x) */ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE; pAsm->D.dst.math = 1; pAsm->D.dst.rtype = dstType; @@ -2957,14 +2961,14 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm) pAsm->D.dst.writez = 1; pAsm->D.dst.writew = 0; - pAsm->S[0].src.rtype = dstType; - pAsm->S[0].src.reg = dstReg; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); noneg_PVSSRC(&(pAsm->S[0].src)); - pAsm->S[0].src.swizzlex = SQ_SEL_Z; - pAsm->S[0].src.swizzley = SQ_SEL_Z; - pAsm->S[0].src.swizzlez = SQ_SEL_Z; - pAsm->S[0].src.swizzlew = SQ_SEL_Z; + pAsm->S[0].src.swizzlex = SQ_SEL_X; + pAsm->S[0].src.swizzley = SQ_SEL_X; + pAsm->S[0].src.swizzlez = SQ_SEL_X; + pAsm->S[0].src.swizzlew = SQ_SEL_X; if( GL_FALSE == next_ins(pAsm) ) { -- cgit v1.2.3 From cc893d9a98255d3c26df7123ba5cc02e478c9328 Mon Sep 17 00:00:00 2001 From: Kevin DeKorte Date: Mon, 20 Jul 2009 18:56:47 -0400 Subject: r600: fix dst reg indexing This fixes segfaults in apps like teapot and tunnel --- src/mesa/drivers/dri/r600/r700_assembler.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 1d41c5cf78..2d40dfa708 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2193,6 +2193,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + uint index; if( GL_TRUE == IsTex(pILInst->Opcode) ) { @@ -2213,14 +2214,20 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) if(pAsm->D.dst.rtype == DST_REG_OUT) { + if (pAsm->starting_export_register_number >= pAsm->D.dst.reg) { + index = 0; + } else { + index = pAsm->D.dst.reg - pAsm->starting_export_register_number; + } + if(pAsm->D.dst.op3) { // There is no mask for OP3 instructions, so all channels are written - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; + pAsm->pucOutMask[index] = 0xF; } else { - pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] + pAsm->pucOutMask[index] |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; } } -- cgit v1.2.3 From 7edb2a9eef698c386042e6cead68ac516ec15616 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 21 Jul 2009 17:44:36 -0400 Subject: Revert "r600: fix dst reg indexing" This reverts commit cc893d9a98255d3c26df7123ba5cc02e478c9328. Richard has the proper fix. --- src/mesa/drivers/dri/r600/r700_assembler.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 2d40dfa708..1d41c5cf78 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2193,7 +2193,6 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - uint index; if( GL_TRUE == IsTex(pILInst->Opcode) ) { @@ -2214,20 +2213,14 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) if(pAsm->D.dst.rtype == DST_REG_OUT) { - if (pAsm->starting_export_register_number >= pAsm->D.dst.reg) { - index = 0; - } else { - index = pAsm->D.dst.reg - pAsm->starting_export_register_number; - } - if(pAsm->D.dst.op3) { // There is no mask for OP3 instructions, so all channels are written - pAsm->pucOutMask[index] = 0xF; + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF; } else { - pAsm->pucOutMask[index] + pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask; } } -- cgit v1.2.3 From 0f6d3aece7b193dcacbd94f87ac734ee3a44b366 Mon Sep 17 00:00:00 2001 From: Cooper Yuan Date: Wed, 29 Jul 2009 15:23:56 +0800 Subject: R6xx/r7xx: VS export fog color as parameter --- src/mesa/drivers/dri/r600/r700_assembler.c | 16 ++++++++++++++++ src/mesa/drivers/dri/r600/r700_fragprog.c | 6 ++++++ 2 files changed, 22 insertions(+) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 1d41c5cf78..ebd5ff106b 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -4014,6 +4014,22 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, export_starting_index++; } + unBit = 1 << VERT_RESULT_FOGC; + if(OutputsWritten & unBit) + { + if( GL_FALSE == Process_Export(pR700AsmCode, + SQ_EXPORT_PARAM, + export_starting_index, + 1, + pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC], + GL_FALSE) ) + { + return GL_FALSE; + } + + export_starting_index++; + } + for(i=0; i<8; i++) { unBit = 1 << (VERT_RESULT_TEX0 + i); diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 88e66491ba..a473dfe888 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -67,6 +67,12 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++; } + unBit = 1 << FRAG_ATTRIB_FOGC; + if(mesa_fp->Base.InputsRead & unBit) + { + pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++; + } + for(i=0; i<8; i++) { unBit = 1 << (FRAG_ATTRIB_TEX0 + i); -- cgit v1.2.3 From 86ac0ae0b09566d0cd66dcfc17192780f7e2df03 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 1 Aug 2009 20:55:43 -0400 Subject: r600: fix rectangle textures It might be better to add an instruction to normalize the coordinates for rectanglular textures as there are some limitations to wrap modes on unnormalized tex coords. fixes texrect --- src/mesa/drivers/dri/r600/r700_assembler.c | 36 +++++++++++++++++++++--------- src/mesa/drivers/dri/r600/r700_assembler.h | 2 +- 2 files changed, 27 insertions(+), 11 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index ebd5ff106b..0abf112b55 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1205,7 +1205,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) return GL_TRUE; } -GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm) +GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized) { PVSSRC * texture_coordinate_source; PVSSRC * texture_unit_source; @@ -1227,10 +1227,18 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm) tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg; tex_instruction_ptr->m_Word1.f.lod_bias = 0x0; - tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED; - tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED; - tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED; - tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED; + if (normalized) { + tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED; + } else { + /* XXX: UNNORMALIZED tex coords have limited wrap modes */ + tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED; + tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED; + } tex_instruction_ptr->m_Word2.f.offset_x = 0x0; tex_instruction_ptr->m_Word2.f.offset_y = 0x0; @@ -2196,11 +2204,19 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) if( GL_TRUE == IsTex(pILInst->Opcode) ) { - if( GL_FALSE == assemble_tex_instruction(pAsm) ) - { - r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction"); - return GL_FALSE; - } + if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { + if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) + { + r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction"); + return GL_FALSE; + } + } else { + if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) ) + { + r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction"); + return GL_FALSE; + } + } } else { //ALU diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h index e9b21b802e..f9c4d849c6 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.h +++ b/src/mesa/drivers/dri/r600/r700_assembler.h @@ -424,7 +424,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, GLboolean assemble_dst(r700_AssemblerBase *pAsm); GLboolean tex_dst(r700_AssemblerBase *pAsm); GLboolean tex_src(r700_AssemblerBase *pAsm); -GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm); +GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized); void initialize(r700_AssemblerBase *pAsm); GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, int source_index, -- cgit v1.2.3 From 1e52b8b4e02c887cb493e5e2bde902b54e9c72fd Mon Sep 17 00:00:00 2001 From: Cooper Yuan Date: Wed, 12 Aug 2009 17:39:18 +0800 Subject: r600: A shader is bound that exports Z as a float into Red channel --- src/mesa/drivers/dri/r600/r700_assembler.c | 3 +++ src/mesa/drivers/dri/r600/r700_fragprog.c | 1 + 2 files changed, 4 insertions(+) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 0abf112b55..eaacd06113 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -3839,6 +3839,9 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, if (export_count == 1) { ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number]; + /* exports Z as a float into Red channel */ + if (GL_TRUE == is_depth_export) + ucWriteMask = 0x1; if( (ucWriteMask & 0x1) != 0) { diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index efeea905c1..6249bde6f1 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -118,6 +118,7 @@ void Map_Fragment_Program(r700_AssemblerBase *pAsm, pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers++; pAsm->number_of_exports++; pAsm->number_of_colorandz_exports++; + pAsm->pR700Shader->depthIsExported = 1; } pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports); -- cgit v1.2.3 From 5d10890795d9bddc8cafc4afb19cacf164d6e667 Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Tue, 25 Aug 2009 21:46:32 +0300 Subject: radeon/r600: Fix remaining warnings when building 64 bit binary. --- src/mesa/drivers/dri/r600/r600_cmdbuf.c | 2 ++ src/mesa/drivers/dri/r600/r700_assembler.c | 4 ++-- src/mesa/drivers/dri/radeon/radeon_cmdbuf.h | 7 ++++--- src/mesa/drivers/dri/radeon/radeon_dma.c | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c index a330d5b151..050d7bc6d0 100644 --- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c +++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c @@ -304,6 +304,7 @@ static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */ return 0; } +#if 0 static void dump_cmdbuf(struct radeon_cs *cs) { int i; @@ -314,6 +315,7 @@ static void dump_cmdbuf(struct radeon_cs *cs) fprintf(stderr,"--end--\n"); } +#endif static int r600_cs_emit(struct radeon_cs *cs) { diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index eaacd06113..834c23593b 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2132,7 +2132,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew; break; default: - alu_instruction_ptr->m_Word1_OP2.f6.write_mask = SQ_SEL_MASK; + alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK; break; } alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF; @@ -2161,7 +2161,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew; break; default: - alu_instruction_ptr->m_Word1_OP2.f.write_mask = SQ_SEL_MASK; + alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK; break; } alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF; diff --git a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h index 410df4d665..6fcd1ce7ca 100644 --- a/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h +++ b/src/mesa/drivers/dri/radeon/radeon_cmdbuf.h @@ -54,11 +54,12 @@ void rcommonBeginBatch(radeonContextPtr rmesa, */ #define OUT_BATCH_RELOC(data, bo, offset, rd, wd, flags) \ do { \ - if (0 && offset) { \ + int __offset = (offset); \ + if (0 && __offset) { \ fprintf(stderr, "(%s:%s:%d) offset : %d\n", \ - __FILE__, __FUNCTION__, __LINE__, offset); \ + __FILE__, __FUNCTION__, __LINE__, __offset); \ } \ - radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, offset); \ + radeon_cs_write_dword(b_l_rmesa->cmdbuf.cs, __offset); \ radeon_cs_write_reloc(b_l_rmesa->cmdbuf.cs, \ bo, rd, wd, flags); \ if (!b_l_rmesa->radeonScreen->kernel_mm) \ diff --git a/src/mesa/drivers/dri/radeon/radeon_dma.c b/src/mesa/drivers/dri/radeon/radeon_dma.c index f15013c33b..51f764cf47 100644 --- a/src/mesa/drivers/dri/radeon/radeon_dma.c +++ b/src/mesa/drivers/dri/radeon/radeon_dma.c @@ -333,7 +333,7 @@ void radeonReleaseDmaRegions(radeonContextPtr rmesa) foreach(dma_bo, &rmesa->dma.reserved) ++reserved; - fprintf(stderr, "%s: free %u, wait %u, reserved %u, minimum_size: %u\n", + fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n", __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size); } -- cgit v1.2.3 From aabb36fdd58ff891e3a0946e2326d458f5a4d832 Mon Sep 17 00:00:00 2001 From: Pauli Nieminen Date: Sun, 30 Aug 2009 22:08:38 +0300 Subject: r600: Convert to shared debug code and add a few new debug messages. There is only a few functions that have debugging enabled now. --- src/mesa/drivers/dri/r600/Makefile | 9 ++-- src/mesa/drivers/dri/r600/r600_context.c | 10 +++- src/mesa/drivers/dri/r600/r600_emit.c | 28 ++++++----- src/mesa/drivers/dri/r600/r600_tex.c | 20 ++++---- src/mesa/drivers/dri/r600/r600_texstate.c | 8 ++-- src/mesa/drivers/dri/r600/r700_assembler.c | 72 ++++++++++++++--------------- src/mesa/drivers/dri/r600/r700_chip.c | 34 ++++++++++++++ src/mesa/drivers/dri/r600/r700_clear.c | 5 +- src/mesa/drivers/dri/r600/r700_debug.c | 35 +++----------- src/mesa/drivers/dri/r600/r700_debug.h | 60 ++---------------------- src/mesa/drivers/dri/r600/r700_oglprog.c | 6 +++ src/mesa/drivers/dri/r600/r700_render.c | 10 ++-- src/mesa/drivers/dri/r600/r700_shaderinst.c | 14 +++--- src/mesa/drivers/dri/r600/r700_vertprog.c | 3 +- 14 files changed, 141 insertions(+), 173 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile index 3c3100ab91..d925a2dfe3 100644 --- a/src/mesa/drivers/dri/r600/Makefile +++ b/src/mesa/drivers/dri/r600/Makefile @@ -27,16 +27,17 @@ COMMON_SOURCES = \ ../common/dri_util.c RADEON_COMMON_SOURCES = \ - radeon_texture.c \ + radeon_bo_legacy.c \ radeon_common_context.c \ radeon_common.c \ + radeon_cs_legacy.c \ radeon_dma.c \ + radeon_debug.c \ + radeon_fbo.c \ radeon_lock.c \ - radeon_bo_legacy.c \ - radeon_cs_legacy.c \ radeon_mipmap_tree.c \ radeon_span.c \ - radeon_fbo.c \ + radeon_texture.c \ radeon_queryobj.c DRIVER_SOURCES = \ diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 6a066f3510..251c124cbf 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -56,6 +56,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "drivers/common/driverfuncs.h" +#include "radeon_debug.h" #include "r600_context.h" #include "radeon_common_context.h" #include "radeon_span.h" @@ -225,8 +226,10 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, /* Allocate the R600 context */ r600 = (context_t*) CALLOC(sizeof(*r600)); - if (!r600) + if (!r600) { + radeon_error("Failed to allocate memory for context.\n"); return GL_FALSE; + } if (!(screen->chip_flags & RADEON_CHIPSET_TCL)) hw_tcl_on = future_hw_tcl_on = 0; @@ -255,6 +258,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, if (!radeonInitContext(&r600->radeon, &functions, glVisual, driContextPriv, sharedContextPrivate)) { + radeon_error("Initializing context failed.\n"); FREE(r600); return GL_FALSE; } @@ -347,6 +351,8 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + radeon_init_debug(); + driInitExtensions(ctx, card_extensions, GL_TRUE); if (r600->radeon.radeonScreen->kernel_mm) driInitExtensions(ctx, mm_extensions, GL_FALSE); @@ -375,7 +381,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual, TNL_CONTEXT(ctx)->Driver.RunPipeline = r600RunPipeline; if (driQueryOptionb(&r600->radeon.optionCache, "no_rast")) { - fprintf(stderr, "disabling 3D acceleration\n"); + radeon_warning("disabling 3D acceleration\n"); #if R200_MERGED FALLBACK(&r600->radeon, RADEON_FALLBACK_DISABLE, 1); #endif diff --git a/src/mesa/drivers/dri/r600/r600_emit.c b/src/mesa/drivers/dri/r600/r600_emit.c index f8c56e7774..5c250c2418 100644 --- a/src/mesa/drivers/dri/r600/r600_emit.c +++ b/src/mesa/drivers/dri/r600/r600_emit.c @@ -60,28 +60,30 @@ GLboolean r600EmitShader(GLcontext * ctx, radeonContextPtr radeonctx = RADEON_CONTEXT(ctx); struct radeon_bo * pbo; uint32_t *out; - shader_again_alloc: pbo = radeon_bo_open(radeonctx->radeonScreen->bom, - 0, - sizeinDWORD * 4, - 256, - RADEON_GEM_DOMAIN_GTT, - 0); + 0, + sizeinDWORD * 4, + 256, + RADEON_GEM_DOMAIN_GTT, + 0); + + radeon_print(RADEON_SHADER, RADEON_NORMAL, "%s %p size %d: %s\n", __func__, pbo, sizeinDWORD, szShaderUsage); if (!pbo) { + radeon_print(RADEON_MEMORY | RADEON_CS, RADEON_IMPORTANT, "No memory for buffer object. Flushing command buffer.\n"); rcommonFlushCmdBuf(radeonctx, __FUNCTION__); goto shader_again_alloc; } radeon_cs_space_add_persistent_bo(radeonctx->cmdbuf.cs, - pbo, - RADEON_GEM_DOMAIN_GTT, 0); + pbo, + RADEON_GEM_DOMAIN_GTT, 0); - if (radeon_cs_space_check_with_bo(radeonctx->cmdbuf.cs, - pbo, - RADEON_GEM_DOMAIN_GTT, 0)) { - fprintf(stderr,"failure to revalidate BOs - badness\n"); + if (radeon_cs_space_check_with_bo(radeonctx->cmdbuf.cs, + pbo, + RADEON_GEM_DOMAIN_GTT, 0)) { + radeon_error("failure to revalidate BOs - badness\n"); return GL_FALSE; } @@ -103,6 +105,8 @@ GLboolean r600DeleteShader(GLcontext * ctx, { struct radeon_bo * pbo = (struct radeon_bo *)shaderbo; + radeon_print(RADEON_SHADER, RADEON_NORMAL, "%s: %p\n", __func__, pbo); + if (pbo) { if (pbo->ptr) radeon_bo_unmap(pbo); diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index a06a1fa921..d105b90cd1 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -67,7 +67,7 @@ static unsigned int translate_wrap_mode(GLenum wrapmode) case GL_MIRROR_CLAMP_TO_EDGE_EXT: return SQ_TEX_MIRROR_ONCE_LAST_TEXEL; case GL_MIRROR_CLAMP_TO_BORDER_EXT: return SQ_TEX_MIRROR_ONCE_BORDER; default: - _mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__); + radeon_error("bad wrap mode in %s", __FUNCTION__); return 0; } } @@ -208,8 +208,7 @@ static void r600SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa | R300_TX_MIN_FILTER_ANISO | R300_TX_MIN_FILTER_MIP_LINEAR | aniso_filter(anisotropy);*/ - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy); + radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "Using maximum anisotropy of %f\n", anisotropy); return; } @@ -288,10 +287,9 @@ static void r600TexParameter(GLcontext * ctx, GLenum target, { radeonTexObj* t = radeon_tex_obj(texObj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { - fprintf(stderr, "%s( %s )\n", __FUNCTION__, + radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_VERBOSE, + "%s( %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr(pname)); - } switch (pname) { case GL_TEXTURE_MIN_FILTER: @@ -351,11 +349,10 @@ static void r600DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj) context_t* rmesa = R700_CONTEXT(ctx); radeonTexObj* t = radeon_tex_obj(texObj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { - fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL, + "%s( %p (target = %s) )\n", __FUNCTION__, (void *)texObj, _mesa_lookup_enum_by_nr(texObj->Target)); - } if (rmesa) { int i; @@ -393,10 +390,9 @@ static struct gl_texture_object *r600NewTextureObject(GLcontext * ctx, radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj); - if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) { - fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__, + radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL, + "%s( %p (target = %s) )\n", __FUNCTION__, t, _mesa_lookup_enum_by_nr(target)); - } _mesa_initialize_texture_object(&t->base, name, target); t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy; diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 9f1bf45246..237eaa8249 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -567,7 +567,7 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex if (!t->image_override) { if (!r600GetTexFormat(texObj, firstImage->TexFormat->MesaFormat)) { - _mesa_problem(NULL, "unexpected texture format in %s", + radeon_error("unexpected texture format in %s\n", __FUNCTION__); return; } @@ -593,7 +593,7 @@ static void setup_hardware_state(context_t *rmesa, struct gl_texture_object *tex SETfield(t->SQ_TEX_RESOURCE1, 0, TEX_DEPTH_shift, TEX_DEPTH_mask); break; default: - _mesa_problem(NULL, "unexpected texture target type in %s", __FUNCTION__); + radeon_error("unexpected texture target type in %s\n", __FUNCTION__); return; } @@ -675,9 +675,7 @@ GLboolean r600ValidateBuffers(GLcontext * ctx) continue; if (!r600_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) { - _mesa_warning(ctx, - "failed to validate texture for unit %d.\n", - i); + radeon_warning("failed to validate texture for unit %d.\n", i); } t = radeon_tex_obj(ctx->Texture.Unit[i]._Current); if (t->image_override && t->bo) diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 834c23593b..2d8480daaf 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -33,8 +33,8 @@ #include "main/mtypes.h" #include "main/imports.h" +#include "radeon_debug.h" #include "r600_context.h" -#include "r700_debug.h" #include "r700_assembler.h" @@ -366,8 +366,8 @@ unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm) case SQ_OP2_INST_COS: return 1; - default: r700_error(TODO_ASM_NEEDIMPINST, - "Need instruction operand number. \n");; + default: radeon_error( + "Need instruction operand number for %x.\n", pAsm->D.dst.opcode); }; return 3; @@ -531,7 +531,7 @@ int check_current_clause(r700_AssemblerBase* pAsm, case CF_EMPTY_CLAUSE: break; default: - r700_error(ERROR_ASM_VTX_CLAUSE, + radeon_error( "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type); return GL_FALSE; } @@ -565,7 +565,7 @@ int check_current_clause(r700_AssemblerBase* pAsm, } else { - r700_error(ERROR_ASM_ALLOCEXPORTCF, + radeon_error( "Error allocating new EXPORT CF instruction in check_current_clause. \n"); return GL_FALSE; } @@ -578,7 +578,7 @@ int check_current_clause(r700_AssemblerBase* pAsm, pAsm->cf_current_clause_type = CF_OTHER_CLAUSE; break; default: - r700_error(ERROR_ASM_UNKOWNCLAUSE, + radeon_error( "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type); return GL_FALSE; } @@ -611,7 +611,7 @@ GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm, } else { - r700_error(ERROR_ASM_ALLOCVTXCF, "Could not allocate a new VFetch CF instruction."); + radeon_error("Could not allocate a new VFetch CF instruction.\n"); return GL_FALSE; } @@ -661,7 +661,7 @@ GLboolean add_tex_instruction(r700_AssemblerBase* pAsm, } else { - r700_error(ERROR_ASM_ALLOCTEXCF, "Could not allocate a new TEX CF instruction."); + radeon_error("Could not allocate a new TEX CF instruction.\n"); return GL_FALSE; } @@ -1047,7 +1047,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm, } break; default: - r700_error(ERROR_ASM_SRCARGUMENT, "Invalid source argument type"); + radeon_error("Invalid source argument type\n"); return GL_FALSE; } } @@ -1094,7 +1094,7 @@ GLboolean assemble_dst(r700_AssemblerBase *pAsm) } break; default: - r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type"); + radeon_error("Invalid destination output argument type\n"); return GL_FALSE; } @@ -1134,7 +1134,7 @@ GLboolean tex_dst(r700_AssemblerBase *pAsm) } else { - r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type"); + radeon_error("Invalid destination output argument type\n"); return GL_FALSE; } @@ -1188,7 +1188,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) } else { - r700_error(ERROR_ASM_BADTEXSRC, "Invalid source texcoord for TEX instruction"); + radeon_error("Invalid source texcoord for TEX instruction\n"); return GL_FALSE; } @@ -1269,7 +1269,7 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize } else { - r700_error(ERROR_ASM_TEXDSTBADTYPE, "Only temp destination registers supported for TEX dest regs."); + radeon_error("Only temp destination registers supported for TEX dest regs.\n"); return GL_FALSE; } @@ -1362,7 +1362,7 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, } else { - r700_error(ERROR_ASM_ALUSRCBADTYPE, "Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.", + radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n", source_index, pSource->rtype); return GL_FALSE; } @@ -1397,7 +1397,7 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, src_chan = SQ_CHAN_X; break; default: - r700_error(ERROR_ASM_ALUSRCSELECT, "Unknown source select value (%d) in assemble_alu_src()."); + radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle); return GL_FALSE; break; } @@ -1432,7 +1432,7 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr, alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg; break; default: - r700_error(ERROR_ASM_ALUSRCNUMBER, "Only three sources allowed in ALU opcodes."); + radeon_error("Only three sources allowed in ALU opcodes.\n"); return GL_FALSE; break; } @@ -1467,7 +1467,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm, } else { - r700_error(ERROR_ASM_ALLOCALUCF, "Could not allocate a new ALU CF instruction."); + radeon_error("Could not allocate a new ALU CF instruction.\n"); return GL_FALSE; } @@ -1635,7 +1635,7 @@ GLboolean reserve_cfile(r700_AssemblerBase* pAsm, } else { - r700_error(ERROR_ASM_CONSTCHANNEL, "All cfile read ports are used, cannot reference C$sel, channel $chan."); + radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n"); return GL_FALSE; } return GL_TRUE; @@ -1649,7 +1649,7 @@ GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint } else if(pAsm->hw_gpr[cycle][chan] != (int)sel) { - r700_error(ERROR_ASM_BADGPRRESERVE, "Another scalar operation has already used GPR read port for given channel"); + radeon_error("Another scalar operation has already used GPR read port for given channel\n"); return GL_FALSE; } @@ -1689,7 +1689,7 @@ GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* p } break; default: - r700_error(ERROR_ASM_BADSCALARBZ, "Bad Scalar bank swizzle value"); + radeon_error("Bad Scalar bank swizzle value\n"); break; } @@ -1737,7 +1737,7 @@ GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* p } break; default: - r700_error(ERROR_ASM_BADVECTORBZ, "Bad Vec bank swizzle value"); + radeon_error("Bad Vec bank swizzle value\n"); return GL_FALSE; break; } @@ -2056,7 +2056,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm) } else { - r700_error(ERROR_ASM_ALUDSTBADTYPE, "Only temp destination registers supported for ALU dest regs."); + radeon_error("Only temp destination registers supported for ALU dest regs.\n"); return GL_FALSE; } @@ -2207,13 +2207,13 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) { - r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction"); + radeon_error("Error assembling TEX instruction\n"); return GL_FALSE; } } else { if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) ) { - r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction"); + radeon_error("Error assembling TEX instruction\n"); return GL_FALSE; } } @@ -2222,7 +2222,7 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) { //ALU if( GL_FALSE == assemble_alu_instruction(pAsm) ) { - r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling ALU instruction"); + radeon_error("Error assembling ALU instruction\n"); return GL_FALSE; } } @@ -2367,7 +2367,7 @@ GLboolean assemble_ADD(r700_AssemblerBase *pAsm) GLboolean assemble_BAD(char *opcode_str) { - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction (%s)", opcode_str); + radeon_error("Not yet implemented instruction (%s)\n", opcode_str); return GL_FALSE; } @@ -3381,7 +3381,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) if (GL_TRUE == src_const) { - r700_error(TODO_ASM_CONSTTEXADDR, "TODO: Texture coordinates from a constant register not supported."); + radeon_error("TODO: Texture coordinates from a constant register not supported.\n"); return GL_FALSE; } @@ -3391,7 +3391,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; case OPCODE_TXB: - r700_error(TODO_ASM_TXB, "do not support TXB yet"); + radeon_error("do not support TXB yet\n"); return GL_FALSE; break; case OPCODE_TXP: @@ -3399,7 +3399,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; default: - r700_error(ERROR_ASM_BADTEXINST, "Internal error: bad texture op (not TEX)"); + radeon_error("Internal error: bad texture op (not TEX)\n"); return GL_FALSE; break; } @@ -3581,12 +3581,12 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, break; case OPCODE_ARL: - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARL "); + radeon_error("Not yet implemented instruction OPCODE_ARL \n"); //if ( GL_FALSE == assemble_BAD("ARL") ) return GL_FALSE; break; case OPCODE_ARR: - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARR "); + radeon_error("Not yet implemented instruction OPCODE_ARR \n"); //if ( GL_FALSE == assemble_BAD("ARR") ) return GL_FALSE; break; @@ -3617,7 +3617,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_EXP: - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_EXP "); + radeon_error("Not yet implemented instruction OPCODE_EXP \n"); //if ( GL_FALSE == assemble_BAD("EXP") ) return GL_FALSE; break; // approx of EX2 @@ -3653,7 +3653,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_LOG: - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_LOG "); + radeon_error("Not yet implemented instruction OPCODE_LOG \n"); //if ( GL_FALSE == assemble_BAD("LOG") ) return GL_FALSE; break; // approx of LG2 @@ -3752,7 +3752,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_FALSE; break; case OPCODE_ELSE : - r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ELSE "); + radeon_error("Not yet implemented instruction OPCODE_ELSE \n"); //if ( GL_FALSE == assemble_BAD("ELSE") ) return GL_FALSE; break; @@ -3774,7 +3774,7 @@ GLboolean AssembleInstr(GLuint uiNumberInsts, return GL_TRUE; default: - r700_error(ERROR_ASM_UNKNOWNILINST, "internal: unknown instruction"); + radeon_error("internal: unknown instruction\n"); return GL_FALSE; } } @@ -3818,7 +3818,7 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm, break; default: - r700_error(ERROR_ASM_BADEXPORTTYPE, "Unknown export type: %d", type); + radeon_error("Unknown export type: %d\n", type); return GL_FALSE; break; } diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 16f1a3df36..4fc624ed0b 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -49,6 +49,8 @@ static void r700SendTexState(GLcontext *ctx, struct radeon_state_atom *atom) unsigned int i; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); + for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { radeonTexObj *t = r700->textures[i]; if (t) { @@ -93,6 +95,7 @@ static void r700SendTexSamplerState(GLcontext *ctx, struct radeon_state_atom *at R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); unsigned int i; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { radeonTexObj *t = r700->textures[i]; @@ -115,6 +118,7 @@ static void r700SendTexBorderColorState(GLcontext *ctx, struct radeon_state_atom R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); unsigned int i; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) { radeonTexObj *t = r700->textures[i]; @@ -141,6 +145,7 @@ static void r700SetupVTXConstants(GLcontext * ctx, context_t *context = R700_CONTEXT(ctx); struct radeon_aos * paos = (struct radeon_aos *)pAos; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); unsigned int uSQ_VTX_CONSTANT_WORD0_0; unsigned int uSQ_VTX_CONSTANT_WORD1_0; @@ -205,6 +210,7 @@ void r700SetupStreams(GLcontext *ctx) TNLcontext *tnl = TNL_CONTEXT(ctx); struct vertex_buffer *vb = &tnl->vb; unsigned int i, j = 0; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); R600_STATECHANGE(context, vtx); @@ -229,6 +235,7 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) = (struct r700_vertex_program *)ctx->VertexProgram._Current; unsigned int i, j = 0; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); if (context->radeon.tcl.aos_count == 0) return; @@ -264,6 +271,7 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_renderbuffer *rrb; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); rrb = radeon_get_depthbuffer(&context->radeon); if (!rrb || !rrb->bo) { @@ -303,6 +311,7 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * struct radeon_renderbuffer *rrb; BATCH_LOCALS(&context->radeon); int id = 0; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); rrb = radeon_get_colorbuffer(&context->radeon); if (!rrb || !rrb->bo) { @@ -352,6 +361,7 @@ static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom) R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_bo * pbo; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); pbo = (struct radeon_bo *)r700GetActiveFpShaderBo(GL_CONTEXT(context)); @@ -385,6 +395,7 @@ static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom) R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_bo * pbo; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context)); @@ -416,6 +427,7 @@ static void r700SendFSState(GLcontext *ctx, struct radeon_state_atom *atom) R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); struct radeon_bo * pbo; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); /* XXX fixme * R6xx chips require a FS be emitted, even if it's not used. @@ -457,6 +469,7 @@ static void r700SendViewportState(GLcontext *ctx, struct radeon_state_atom *atom R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); int id = 0; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); if (id > R700_MAX_VIEWPORTS) return; @@ -489,6 +502,7 @@ static void r700SendSQConfig(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(34); R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6); @@ -526,6 +540,7 @@ static void r700SendUCPState(GLcontext *ctx, struct radeon_state_atom *atom) R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); int i; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); for (i = 0; i < R700_MAX_UCP; i++) { if (r700->ucp[i].enabled) { @@ -547,6 +562,7 @@ static void r700SendSPIState(GLcontext *ctx, struct radeon_state_atom *atom) R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); unsigned int ui; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(59 + R700_MAX_SHADER_EXPORTS); @@ -620,6 +636,7 @@ static void r700SendVGTState(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(41); @@ -665,6 +682,7 @@ static void r700SendSXState(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(9); R600_OUT_BATCH_REGVAL(SX_MISC, r700->SX_MISC.u32All); @@ -679,6 +697,7 @@ static void r700SendDBState(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(23); R600_OUT_BATCH_REGVAL(DB_HTILE_DATA_BASE, r700->DB_HTILE_DATA_BASE.u32All); @@ -720,6 +739,7 @@ static void r700SendCBState(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { BEGIN_BATCH_NO_AUTOSTATE(11); @@ -766,6 +786,7 @@ static void r700SendCBBlendState(GLcontext *ctx, struct radeon_state_atom *atom) R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); unsigned int ui; + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) { BEGIN_BATCH_NO_AUTOSTATE(3); @@ -796,6 +817,7 @@ static void r700SendCBBlendColorState(GLcontext *ctx, struct radeon_state_atom * context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(6); R600_OUT_BATCH_REGSEQ(CB_BLEND_RED, 4); @@ -850,6 +872,7 @@ static void r700SendCLState(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(12); R600_OUT_BATCH_REGVAL(PA_CL_CLIP_CNTL, r700->PA_CL_CLIP_CNTL.u32All); @@ -881,6 +904,7 @@ static void r700SendScissorState(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(22); R600_OUT_BATCH_REGSEQ(PA_SC_SCREEN_SCISSOR_TL, 2); @@ -913,6 +937,7 @@ static void r700SendSCState(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); BEGIN_BATCH_NO_AUTOSTATE(15); R600_OUT_BATCH_REGVAL(R7xx_PA_SC_EDGERULE, r700->PA_SC_EDGERULE.u32All); @@ -969,6 +994,7 @@ static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom) R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context); int i; BATCH_LOCALS(&context->radeon); + radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__); if (r700->vs.num_consts == 0) return; @@ -999,6 +1025,7 @@ static int check_cb(GLcontext *ctx, struct radeon_state_atom *atom) if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770) count += 11; + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); return count; } @@ -1019,6 +1046,7 @@ static int check_blnd(GLcontext *ctx, struct radeon_state_atom *atom) count += 3; } } + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); return count; } @@ -1034,6 +1062,7 @@ static int check_ucp(GLcontext *ctx, struct radeon_state_atom *atom) if (r700->ucp[i].enabled) count += 6; } + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); return count; } @@ -1045,6 +1074,7 @@ static int check_vtx(GLcontext *ctx, struct radeon_state_atom *atom) if (count) count += 6; + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); return count; } @@ -1059,6 +1089,7 @@ static int check_tx(GLcontext *ctx, struct radeon_state_atom *atom) if (t) count++; } + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); return count * 31; } @@ -1070,6 +1101,7 @@ static int check_ps_consts(GLcontext *ctx, struct radeon_state_atom *atom) if (count) count += 2; + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); return count; } @@ -1082,6 +1114,7 @@ static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom) if (count) count += 2; + radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); return count; } @@ -1101,6 +1134,7 @@ do { \ void r600InitAtoms(context_t *context) { + radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context); context->radeon.hw.max_state_size = 10 + 5 + 14; /* start 3d, idle, cb/db flush */ /* Setup the atom linked list */ diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index 6d4ea90ccc..c6546ab00c 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -55,6 +55,8 @@ void r700Clear(GLcontext * ctx, GLbitfield mask) int i; struct gl_framebuffer *fb = ctx->DrawBuffer; + radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s %x\n", __func__, mask); + if( GL_TRUE == r700ClearFast(context, mask) ) { return; @@ -106,8 +108,7 @@ void r700Clear(GLcontext * ctx, GLbitfield mask) } if (swrast_mask) { - if (RADEON_DEBUG & DEBUG_FALLBACKS) - fprintf(stderr, "%s: swrast clear, mask: %x\n", + radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, "%s: swrast clear, mask: %x\n", __FUNCTION__, swrast_mask); _swrast_Clear(ctx, swrast_mask); } diff --git a/src/mesa/drivers/dri/r600/r700_debug.c b/src/mesa/drivers/dri/r600/r700_debug.c index dabd0d2c25..cd1ba9eca3 100644 --- a/src/mesa/drivers/dri/r600/r700_debug.c +++ b/src/mesa/drivers/dri/r600/r700_debug.c @@ -25,31 +25,8 @@ * CooperYuan , */ -#include -#include -#include -#include - -#include "main/glheader.h" - #include "r700_debug.h" -#include "r600_context.h" - -void NormalizeLogErrorCode(int nError) -{ - //TODO -} - -void r700_error(int nLocalError, char* fmt, ...) -{ - va_list args; - - NormalizeLogErrorCode(nLocalError); - - va_start(args, fmt); - fprintf(stderr, fmt, args); - va_end(args); -} +#include "radeon_debug.h" void DumpHwBinary(int type, void *addr, int size) { @@ -61,21 +38,21 @@ void DumpHwBinary(int type, void *addr, int size) switch (type) { case DUMP_PIXEL_SHADER: - DEBUGF("Pixel Shader\n"); + radeon_print(RADEON_SHADER, RADEON_TRACE, "Pixel Shader\n"); break; case DUMP_VERTEX_SHADER: - DEBUGF("Vertex Shader\n"); + radeon_print(RADEON_SHADER, RADEON_TRACE, "Vertex Shader\n"); break; case DUMP_FETCH_SHADER: - DEBUGF("Fetch Shader\n"); + radeon_print(RADEON_SHADER, RADEON_TRACE, "Fetch Shader\n"); break; } for (i = 0; i < size; i++) { - DEBUGP("0x%08x,\t", *pHw); + radeon_print(RADEON_SHADER, RADEON_TRACE, "0x%08x,\t", *pHw); if (i%4 == 3) - DEBUGP("0x%08x\n", *pHw); + radeon_print(RADEON_SHADER, RADEON_TRACE, "0x%08x\n", *pHw); pHw++; } diff --git a/src/mesa/drivers/dri/r600/r700_debug.h b/src/mesa/drivers/dri/r600/r700_debug.h index e810e6da08..c0921bf610 100644 --- a/src/mesa/drivers/dri/r600/r700_debug.h +++ b/src/mesa/drivers/dri/r600/r700_debug.h @@ -27,67 +27,13 @@ #ifndef _R700_DEBUG_H_ #define _R700_DEBUG_H_ - -enum R700_ERROR -{ - ERROR_ASM_VTX_CLAUSE = 0x1000, - ERROR_ASM_UNKOWNCLAUSE = 0x1001, - ERROR_ASM_ALLOCEXPORTCF = 0x1002, - ERROR_ASM_ALLOCVTXCF = 0x1003, - ERROR_ASM_ALLOCTEXCF = 0x1004, - ERROR_ASM_ALLOCALUCF = 0x1005, - ERROR_ASM_UNKNOWNILINST = 0x1006, - ERROR_ASM_SRCARGUMENT = 0x1007, - ERROR_ASM_DSTARGUMENT = 0x1008, - ERROR_ASM_TEXINSTRUCTION = 0x1009, - ERROR_ASM_ALUINSTRUCTION = 0x100A, - ERROR_ASM_INSTDSTTRACK = 0x100B, - ERROR_ASM_TEXDSTBADTYPE = 0x100C, - ERROR_ASM_ALUSRCBADTYPE = 0x100D, - ERROR_ASM_ALUSRCSELECT = 0x100E, - ERROR_ASM_ALUSRCNUMBER = 0x100F, - ERROR_ASM_ALUDSTBADTYPE = 0x1010, - ERROR_ASM_CONSTCHANNEL = 0x1011, - ERROR_ASM_BADSCALARBZ = 0x1012, - ERROR_ASM_BADGPRRESERVE = 0x1013, - ERROR_ASM_BADVECTORBZ = 0x1014, - ERROR_ASM_BADTEXINST = 0x1015, - ERROR_ASM_BADTEXSRC = 0x1016, - ERROR_ASM_BADEXPORTTYPE = 0x1017, - - - TODO_ASM_CONSTTEXADDR = 0x8000, - TODO_ASM_NEEDIMPINST = 0x8001, - TODO_ASM_TXB = 0x8002, - TODO_ASM_TXP = 0x8003 -}; - enum R700_DUMP_TYPE { - DUMP_VERTEX_SHADER = 0x1, - DUMP_PIXEL_SHADER = 0x2, - DUMP_FETCH_SHADER = 0x4, + DUMP_VERTEX_SHADER = 0x1, + DUMP_PIXEL_SHADER = 0x2, + DUMP_FETCH_SHADER = 0x4, }; -#define DEBUGF printf -#define DEBUGP printf - -void NormalizeLogErrorCode(int nError); -/*NormalizeLogErrorCode(nLocalError); */ -void r700_error(int nLocalError, char *fmt, ...); extern void DumpHwBinary(int, void *, int); -#ifdef STANDALONE_COMPILER -#ifdef __cplusplus -extern "C" -{ -#endif //__cplusplus - -void LogString(char* szStr); - -#ifdef __cplusplus -} -#endif //__cplusplus -#endif /*STANDALONE_COMPILER*/ - #endif /*_R700_DEBUG_H_*/ diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c index c49b90c1cc..3c8c1fd7a3 100644 --- a/src/mesa/drivers/dri/r600/r700_oglprog.c +++ b/src/mesa/drivers/dri/r600/r700_oglprog.c @@ -49,6 +49,9 @@ static struct gl_program *r700NewProgram(GLcontext * ctx, struct r700_vertex_program *vp; struct r700_fragment_program *fp; + radeon_print(RADEON_SHADER, RADEON_VERBOSE, + "%s %u, %u\n", __func__, target, id); + switch (target) { case GL_VERTEX_STATE_PROGRAM_NV: @@ -89,6 +92,9 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog) struct r700_vertex_program * vp; struct r700_fragment_program * fp; + radeon_print(RADEON_SHADER, RADEON_VERBOSE, + "%s %p\n", __func__, prog); + switch (prog->Target) { case GL_VERTEX_STATE_PROGRAM_NV: diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 0b3ceb01ba..4753c757a1 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -327,8 +327,8 @@ static GLuint r700PredictRenderSize(GLcontext* ctx) else dwords += state_size; - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s: total prediction size is %d.\n", __FUNCTION__, dwords); + radeon_print(RADEON_RENDER, RADEON_VERBOSE, + "%s: total prediction size is %d.\n", __FUNCTION__, dwords); return dwords; } @@ -342,8 +342,7 @@ static GLboolean r700RunRender(GLcontext * ctx, struct vertex_buffer *vb = &tnl->vb; struct radeon_renderbuffer *rrb; - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s: cs begin at %d\n", + radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: cs begin at %d\n", __func__, context->radeon.cmdbuf.cs->cdw); /* always emit CB base to prevent @@ -388,8 +387,7 @@ static GLboolean r700RunRender(GLcontext * ctx, radeonReleaseArrays(ctx, ~0); - if (RADEON_DEBUG & DEBUG_PRIMS) - fprintf(stderr, "%s: cs end at %d\n", + radeon_print(RADEON_RENDER, RADEON_TRACE, "%s: cs end at %d\n", __func__, context->radeon.cmdbuf.cs->cdw); if ( emit_end < context->radeon.cmdbuf.cs->cdw ) diff --git a/src/mesa/drivers/dri/r600/r700_shaderinst.c b/src/mesa/drivers/dri/r600/r700_shaderinst.c index c1bffee91f..f120d9f941 100644 --- a/src/mesa/drivers/dri/r600/r700_shaderinst.c +++ b/src/mesa/drivers/dri/r600/r700_shaderinst.c @@ -27,7 +27,7 @@ #include "main/mtypes.h" -#include "r700_debug.h" +#include "radeon_debug.h" #include "r700_shaderinst.h" void Init_R700ControlFlowGenericClause(R700ControlFlowGenericClause* pInst) @@ -208,16 +208,16 @@ unsigned int GetCFMaxInstructions(ShaderInstType instType) GLboolean LinkVertexInstruction(R700ControlFlowGenericClause *pCFGeneric, R700VertexInstruction *pVTXInstruction) { - if (pCFGeneric->m_pLinkedTEXInstruction != 0) - { - r700_error(ERROR_ASM_VTX_CLAUSE, "This instruction is already linked to a texture instruction"); - return GL_FALSE; + if (pCFGeneric->m_pLinkedTEXInstruction != 0) + { + radeon_error("This instruction is already linked to a texture instruction.\n"); + return GL_FALSE; } pCFGeneric->m_pLinkedVTXInstruction = pVTXInstruction; - pVTXInstruction->m_pLinkedGenericClause = pCFGeneric; + pVTXInstruction->m_pLinkedGenericClause = pCFGeneric; - return GL_TRUE; + return GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 86a67ab766..d107f99e7b 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -38,6 +38,7 @@ #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" +#include "radeon_debug.h" #include "r600_context.h" #include "r600_cmdbuf.h" @@ -174,7 +175,7 @@ void Map_Vertex_Program(struct r700_vertex_program *vp, // Create VFETCH instructions for inputs if (GL_TRUE != Process_Vertex_Program_Vfetch_Instructions(vp, mesa_vp) ) { - r700_error(ERROR_ASM_VTX_CLAUSE, "Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); + radeon_error("Calling Process_Vertex_Program_Vfetch_Instructions return error. \n"); return; //error } -- cgit v1.2.3 From 4099bb76148007f9ccb6c86838b2bf37ea42de56 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 11 Sep 2009 11:07:58 -0400 Subject: r600: support tex coords from constants Fixes neverball among other things. --- src/mesa/drivers/dri/r600/r700_assembler.c | 62 ++++++++---------------------- 1 file changed, 17 insertions(+), 45 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 2d8480daaf..19ba87001f 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1149,17 +1149,19 @@ GLboolean tex_dst(r700_AssemblerBase *pAsm) GLboolean tex_src(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - - GLboolean bValidTexCoord = GL_FALSE; switch (pILInst->SrcReg[0].File) { case PROGRAM_TEMPORARY: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + pAsm->starting_temp_register_number; pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - + break; + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index; + pAsm->S[0].src.rtype = SRC_REG_CONSTANT; break; case PROGRAM_INPUT: switch (pILInst->SrcReg[0].Index) @@ -1174,23 +1176,13 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) case FRAG_ATTRIB_TEX5: case FRAG_ATTRIB_TEX6: case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; - pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; pAsm->S[0].src.rtype = SRC_REG_INPUT; } break; } - if(GL_TRUE == bValidTexCoord) - { - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); - } - else - { - radeon_error("Invalid source texcoord for TEX instruction\n"); - return GL_FALSE; - } + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7; pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7; @@ -1201,7 +1193,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1; pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1; pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1; - + return GL_TRUE; } @@ -3362,39 +3354,19 @@ GLboolean assemble_STP(r700_AssemblerBase *pAsm) { return GL_TRUE; } - -GLboolean assemble_TEX(r700_AssemblerBase *pAsm) -{ - GLboolean src_const; - - switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) - { - case PROGRAM_CONSTANT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_STATE_VAR: - src_const = GL_TRUE; - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - src_const = GL_FALSE; - } - if (GL_TRUE == src_const) - { - radeon_error("TODO: Texture coordinates from a constant register not supported.\n"); - return GL_FALSE; - } - - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) +GLboolean assemble_TEX(r700_AssemblerBase *pAsm) +{ + switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) { case OPCODE_TEX: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; - case OPCODE_TXB: + case OPCODE_TXB: radeon_error("do not support TXB yet\n"); return GL_FALSE; break; - case OPCODE_TXP: + case OPCODE_TXP: /* TODO : tex proj version : divid first 3 components by 4th */ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; @@ -3418,13 +3390,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { return GL_FALSE; } - + if( GL_FALSE == tex_src(pAsm) ) { return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } -- cgit v1.2.3 From 08b7d32140a09a35bdfe93327dd7ee2333315bc1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 11 Sep 2009 12:10:15 -0400 Subject: Revert "r600: support tex coords from constants" This reverts commit 4099bb76148007f9ccb6c86838b2bf37ea42de56. Tex coord src has to be a GPR. --- src/mesa/drivers/dri/r600/r700_assembler.c | 62 ++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 19ba87001f..2d8480daaf 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1149,19 +1149,17 @@ GLboolean tex_dst(r700_AssemblerBase *pAsm) GLboolean tex_src(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); + + GLboolean bValidTexCoord = GL_FALSE; switch (pILInst->SrcReg[0].File) { case PROGRAM_TEMPORARY: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + pAsm->starting_temp_register_number; pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - break; - case PROGRAM_CONSTANT: - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_STATE_VAR: - pAsm->S[0].src.reg = pILInst->SrcReg[0].Index; - pAsm->S[0].src.rtype = SRC_REG_CONSTANT; + break; case PROGRAM_INPUT: switch (pILInst->SrcReg[0].Index) @@ -1176,13 +1174,23 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) case FRAG_ATTRIB_TEX5: case FRAG_ATTRIB_TEX6: case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; pAsm->S[0].src.rtype = SRC_REG_INPUT; } break; } - setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + if(GL_TRUE == bValidTexCoord) + { + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + } + else + { + radeon_error("Invalid source texcoord for TEX instruction\n"); + return GL_FALSE; + } pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7; pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7; @@ -1193,7 +1201,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1; pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1; pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1; - + return GL_TRUE; } @@ -3354,19 +3362,39 @@ GLboolean assemble_STP(r700_AssemblerBase *pAsm) { return GL_TRUE; } - -GLboolean assemble_TEX(r700_AssemblerBase *pAsm) + +GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) + GLboolean src_const; + + switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File) + { + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + src_const = GL_TRUE; + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + src_const = GL_FALSE; + } + + if (GL_TRUE == src_const) + { + radeon_error("TODO: Texture coordinates from a constant register not supported.\n"); + return GL_FALSE; + } + + switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) { case OPCODE_TEX: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; - case OPCODE_TXB: + case OPCODE_TXB: radeon_error("do not support TXB yet\n"); return GL_FALSE; break; - case OPCODE_TXP: + case OPCODE_TXP: /* TODO : tex proj version : divid first 3 components by 4th */ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; @@ -3390,13 +3418,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { return GL_FALSE; } - + if( GL_FALSE == tex_src(pAsm) ) { return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } -- cgit v1.2.3 From a79eecb9139169fa8c99c0f9cf26db95f3983a36 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Fri, 11 Sep 2009 15:59:55 -0400 Subject: r600: fix texcoords from constants with some minor updates from Richard. --- src/mesa/drivers/dri/r600/r700_assembler.c | 92 +++++++++++++++++------------- 1 file changed, 52 insertions(+), 40 deletions(-) (limited to 'src/mesa/drivers/dri/r600/r700_assembler.c') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 2d8480daaf..fda6692725 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -1149,41 +1149,49 @@ GLboolean tex_dst(r700_AssemblerBase *pAsm) GLboolean tex_src(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - + GLboolean bValidTexCoord = GL_FALSE; - switch (pILInst->SrcReg[0].File) - { + switch (pILInst->SrcReg[0].File) { + case PROGRAM_CONSTANT: + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_STATE_VAR: + bValidTexCoord = GL_TRUE; + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = pAsm->aArgSubst[1]; + break; case PROGRAM_TEMPORARY: - bValidTexCoord = GL_TRUE; - - pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + pAsm->starting_temp_register_number; - pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; - - break; + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + + pAsm->starting_temp_register_number; + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + break; case PROGRAM_INPUT: - switch (pILInst->SrcReg[0].Index) - { - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: - case FRAG_ATTRIB_TEX0: - case FRAG_ATTRIB_TEX1: - case FRAG_ATTRIB_TEX2: - case FRAG_ATTRIB_TEX3: - case FRAG_ATTRIB_TEX4: - case FRAG_ATTRIB_TEX5: - case FRAG_ATTRIB_TEX6: - case FRAG_ATTRIB_TEX7: - bValidTexCoord = GL_TRUE; - - pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; - pAsm->S[0].src.rtype = SRC_REG_INPUT; - } - break; + switch (pILInst->SrcReg[0].Index) + { + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + bValidTexCoord = GL_TRUE; + pAsm->S[0].src.reg = + pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index]; + pAsm->S[0].src.rtype = SRC_REG_INPUT; + break; + } + break; } if(GL_TRUE == bValidTexCoord) - { + { setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); } else @@ -1201,7 +1209,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm) pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1; pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1; pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1; - + return GL_TRUE; } @@ -2202,7 +2210,9 @@ GLboolean next_ins(r700_AssemblerBase *pAsm) { struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]); - if( GL_TRUE == IsTex(pILInst->Opcode) ) + if( GL_TRUE == IsTex(pILInst->Opcode) && + /* handle const moves to temp register */ + !(pAsm->D.dst.opcode == SQ_OP2_INST_MOV) ) { if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) { if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) ) @@ -3374,28 +3384,30 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) case PROGRAM_ENV_PARAM: case PROGRAM_STATE_VAR: src_const = GL_TRUE; + break; case PROGRAM_TEMPORARY: case PROGRAM_INPUT: src_const = GL_FALSE; + break; } - if (GL_TRUE == src_const) + if (GL_TRUE == src_const) { - radeon_error("TODO: Texture coordinates from a constant register not supported.\n"); - return GL_FALSE; + if ( GL_FALSE == mov_temp(pAsm, 0) ) + return GL_FALSE; } - switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) + switch (pAsm->pILInst[pAsm->uiCurInst].Opcode) { case OPCODE_TEX: - pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; + pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; - case OPCODE_TXB: + case OPCODE_TXB: radeon_error("do not support TXB yet\n"); return GL_FALSE; break; - case OPCODE_TXP: - /* TODO : tex proj version : divid first 3 components by 4th */ + case OPCODE_TXP: + /* TODO : tex proj version : divid first 3 components by 4th */ pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE; break; default: @@ -3418,13 +3430,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm) { return GL_FALSE; } - + if( GL_FALSE == tex_src(pAsm) ) { return GL_FALSE; } - if ( GL_FALSE == next_ins(pAsm) ) + if ( GL_FALSE == next_ins(pAsm) ) { return GL_FALSE; } -- cgit v1.2.3