diff options
Diffstat (limited to 'src/mesa')
109 files changed, 1262 insertions, 417 deletions
diff --git a/src/mesa/SConscript b/src/mesa/SConscript index 5200a244f2..5c02abc914 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -82,6 +82,7 @@ if env['platform'] != 'winddk': 'main/pixelstore.c', 'main/points.c', 'main/polygon.c', + 'main/querymatrix.c', 'main/queryobj.c', 'main/rastpos.c', 'main/readpix.c', diff --git a/src/mesa/drivers/dri/i810/i810render.c b/src/mesa/drivers/dri/i810/i810render.c index b543d4f012..205f0cebc1 100644 --- a/src/mesa/drivers/dri/i810/i810render.c +++ b/src/mesa/drivers/dri/i810/i810render.c @@ -37,6 +37,8 @@ #include "main/imports.h" #include "main/mtypes.h" +#include "math/m_xform.h" + #include "tnl/t_context.h" #include "i810screen.h" diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c index ec209391ab..add0adacb5 100644 --- a/src/mesa/drivers/dri/i915/intel_render.c +++ b/src/mesa/drivers/dri/i915/intel_render.c @@ -37,6 +37,8 @@ #include "main/mtypes.h" #include "main/enums.h" +#include "math/m_xform.h" + #include "tnl/t_context.h" #include "tnl/t_vertex.h" #include "tnl/t_pipeline.h" diff --git a/src/mesa/drivers/dri/mga/mgarender.c b/src/mesa/drivers/dri/mga/mgarender.c index 8b8fc485d3..cc0cea618d 100644 --- a/src/mesa/drivers/dri/mga/mgarender.c +++ b/src/mesa/drivers/dri/mga/mgarender.c @@ -44,6 +44,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/imports.h" #include "main/mtypes.h" +#include "math/m_xform.h" + #include "tnl/t_context.h" #include "mgacontext.h" diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c index 4ec864c181..6452fe218e 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c @@ -138,5 +138,7 @@ nouveau_driver_functions_init(struct dd_function_table *functions) functions->DrawPixels = _mesa_meta_DrawPixels; functions->CopyPixels = _mesa_meta_CopyPixels; functions->Bitmap = _mesa_meta_Bitmap; +#if FEATURE_EXT_framebuffer_blit functions->BlitFramebuffer = _mesa_meta_BlitFramebuffer; +#endif } diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c index bd1273beea..32d8f2d0f9 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c @@ -262,10 +262,12 @@ nouveau_finish_render_texture(GLcontext *ctx, void nouveau_fbo_functions_init(struct dd_function_table *functions) { +#if FEATURE_EXT_framebuffer_object functions->NewFramebuffer = nouveau_framebuffer_new; functions->NewRenderbuffer = nouveau_renderbuffer_new; functions->BindFramebuffer = nouveau_bind_framebuffer; functions->FramebufferRenderbuffer = nouveau_framebuffer_renderbuffer; functions->RenderTexture = nouveau_render_texture; functions->FinishRenderTexture = nouveau_finish_render_texture; +#endif } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index c6246a81a2..d2fa816894 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -113,7 +113,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after unroll loops"); } else{ - rc_transform_loops(&c->Base, &loop_state); + rc_transform_loops(&c->Base, &loop_state, -1); debug_program_log(c, "after transform loops"); rc_emulate_branches(&c->Base); diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index e940fedec2..666c9c2a7a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -32,6 +32,11 @@ #include "radeon_emulate_branches.h" #include "radeon_emulate_loops.h" +struct loop { + int BgnLoop; + +}; + /* * Take an already-setup and valid source then swizzle it appropriately to * obtain a constant ZERO or ONE source. @@ -332,11 +337,140 @@ static void ei_pow(struct r300_vertex_program_code *vp, inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]); } +static void mark_write(void * userdata, struct rc_instruction * inst, + rc_register_file file, unsigned int index, unsigned int mask) +{ + unsigned int * writemasks = userdata; + + if (file != RC_FILE_TEMPORARY) + return; + + if (index >= R300_VS_MAX_TEMPS) + return; + + writemasks[index] |= mask; +} + +static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler) +{ + return PVS_SRC_OPERAND(compiler->PredicateIndex, + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_ZERO), + t_swizzle(RC_SWIZZLE_W), + t_src_class(RC_FILE_TEMPORARY), + 0); +} + +static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler, + unsigned int hw_opcode, int is_math) +{ + return PVS_OP_DST_OPERAND(hw_opcode, + is_math, + 0, + compiler->PredicateIndex, + RC_MASK_W, + t_dst_class(RC_FILE_TEMPORARY)); + +} + +static void ei_if(struct r300_vertex_program_compiler * compiler, + struct rc_instruction *rci, + unsigned int * inst, + unsigned int branch_depth) +{ + unsigned int predicate_opcode; + int is_math = 0; + + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode IF not supported\n"); + return; + } + + /* Reserve a temporary to use as our predicate stack counter, if we + * don't already have one. */ + if (!compiler->PredicateMask) { + unsigned int writemasks[R300_VS_MAX_TEMPS]; + memset(writemasks, 0, sizeof(writemasks)); + struct rc_instruction * inst; + unsigned int i; + for(inst = compiler->Base.Program.Instructions.Next; + inst != &compiler->Base.Program.Instructions; + inst = inst->Next) { + rc_for_all_writes_mask(inst, mark_write, writemasks); + } + for(i = 0; i < R300_VS_MAX_TEMPS; i++) { + unsigned int mask = ~writemasks[i] & RC_MASK_XYZW; + /* Only the W component can be used fo the predicate + * stack counter. */ + if (mask & RC_MASK_W) { + compiler->PredicateMask = RC_MASK_W; + compiler->PredicateIndex = i; + break; + } + } + if (i == R300_VS_MAX_TEMPS) { + rc_error(&compiler->Base, "No free temporary to use for" + " predicate stack counter.\n"); + return; + } + } + predicate_opcode = + branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ; + + rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0)); + if (branch_depth == 0) { + is_math = 1; + predicate_opcode = ME_PRED_SET_NEQ; + inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + inst[2] = 0; + } else { + predicate_opcode = VE_PRED_SET_NEQ_PUSH; + inst[1] = t_pred_src(compiler); + inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]); + } + + inst[0] = t_pred_dst(compiler, predicate_opcode, is_math); + inst[3] = 0; + +} + +static void ei_else(struct r300_vertex_program_compiler * compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ELSE not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1); + inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} + +static void ei_endif(struct r300_vertex_program_compiler *compiler, + unsigned int * inst) +{ + if (!compiler->Base.is_r500) { + rc_error(&compiler->Base,"Opcode ENDIF not supported\n"); + return; + } + inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1); + inst[1] = t_pred_src(compiler); + inst[2] = 0; + inst[3] = 0; +} static void translate_vertex_program(struct r300_vertex_program_compiler * compiler) { struct rc_instruction *rci; + struct loop * loops; + int current_loop_depth = 0; + int loops_reserved = 0; + + unsigned int branch_depth = 0; + compiler->code->pos_end = 0; /* Not supported yet */ compiler->code->length = 0; @@ -366,9 +500,12 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break; case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break; case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break; + case RC_OPCODE_ELSE: ei_else(compiler, inst); break; + case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break; case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break; case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break; case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break; + case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break; case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break; case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break; case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break; @@ -385,11 +522,86 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break; case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break; case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break; + case RC_OPCODE_BGNLOOP: + { + struct loop * l; + + if ((!compiler->Base.is_r500 + && loops_reserved >= R300_VS_MAX_LOOP_DEPTH) + || loops_reserved >= R500_VS_MAX_FC_DEPTH) { + rc_error(&compiler->Base, + "Loops are nested too deep."); + return; + } + memory_pool_array_reserve(&compiler->Base.Pool, + struct loop, loops, current_loop_depth, + loops_reserved, 1); + l = &loops[current_loop_depth++]; + memset(l , 0, sizeof(struct loop)); + l->BgnLoop = (compiler->code->length / 4); + continue; + } + case RC_OPCODE_ENDLOOP: + { + struct loop * l = &loops[current_loop_depth - 1]; + unsigned int act_addr = l->BgnLoop - 1; + unsigned int last_addr = (compiler->code->length / 4) - 1; + unsigned int ret_addr = l->BgnLoop; + + if (loops_reserved >= R300_VS_MAX_FC_OPS) { + rc_error(&compiler->Base, + "Too many flow control instructions."); + return; + } + if (compiler->Base.is_r500) { + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].lw = + R500_PVS_FC_ACT_ADRS(act_addr) + | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff) + ; + compiler->code->fc_op_addrs.r500 + [compiler->code->num_fc_ops].uw = + R500_PVS_FC_LAST_INST(last_addr) + | R500_PVS_FC_RTN_INST(ret_addr) + ; + } else { + compiler->code->fc_op_addrs.r300 + [compiler->code->num_fc_ops] = + R300_PVS_FC_ACT_ADRS(act_addr) + | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff) + | R300_PVS_FC_LAST_INST(last_addr) + | R300_PVS_FC_RTN_INST(ret_addr) + ; + } + compiler->code->fc_loop_index[compiler->code->num_fc_ops] = + R300_PVS_FC_LOOP_INIT_VAL(0x0) + | R300_PVS_FC_LOOP_STEP_VAL(0x1) + ; + compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP( + compiler->code->num_fc_ops); + compiler->code->num_fc_ops++; + current_loop_depth--; + continue; + } + default: rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name); return; } + /* Non-flow control instructions that are inside an if statement + * need to pay attention to the predicate bit. */ + if (branch_depth + && vpi->Opcode != RC_OPCODE_IF + && vpi->Opcode != RC_OPCODE_ELSE + && vpi->Opcode != RC_OPCODE_ENDIF) { + + inst[0] |= (PVS_DST_PRED_ENABLE_MASK + << PVS_DST_PRED_ENABLE_SHIFT); + inst[0] |= (PVS_DST_PRED_SENSE_MASK + << PVS_DST_PRED_SENSE_SHIFT); + } + compiler->code->length += 4; if (compiler->Base.Error) @@ -406,6 +618,7 @@ struct temporary_allocation { static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler) { struct rc_instruction *inst; + struct rc_instruction *end_loop = NULL; unsigned int num_orig_temps = 0; char hwtemps[R300_VS_MAX_TEMPS]; struct temporary_allocation * ta; @@ -440,10 +653,35 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + /* Instructions inside of loops need to use the ENDLOOP + * instruction as their LastRead. */ + if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + int endloops = 1; + struct rc_instruction * ptr; + for(ptr = inst->Next; + ptr != &compiler->Base.Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + endloops++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + endloops--; + if (endloops <= 0) { + end_loop = ptr; + break; + } + } + } + } + + if (inst == end_loop) { + end_loop = NULL; + continue; + } for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) - ta[inst->U.I.SrcReg[i].Index].LastRead = inst; + ta[inst->U.I.SrcReg[i].Index].LastRead = + end_loop ? end_loop : inst; } } @@ -640,22 +878,17 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) debug_program_log(compiler, "before compilation"); - /* XXX Ideally this should be done only for r3xx, but since - * we don't have branching support for r5xx, we use the emulation - * on all chipsets. */ + if (compiler->Base.is_r500) + rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU); + else + rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU); - if (compiler->Base.is_r500){ - rc_transform_loops(&compiler->Base, &loop_state); - rc_emulate_loops(&loop_state, R500_VS_MAX_ALU); - } else { - rc_transform_loops(&compiler->Base, &loop_state); - rc_emulate_loops(&loop_state, R300_VS_MAX_ALU); - } debug_program_log(compiler, "after emulate loops"); - rc_emulate_branches(&compiler->Base); - - debug_program_log(compiler, "after emulate branches"); + if (!compiler->Base.is_r500) { + rc_emulate_branches(&compiler->Base); + debug_program_log(compiler, "after emulate branches"); + } if (compiler->Base.is_r500) { struct radeon_program_transformation transformations[] = { @@ -717,6 +950,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) if (compiler->Base.Debug) { fprintf(stderr, "Final vertex program code:\n"); - r300_vertex_program_dump(compiler->code); + r300_vertex_program_dump(compiler); } } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c index 5800f1a78e..e6009338e2 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c @@ -20,7 +20,9 @@ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include "radeon_compiler.h" #include "radeon_code.h" +#include "../r300_reg.h" #include <stdio.h> @@ -133,6 +135,10 @@ static void r300_vs_op_dump(uint32_t op) { fprintf(stderr, " dst: %d%s op: ", (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]); + if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) { + fprintf(stderr, "PRED %u", + (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1); + } if (op & 0x80) { if (op & 0x1) { fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n"); @@ -160,8 +166,9 @@ static void r300_vs_src_dump(uint32_t src) r300_vs_swiz_debug[(src >> 22) & 0x7]); } -void r300_vertex_program_dump(struct r300_vertex_program_code * vs) +void r300_vertex_program_dump(struct r300_vertex_program_compiler * c) { + struct r300_vertex_program_code * vs = c->code; unsigned instrcount = vs->length / 4; unsigned i; @@ -177,4 +184,21 @@ void r300_vertex_program_dump(struct r300_vertex_program_code * vs) r300_vs_src_dump(vs->body.d[offset+1+src]); } } + + fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops); + for(i = 0; i < vs->num_fc_ops; i++) { + switch((vs->fc_ops >> (i * 2)) & 0x3 ) { + case 0: fprintf(stderr, "NOP"); break; + case 1: fprintf(stderr, "JUMP"); break; + case 2: fprintf(stderr, "LOOP"); break; + case 3: fprintf(stderr, "JSR"); break; + } + if (c->Base.is_r500) { + fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n", + vs->fc_op_addrs.r500[i].uw, + vs->fc_op_addrs.r500[i].lw); + } else { + fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]); + } + } } diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index c3f817ad4e..9b60e30f58 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -70,6 +70,10 @@ struct loop_info { int * Brks; int BrkCount; int BrkReserved; + + int * Conts; + int ContCount; + int ContReserved; }; struct emit_state { @@ -413,20 +417,22 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst ; break; - case RC_OPCODE_CONTINUE: + case RC_OPCODE_CONT: loop = &s->Loops[s->CurrentLoopDepth - 1]; - s->Code->inst[newip].inst2 = R500_FC_OP_JUMP + memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, + loop->ContCount, loop->ContReserved, 1); + loop->Conts[loop->ContCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE | R500_FC_JUMP_FUNC(0xff) | R500_FC_B_OP1_DECR | R500_FC_B_POP_CNT( s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED ; - s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->BgnLoop); break; case RC_OPCODE_ENDLOOP: { - unsigned int i; loop = &s->Loops[s->CurrentLoopDepth - 1]; /* Emit ENDLOOP */ s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP @@ -449,6 +455,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = R500_FC_JUMP_ADDR(newip + 1); } + + /* Set jump address for CONT instructions. */ + while(loop->ContCount--) { + s->Code->inst[loop->Conts[loop->ContCount]].inst3 = + R500_FC_JUMP_ADDR(newip); + } s->CurrentLoopDepth--; break; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index e14a3520dd..896246d203 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -243,6 +243,12 @@ struct rX00_fragment_program_code { #define R500_VS_MAX_ALU 1024 #define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4) #define R300_VS_MAX_TEMPS 32 +/* This is the max for all chipsets (r300-r500) */ +#define R300_VS_MAX_FC_OPS 16 +/* The r500 maximum depth is not just for loops, but any combination of loops + * and subroutine jumps. */ +#define R500_VS_MAX_FC_DEPTH 8 +#define R300_VS_MAX_LOOP_DEPTH 1 #define VSF_MAX_INPUTS 32 #define VSF_MAX_OUTPUTS 32 @@ -263,9 +269,18 @@ struct r300_vertex_program_code { uint32_t InputsRead; uint32_t OutputsWritten; -}; -void r300_vertex_program_dump(struct r300_vertex_program_code * vs); + unsigned int num_fc_ops; + uint32_t fc_ops; + union { + uint32_t r300[R300_VS_MAX_FC_OPS]; + struct { + uint32_t lw; + uint32_t uw; + } r500[R300_VS_MAX_FC_OPS]; + } fc_op_addrs; + int32_t fc_loop_index[R300_VS_MAX_FC_OPS]; +}; #endif /* RADEON_CODE_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 1c8ba864a4..935dc9b0a8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -307,3 +307,46 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig } } + +/** + * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. + * Gallium and OpenGL define it the other way around. + * + * So let's just negate FACE at the beginning of the shader and rewrite the rest + * of the shader to read from the newly allocated temporary. + */ +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) +{ + unsigned tempregi = rc_find_free_temporary(c); + struct rc_instruction *inst_add; + struct rc_instruction *inst; + + /* perspective divide */ + inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tempregi; + inst_add->U.I.DstReg.WriteMask = RC_MASK_X; + + inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; + + inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; + inst_add->U.I.SrcReg[1].Index = face; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; + + for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { + const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); + unsigned i; + + for(i = 0; i < opcode->NumSrcRegs; i++) { + if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && + inst->U.I.SrcReg[i].Index == face) { + inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; + inst->U.I.SrcReg[i].Index = tempregi; + } + } + } +} diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index f15905d79d..7c42eb3ae5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -81,6 +81,7 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output); void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, int full_vtransform); +void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face); struct r300_fragment_program_compiler { struct radeon_compiler Base; @@ -110,8 +111,12 @@ struct r300_vertex_program_compiler { void * UserData; void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c); + + int PredicateIndex; + unsigned int PredicateMask; }; void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c); +void r300_vertex_program_dump(struct r300_vertex_program_compiler * c); #endif /* RADEON_COMPILER_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index 31566a937f..faf531b412 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -274,7 +274,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f } break; } - case RC_OPCODE_CONTINUE: + case RC_OPCODE_CONT: break; case RC_OPCODE_ENDIF: push_branch(&s); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index 24c3ae57b6..32d4b45dd6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -421,12 +421,9 @@ static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, * ENDLOOP; -> ENDLOOP * * @param inst A pointer to a BGNLOOP instruction. - * @return If the loop can be unrolled, a pointer to the first instruction of - * the unrolled loop. - * Otherwise, A pointer to the ENDLOOP instruction. - * Null if there is an error. + * @return 1 for success, 0 for failure */ -static struct rc_instruction * transform_loop(struct emulate_loop_state * s, +static int transform_loop(struct emulate_loop_state * s, struct rc_instruction * inst) { struct loop_info * loop; @@ -437,10 +434,10 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, loop = &s->Loops[s->LoopCount++]; if (!build_loop_info(s->C, loop, inst)) - return NULL; + return 0; - if(try_unroll_loop(s->C, loop, -1)){ - return loop->BeginLoop->Next; + if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){ + return 1; } /* Reverse the conditional instruction */ @@ -465,33 +462,31 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, break; default: rc_error(s->C, "loop->Cond is not a conditional.\n"); - return NULL; + return 0; } /* Prepare the loop to be emulated */ rc_remove_instruction(loop->Brk); rc_remove_instruction(loop->EndIf); rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); - return loop->EndLoop; + return 1; } void rc_transform_loops(struct radeon_compiler *c, - struct emulate_loop_state * s) + struct emulate_loop_state * s, int prog_inst_limit) { struct rc_instruction * ptr; memset(s, 0, sizeof(struct emulate_loop_state)); s->C = c; - ptr = s->C->Program.Instructions.Next; - while(ptr != &s->C->Program.Instructions) { + s->prog_inst_limit = prog_inst_limit; + for(ptr = s->C->Program.Instructions.Next; + ptr != &s->C->Program.Instructions; ptr = ptr->Next) { if(ptr->Type == RC_INSTRUCTION_NORMAL && ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ - ptr = transform_loop(s, ptr); - if(!ptr){ + if (!transform_loop(s, ptr)) return; - } } - ptr = ptr->Next; } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h index 86d91ef14b..bba1f68e30 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -21,10 +21,11 @@ struct emulate_loop_state { struct loop_info * Loops; unsigned int LoopCount; unsigned int LoopReserved; + int prog_inst_limit; }; void rc_transform_loops(struct radeon_compiler *c, - struct emulate_loop_state * s); + struct emulate_loop_state * s, int prog_inst_limit); void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c index 04f234f11d..da495a3afa 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c @@ -95,6 +95,12 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .IsComponentwise = 1 }, { + .Opcode = RC_OPCODE_DP2, + .Name = "DP2", + .NumSrcRegs = 2, + .HasDstReg = 1 + }, + { .Opcode = RC_OPCODE_DP3, .Name = "DP3", .NumSrcRegs = 2, @@ -295,6 +301,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .IsComponentwise = 1 }, { + .Opcode = RC_OPCODE_SSG, + .Name = "SSG", + .NumSrcRegs = 1, + .HasDstReg = 1, + .IsComponentwise = 1 + }, + { .Opcode = RC_OPCODE_SUB, .Name = "SUB", .NumSrcRegs = 2, @@ -386,8 +399,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = { .NumSrcRegs = 0, }, { - .Opcode = RC_OPCODE_CONTINUE, - .Name = "CONTINUE", + .Opcode = RC_OPCODE_CONT, + .Name = "CONT", .IsFlowControl = 1, .NumSrcRegs = 0 }, @@ -435,6 +448,10 @@ void rc_compute_sources_for_writemask( case RC_OPCODE_ARL: srcmasks[0] |= RC_MASK_X; break; + case RC_OPCODE_DP2: + srcmasks[0] |= RC_MASK_XY; + srcmasks[1] |= RC_MASK_XY; + break; case RC_OPCODE_DP3: srcmasks[0] |= RC_MASK_XYZ; srcmasks[1] |= RC_MASK_XYZ; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h index 8b9fa07dde..d3f639c870 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h @@ -64,6 +64,9 @@ typedef enum { * dst.c = d src0.c / dy */ RC_OPCODE_DDY, + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */ + RC_OPCODE_DP2, + /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */ RC_OPCODE_DP3, @@ -154,6 +157,9 @@ typedef enum { /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */ RC_OPCODE_SNE, + /** vec4 instruction: dst.c = (src0.c < 0 ?) -1 : ((src0.c > 0) : 1 : 0) */ + RC_OPCODE_SSG, + /** vec4 instruction: dst.c = src0.c - src1.c */ RC_OPCODE_SUB, @@ -187,7 +193,7 @@ typedef enum { RC_OPCODE_ENDLOOP, - RC_OPCODE_CONTINUE, + RC_OPCODE_CONT, /** special instruction, used in R300-R500 fragment program pair instructions * indicates that the result of the alpha operation shall be replicated diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c index 8a912da461..ce72cd97ab 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c @@ -65,6 +65,11 @@ struct regalloc_state { struct hardware_register * HwTemporary; unsigned int NumHwTemporaries; + /** + * If an instruction is inside of a loop, end_loop will be the + * IP of the ENDLOOP instruction, otherwise end_loop will be 0 + */ + int end_loop; }; static void print_live_intervals(struct live_intervals * src) @@ -178,10 +183,10 @@ static void scan_callback(void * data, struct rc_instruction * inst, else reg->Live.Start = inst->IP; reg->Live.End = inst->IP; - } else { - if (inst->IP > reg->Live.End) - reg->Live.End = inst->IP; - } + } else if (s->end_loop) + reg->Live.End = s->end_loop; + else if (inst->IP > reg->Live.End) + reg->Live.End = inst->IP; } static void compute_live_intervals(struct regalloc_state * s) @@ -191,6 +196,31 @@ static void compute_live_intervals(struct regalloc_state * s) for(struct rc_instruction * inst = s->C->Program.Instructions.Next; inst != &s->C->Program.Instructions; inst = inst->Next) { + + /* For all instructions inside of a loop, the ENDLOOP + * instruction is used as the end of the live interval. */ + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) { + int loops = 1; + struct rc_instruction * tmp; + for(tmp = inst->Next; + tmp != &s->C->Program.Instructions; + tmp = tmp->Next) { + if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) { + loops++; + break; + } else if (tmp->U.I.Opcode + == RC_OPCODE_ENDLOOP) { + if(!--loops) { + s->end_loop = tmp->IP; + break; + } + } + } + } + + if (inst->IP == s->end_loop) + s->end_loop = 0; + rc_for_all_reads_mask(inst, scan_callback, s); rc_for_all_writes_mask(inst, scan_callback, s); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 857aae5514..704a7bb2d2 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -216,18 +216,18 @@ static void transform_CEIL(struct radeon_compiler* c, rc_remove_instruction(inst); } -static void transform_DP3(struct radeon_compiler* c, +static void transform_DP2(struct radeon_compiler* c, struct rc_instruction* inst) { struct rc_src_register src0 = inst->U.I.SrcReg[0]; struct rc_src_register src1 = inst->U.I.SrcReg[1]; - src0.Negate &= ~RC_MASK_W; - src0.Swizzle &= ~(7 << (3 * 3)); - src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - src1.Negate &= ~RC_MASK_W; - src1.Swizzle &= ~(7 << (3 * 3)); - src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); - emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src0.Swizzle &= ~(63 << (3 * 2)); + src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); + src1.Swizzle &= ~(63 << (3 * 2)); + src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); + emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); rc_remove_instruction(inst); } @@ -464,6 +464,43 @@ static void transform_SNE(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * CMP tmp0, -x, 1, 0 + * CMP tmp1, x, 1, 0 + * ADD result, tmp0, -tmp1; + */ + unsigned tmp0, tmp1; + + /* 0 < x */ + tmp0 = rc_find_free_temporary(c); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + negate(inst->U.I.SrcReg[0]), + builtin_one, + builtin_zero); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit3(c, inst->Prev, RC_OPCODE_CMP, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_one, + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp0), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + static void transform_SUB(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -516,6 +553,7 @@ int radeonTransformALU( switch(inst->U.I.Opcode) { case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; + case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_DST: transform_DST(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; @@ -530,6 +568,7 @@ int radeonTransformALU( case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; + case RC_OPCODE_SSG: transform_SSG(c, inst); return 1; case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; @@ -577,6 +616,29 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c, rc_remove_instruction(inst); } +static void transform_r300_vertex_DP2(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_instruction *next_inst = inst->Next; + transform_DP2(c, inst); + next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; +} + +static void transform_r300_vertex_DP3(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + struct rc_src_register src0 = inst->U.I.SrcReg[0]; + struct rc_src_register src1 = inst->U.I.SrcReg[1]; + src0.Negate &= ~RC_MASK_W; + src0.Swizzle &= ~(7 << (3 * 3)); + src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + src1.Negate &= ~RC_MASK_W; + src1.Swizzle &= ~(7 << (3 * 3)); + src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); + emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); + rc_remove_instruction(inst); +} + static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, struct rc_instruction* inst) { @@ -672,6 +734,41 @@ static void transform_r300_vertex_SLE(struct radeon_compiler* c, inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; } +static void transform_r300_vertex_SSG(struct radeon_compiler* c, + struct rc_instruction* inst) +{ + /* result = sign(x) + * + * SLT tmp0, 0, x; + * SLT tmp1, x, 0; + * ADD result, tmp0, -tmp1; + */ + unsigned tmp0, tmp1; + + /* 0 < x */ + tmp0 = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask), + builtin_zero, + inst->U.I.SrcReg[0]); + + /* x < 0 */ + tmp1 = rc_find_free_temporary(c); + emit2(c, inst->Prev, RC_OPCODE_SLT, 0, + dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), + inst->U.I.SrcReg[0], + builtin_zero); + + /* Either both are zero, or one of them is one and the other is zero. */ + /* result = tmp0 - tmp1 */ + emit2(c, inst->Prev, RC_OPCODE_ADD, 0, + inst->U.I.DstReg, + srcreg(RC_FILE_TEMPORARY, tmp0), + negate(srcreg(RC_FILE_TEMPORARY, tmp1))); + + rc_remove_instruction(inst); +} + /** * For use with radeonLocalTransform, this transforms non-native ALU * instructions of the r300 up to r500 vertex engine. @@ -685,7 +782,8 @@ int r300_transform_vertex_alu( case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; - case RC_OPCODE_DP3: transform_DP3(c, inst); return 1; + case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; + case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; @@ -705,6 +803,7 @@ int r300_transform_vertex_alu( return 1; } return 0; + case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1; case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index 9c4b65f4c0..ddce590ee6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -117,8 +117,8 @@ int radeonTransformTEX( struct rc_instruction * inst_rcp = NULL; struct rc_instruction * inst_mad; struct rc_instruction * inst_cmp; - unsigned tmp_texsample = rc_find_free_temporary(c); - unsigned tmp_sum = rc_find_free_temporary(c); + unsigned tmp_texsample; + unsigned tmp_sum; unsigned tmp_recip_w = 0; int pass, fail, tex; @@ -126,6 +126,7 @@ int radeonTransformTEX( struct rc_dst_register output_reg = inst->U.I.DstReg; /* Redirect TEX to a new temp. */ + tmp_texsample = rc_find_free_temporary(c); inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = tmp_texsample; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; @@ -144,6 +145,7 @@ int radeonTransformTEX( } /* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */ + tmp_sum = rc_find_free_temporary(c); inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; inst_mad->U.I.DstReg.Index = tmp_sum; @@ -199,6 +201,8 @@ int radeonTransformTEX( inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); + + assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w); } } diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index e4b302bbad..3d2f8928fa 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -461,7 +461,7 @@ static void r300InitGLExtensions(GLcontext *ctx) if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) { _mesa_disable_extension(ctx, "GL_ARB_occlusion_query"); } - if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) + if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_R420) _mesa_enable_extension(ctx, "GL_ARB_half_float_vertex"); if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index f25264b6f2..f7705b0f6f 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -441,6 +441,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 #define R300_VAP_GB_HORZ_DISC_ADJ 0x222c +#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230 +#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8) +#define R300_PVS_FC_LAST_INST(x) ((x) << 16) +#define R300_PVS_FC_RTN_INST(x) ((x) << 24) + /* gap */ /* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between @@ -459,6 +465,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_2288_R300 0x00750000 /* -- nh */ # define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ +#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290 +#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0) +#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8) + /* gap */ /* Addresses are relative to the vertex program instruction area of the @@ -489,6 +499,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #define R300_VAP_PVS_CODE_CNTL_1 0x22D8 # define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 #define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC +#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x))) +#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x))) /* The entire range from 0x2300 to 0x2AC inclusive seems to be used for * immediate vertices @@ -505,6 +518,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* write 0 to indicate end of packet? */ #define R300_VAP_VTX_END_OF_PKT 0x24AC +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500 +#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0) +#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16) + +#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504 +#define R500_PVS_FC_LAST_INST(x) ((x) << 0) +#define R500_PVS_FC_RTN_INST(x) ((x) << 16) + /* gap */ /* These are values from r300_reg/r300_reg.h - they are known to be correct diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 4ba6740e3d..9458869826 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -152,8 +152,8 @@ int32_t r300TranslateTexFormat(gl_format mesaFormat) case MESA_FORMAT_Z32: return R300_EASY_TX_FORMAT(X, X, X, X, X32); /* EXT_texture_sRGB */ - case MESA_FORMAT_SRGBA8: - return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA; + case MESA_FORMAT_SARGB8: + return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA; case MESA_FORMAT_SLA8: return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA; case MESA_FORMAT_SL8: diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c index 619678214f..4fd425b809 100644 --- a/src/mesa/drivers/dri/r600/r600_blit.c +++ b/src/mesa/drivers/dri/r600/r600_blit.c @@ -72,7 +72,7 @@ unsigned r600_check_blit(gl_format mesa_format) case MESA_FORMAT_Z24_S8: case MESA_FORMAT_Z16: case MESA_FORMAT_Z32: - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: case MESA_FORMAT_SLA8: case MESA_FORMAT_SL8: break; @@ -320,9 +320,9 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma CLEARbit(cb_color0_info, SOURCE_FORMAT_bit); SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask); break; - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: format = COLOR_8_8_8_8; - comp_swap = SWAP_STD_REV; + comp_swap = SWAP_ALT; SETbit(cb_color0_info, SOURCE_FORMAT_bit); SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask); break; @@ -1050,17 +1050,17 @@ set_tex_resource(context_t * context, SETfield(sq_tex_resource4, SQ_SEL_X, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); break; - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: SETfield(sq_tex_resource1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - SETfield(sq_tex_resource4, SQ_SEL_W, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); SETfield(sq_tex_resource4, SQ_SEL_Z, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); SETfield(sq_tex_resource4, SQ_SEL_Y, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); SETfield(sq_tex_resource4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(sq_tex_resource4, SQ_SEL_W, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); break; @@ -1454,7 +1454,7 @@ set_default_state(context_t *context) SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit); } - BEGIN_BATCH_NO_AUTOSTATE(114); + BEGIN_BATCH_NO_AUTOSTATE(117); R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6); R600_OUT_BATCH(sq_config); R600_OUT_BATCH(sq_gpr_resource_mgmt_1); @@ -1499,9 +1499,10 @@ set_default_state(context_t *context) R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) | (X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) | (X_1_256TH << QUANT_MODE_shift)); + R600_OUT_BATCH_REGVAL(PA_SC_AA_CONFIG, 0); R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4); - R600_OUT_BATCH(2048); + R600_OUT_BATCH(0xffffff); R600_OUT_BATCH(0); R600_OUT_BATCH(0); R600_OUT_BATCH(0); @@ -1532,6 +1533,7 @@ set_default_state(context_t *context) R600_OUT_BATCH(0); R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, 0); + R600_OUT_BATCH_REGVAL(SX_ALPHA_TEST_CONTROL, 0); END_BATCH(); COMMIT_BATCH(); @@ -1613,7 +1615,7 @@ unsigned r600_blit(GLcontext *ctx, /* Flush is needed to make sure that source buffer has correct data */ radeonFlush(ctx); - rcommonEnsureCmdBufSpace(&context->radeon, 305, __FUNCTION__); + rcommonEnsureCmdBufSpace(&context->radeon, 311, __FUNCTION__); /* load shaders */ load_shaders(context->radeon.glCtx); @@ -1622,7 +1624,7 @@ unsigned r600_blit(GLcontext *ctx, return GL_FALSE; /* set clear state */ - /* 114 */ + /* 120 */ set_default_state(context); /* shaders */ diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index 84d9d42312..389b0412ba 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -72,6 +72,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define R600_ENABLE_GLSL_TEST 1 #define need_GL_VERSION_2_0 +#define need_GL_VERSION_2_1 +#define need_GL_ARB_draw_elements_base_vertex #define need_GL_ARB_occlusion_query #define need_GL_ARB_point_parameters #define need_GL_ARB_vertex_program @@ -140,6 +142,7 @@ static const struct dri_extension card_extensions[] = { {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, {"GL_SGIS_generate_mipmap", NULL}, {"GL_ARB_pixel_buffer_object", NULL}, + {"GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions }, {NULL, NULL} /* *INDENT-ON* */ }; @@ -157,6 +160,7 @@ static const struct dri_extension mm_extensions[] = { static const struct dri_extension gl_20_extension[] = { #ifdef R600_ENABLE_GLSL_TEST {"GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, + {"GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, #else {"GL_VERSION_2_0", GL_VERSION_2_0_functions }, #endif /* R600_ENABLE_GLSL_TEST */ diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index 41419f8460..512a52ede3 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -431,7 +431,7 @@ unsigned r600IsFormatRenderable(gl_format mesa_format) case MESA_FORMAT_Z24_S8: case MESA_FORMAT_Z16: case MESA_FORMAT_Z32: - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: case MESA_FORMAT_SLA8: case MESA_FORMAT_SL8: return 1; diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c index 1600033b9b..ba3690b70e 100644 --- a/src/mesa/drivers/dri/r600/r600_texstate.c +++ b/src/mesa/drivers/dri/r600/r600_texstate.c @@ -605,17 +605,17 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa } break; /* EXT_texture_sRGB */ - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask); - SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y, - SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask); SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X, + SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask); + SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask); SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit); break; diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index 1e955b93b2..bf8063391a 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -265,17 +265,6 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom) if (context->radeon.tcl.aos_count == 0) return; - BEGIN_BATCH_NO_AUTOSTATE(6); - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); - R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); - R600_OUT_BATCH(0); - - R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1)); - R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX); - R600_OUT_BATCH(0); - END_BATCH(); - COMMIT_BATCH(); - for(i=0; i<VERT_ATTRIB_MAX; i++) { if(vp->mesa_program->Base.InputsRead & (1 << i)) { @@ -523,9 +512,9 @@ static void r700SetRenderTarget(context_t *context, int id) CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask); CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit); break; - case MESA_FORMAT_SRGBA8: + case MESA_FORMAT_SARGB8: format = COLOR_8_8_8_8; - comp_swap = SWAP_STD_REV; + comp_swap = SWAP_ALT; number_type = NUMBER_SRGB; SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit); break; @@ -1480,9 +1469,6 @@ static int check_vtx(GLcontext *ctx, struct radeon_state_atom *atom) context_t *context = R700_CONTEXT(ctx); int count = context->radeon.tcl.aos_count * 18; - if (count) - count += 6; - radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count); return count; } @@ -1605,7 +1591,7 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(ps, always, 24, r700SendPSState); ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts); ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts); - ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState); + ALLOC_STATE(vtx, vtx, (VERT_ATTRIB_MAX * 18), r700SendVTXState); ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState); ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState); ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState); diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index ba55f38e05..c5771f9fd0 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -244,7 +244,8 @@ static int r700NumVerts(int num_verts, int prim) return num_verts - verts_off; } -static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) +static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, + int prim, GLint basevertex) { context_t *context = R700_CONTEXT(ctx); BATCH_LOCALS(&context->radeon); @@ -282,6 +283,7 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim total_emit = 3 /* VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ + 2 /* NUM_INSTANCES */ + + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */ + 5 + 2; /* DRAW_INDEX */ BEGIN_BATCH_NO_AUTOSTATE(total_emit); @@ -294,6 +296,11 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim // num instances R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); + /* offset */ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2)); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(basevertex); //VTX_BASE_VTX_LOC + R600_OUT_BATCH(0); //VTX_START_INST_LOC // draw packet R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3)); R600_OUT_BATCH(context->ind_buf.bo_offset); @@ -364,6 +371,7 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, total_emit += 3 /* VGT_PRIMITIVE_TYPE */ + 2 /* VGT_INDEX_TYPE */ + 2 /* NUM_INSTANCES */ + + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */ + 3; /* DRAW */ BEGIN_BATCH_NO_AUTOSTATE(total_emit); @@ -376,6 +384,11 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, // num instances R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0)); R600_OUT_BATCH(1); + /* offset */ + R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2)); + R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX); + R600_OUT_BATCH(0); //VTX_BASE_VTX_LOC + R600_OUT_BATCH(0); //VTX_START_INST_LOC // draw packet if(start == 0) { @@ -433,16 +446,16 @@ static GLuint r700PredictRenderSize(GLcontext* ctx, dwords = PRE_EMIT_STATE_BUFSZ; if (ib) - dwords += nr_prims * 14; + dwords += nr_prims * 18; else { for (i = 0; i < nr_prims; ++i) { if (prim[i].start == 0) - dwords += 10; + dwords += 14; else if (prim[i].count > 0xffff) - dwords += prim[i].count + 10; + dwords += prim[i].count + 14; else - dwords += ((prim[i].count + 1) / 2) + 10; + dwords += ((prim[i].count + 1) / 2) + 14; } } @@ -923,7 +936,8 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx, r700RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, - prim[i].mode); + prim[i].mode, + prim[i].basevertex); else r700RunRenderPrimitiveImmediate(ctx, prim[i].start, @@ -975,15 +989,17 @@ static void r700DrawPrims(GLcontext *ctx, /* This check should get folded into just the places that * min/max index are really needed. */ - if (!index_bounds_valid) { - vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); - } - if (min_index) { + if (!vbo_all_varyings_in_vbos(arrays)) { + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + /* do we want to rebase, minimizes the + * amount of data to upload? */ + if (min_index) { vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims ); return; + } } - /* Make an attempt at drawing */ retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index b7ee9a134b..7d54fabebb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -414,9 +414,9 @@ enum { CHIP_FAMILY_R350, CHIP_FAMILY_RV350, CHIP_FAMILY_RV380, + CHIP_FAMILY_RS400, CHIP_FAMILY_R420, CHIP_FAMILY_RV410, - CHIP_FAMILY_RS400, CHIP_FAMILY_RS600, CHIP_FAMILY_RS690, CHIP_FAMILY_RS740, diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index 517485091a..0597d4250d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -609,6 +609,7 @@ radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb) void radeon_fbo_init(struct radeon_context *radeon) { +#if FEATURE_EXT_framebuffer_object radeon->glCtx->Driver.NewFramebuffer = radeon_new_framebuffer; radeon->glCtx->Driver.NewRenderbuffer = radeon_new_renderbuffer; radeon->glCtx->Driver.BindFramebuffer = radeon_bind_framebuffer; @@ -617,7 +618,10 @@ void radeon_fbo_init(struct radeon_context *radeon) radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture; radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers; radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer; +#endif +#if FEATURE_EXT_framebuffer_blit radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer; +#endif } diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c index c877e6c176..c6e5f110ea 100644 --- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c +++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c @@ -133,7 +133,7 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree height = _mesa_next_pow_two_32(lvl->height); lvl->rowstride = get_texture_image_row_stride(rmesa, mt->mesaFormat, lvl->width, mt->tilebits); - lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, lvl->height, lvl->depth, mt->tilebits); + lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, height, lvl->depth, mt->tilebits); assert(lvl->size > 0); diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index 67be466c3f..29defe73a7 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -40,7 +40,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/macros.h" #include "main/simple_list.h" +#include "math/m_xform.h" + #include "swrast_setup/swrast_setup.h" + #include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index d2b190e42e..8c6a50d2f0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -551,7 +551,7 @@ gl_format radeonChooseTextureFormat(GLcontext * ctx, case GL_SRGB8_ALPHA8: case GL_COMPRESSED_SRGB: case GL_COMPRESSED_SRGB_ALPHA: - return MESA_FORMAT_SRGBA8; + return MESA_FORMAT_SARGB8; case GL_SLUMINANCE: case GL_SLUMINANCE8: diff --git a/src/mesa/drivers/dri/savage/savagerender.c b/src/mesa/drivers/dri/savage/savagerender.c index c369bb124c..2d9e80e29c 100644 --- a/src/mesa/drivers/dri/savage/savagerender.c +++ b/src/mesa/drivers/dri/savage/savagerender.c @@ -33,6 +33,8 @@ #include "main/imports.h" #include "main/mtypes.h" +#include "math/m_xform.h" + #include "tnl/t_context.h" #include "savagecontext.h" diff --git a/src/mesa/drivers/dri/unichrome/via_render.c b/src/mesa/drivers/dri/unichrome/via_render.c index 896c43db1b..4351f11955 100644 --- a/src/mesa/drivers/dri/unichrome/via_render.c +++ b/src/mesa/drivers/dri/unichrome/via_render.c @@ -33,6 +33,8 @@ #include "main/macros.h" #include "main/mtypes.h" +#include "math/m_xform.h" + #include "tnl/t_context.h" #include "via_context.h" diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 32f7d969d8..0f2d1a8f8d 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -177,7 +177,7 @@ /** * Per-program constants (power of two) * - * \c MAX_PROGRAM_LOCAL_PARAMS and \c MAX_UNIFORMS are just the assmebly shader + * \c MAX_PROGRAM_LOCAL_PARAMS and \c MAX_UNIFORMS are just the assembly shader * and GLSL shader names for the same thing. They should \b always have the * same value. Each refers to the number of vec4 values supplied as * per-program parameters. diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index a369532e99..b01fed1781 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -462,7 +462,7 @@ _mesa_init_current(GLcontext *ctx) /** - * Init vertex/fragment program limits. + * Init vertex/fragment/geometry program limits. * Important: drivers should override these with actual limits. */ static void @@ -477,16 +477,18 @@ init_program_limits(GLenum type, struct gl_program_constants *prog) prog->MaxLocalParams = MAX_PROGRAM_LOCAL_PARAMS; prog->MaxUniformComponents = 4 * MAX_UNIFORMS; - if (type == GL_VERTEX_PROGRAM_ARB) { + switch (type) { + case GL_VERTEX_PROGRAM_ARB: prog->MaxParameters = MAX_VERTEX_PROGRAM_PARAMS; prog->MaxAttribs = MAX_NV_VERTEX_PROGRAM_INPUTS; prog->MaxAddressRegs = MAX_VERTEX_PROGRAM_ADDRESS_REGS; - } - else if (type == GL_FRAGMENT_PROGRAM_ARB) { + break; + case GL_FRAGMENT_PROGRAM_ARB: prog->MaxParameters = MAX_NV_FRAGMENT_PROGRAM_PARAMS; prog->MaxAttribs = MAX_NV_FRAGMENT_PROGRAM_INPUTS; prog->MaxAddressRegs = MAX_FRAGMENT_PROGRAM_ADDRESS_REGS; - } else { + break; + case MESA_GEOMETRY_PROGRAM: prog->MaxParameters = MAX_NV_VERTEX_PROGRAM_PARAMS; prog->MaxAttribs = MAX_NV_VERTEX_PROGRAM_INPUTS; prog->MaxAddressRegs = MAX_VERTEX_PROGRAM_ADDRESS_REGS; @@ -497,6 +499,9 @@ init_program_limits(GLenum type, struct gl_program_constants *prog) prog->MaxGeometryUniformComponents = MAX_GEOMETRY_UNIFORM_COMPONENTS; prog->MaxGeometryOutputVertices = MAX_GEOMETRY_OUTPUT_VERTICES; prog->MaxGeometryTotalOutputComponents = MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS; + break; + default: + assert(0 && "Bad program type in init_program_limits()"); } /* Set the native limits to zero. This implies that there is no native diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index c399351096..46e5c932d0 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -756,7 +756,7 @@ _mesa_strdup( const char *s ) float _mesa_strtof( const char *s, char **end ) { -#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) +#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__) static locale_t loc = NULL; if (!loc) { loc = newlocale(LC_CTYPE_MASK, "C", NULL); diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index e89f55befe..b8bcda56bf 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -2505,29 +2505,29 @@ struct gl_framebuffer /** - * Limits for vertex and fragment programs. + * Limits for vertex and fragment programs/shaders. */ struct gl_program_constants { /* logical limits */ GLuint MaxInstructions; - GLuint MaxAluInstructions; /* fragment programs only, for now */ - GLuint MaxTexInstructions; /* fragment programs only, for now */ - GLuint MaxTexIndirections; /* fragment programs only, for now */ + GLuint MaxAluInstructions; + GLuint MaxTexInstructions; + GLuint MaxTexIndirections; GLuint MaxAttribs; GLuint MaxTemps; - GLuint MaxAddressRegs; /* vertex program only, for now */ + GLuint MaxAddressRegs; GLuint MaxParameters; GLuint MaxLocalParams; GLuint MaxEnvParams; /* native/hardware limits */ GLuint MaxNativeInstructions; - GLuint MaxNativeAluInstructions; /* fragment programs only, for now */ - GLuint MaxNativeTexInstructions; /* fragment programs only, for now */ - GLuint MaxNativeTexIndirections; /* fragment programs only, for now */ + GLuint MaxNativeAluInstructions; + GLuint MaxNativeTexInstructions; + GLuint MaxNativeTexIndirections; GLuint MaxNativeAttribs; GLuint MaxNativeTemps; - GLuint MaxNativeAddressRegs; /* vertex program only, for now */ + GLuint MaxNativeAddressRegs; GLuint MaxNativeParameters; /* For shaders */ GLuint MaxUniformComponents; diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c index 6f62415ba8..32aaa79f7f 100644 --- a/src/mesa/main/querymatrix.c +++ b/src/mesa/main/querymatrix.c @@ -36,9 +36,9 @@ #define INT_TO_FIXED(x) ((GLfixed) ((x) << 16)) #define FLOAT_TO_FIXED(x) ((GLfixed) ((x) * 65536.0)) -#if defined(WIN32) || defined(_WIN32_WCE) +#if defined(_MSC_VER) /* Oddly, the fpclassify() function doesn't exist in such a form - * on Windows. This is an implementation using slightly different + * on MSVC. This is an implementation using slightly different * lower-level Windows functions. */ #include <float.h> @@ -72,7 +72,7 @@ fpclassify(double x) #elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \ defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \ - (defined(__sun) && defined(__C99FEATURES__)) + (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__) /* fpclassify is available. */ diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h index ec088c7dde..f1c4fdcd1f 100644 --- a/src/mesa/program/hash_table.h +++ b/src/mesa/program/hash_table.h @@ -31,8 +31,6 @@ #ifndef HASH_TABLE_H #define HASH_TABLE_H -#include <string.h> - struct hash_table; typedef unsigned (*hash_func_t)(const void *key); diff --git a/src/mesa/program/nvfragparse.h b/src/mesa/program/nvfragparse.h index 544ab80c56..e28a6c4934 100644 --- a/src/mesa/program/nvfragparse.h +++ b/src/mesa/program/nvfragparse.h @@ -30,6 +30,7 @@ #ifndef NVFRAGPARSE_H #define NVFRAGPARSE_H +#include "main/mtypes.h" extern void _mesa_parse_nv_fragment_program(GLcontext *ctx, GLenum target, diff --git a/src/mesa/program/nvvertparse.h b/src/mesa/program/nvvertparse.h index 9919e22388..91ef79e6c3 100644 --- a/src/mesa/program/nvvertparse.h +++ b/src/mesa/program/nvvertparse.h @@ -29,6 +29,7 @@ #ifndef NVVERTPARSE_H #define NVVERTPARSE_H +#include "main/mtypes.h" extern void _mesa_parse_nv_vertex_program(GLcontext *ctx, GLenum target, diff --git a/src/mesa/program/prog_cache.h b/src/mesa/program/prog_cache.h index 4e1ccac03f..bfe8f99d44 100644 --- a/src/mesa/program/prog_cache.h +++ b/src/mesa/program/prog_cache.h @@ -30,6 +30,9 @@ #define PROG_CACHE_H +#include "main/mtypes.h" + + /** Opaque type */ struct gl_program_cache; diff --git a/src/mesa/program/prog_execute.h b/src/mesa/program/prog_execute.h index adefc5439d..f59b65176f 100644 --- a/src/mesa/program/prog_execute.h +++ b/src/mesa/program/prog_execute.h @@ -26,6 +26,7 @@ #define PROG_EXECUTE_H #include "main/config.h" +#include "main/mtypes.h" typedef void (*FetchTexelLodFunc)(GLcontext *ctx, const GLfloat texcoord[4], diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h index 1d850c4d5d..ca90de7ce1 100644 --- a/src/mesa/program/prog_instruction.h +++ b/src/mesa/program/prog_instruction.h @@ -38,7 +38,7 @@ #define PROG_INSTRUCTION_H -#include "main/mfeatures.h" +#include "main/glheader.h" /** diff --git a/src/mesa/program/prog_noise.h b/src/mesa/program/prog_noise.h index c4779479f9..dd7986efcd 100644 --- a/src/mesa/program/prog_noise.h +++ b/src/mesa/program/prog_noise.h @@ -25,6 +25,8 @@ #ifndef PROG_NOISE #define PROG_NOISE +#include "main/glheader.h" + extern GLfloat _mesa_noise1(GLfloat); extern GLfloat _mesa_noise2(GLfloat, GLfloat); extern GLfloat _mesa_noise3(GLfloat, GLfloat, GLfloat); diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c index 457ace14c6..ab878755e2 100644 --- a/src/mesa/program/prog_optimize.c +++ b/src/mesa/program/prog_optimize.c @@ -43,40 +43,117 @@ static GLboolean dbg = GL_FALSE; -/* Returns the mask of channels read from the given srcreg in this instruction. +#define NO_MASK 0xf + +/** + * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which + * are read from the given src in this instruction, We also provide + * one optional masks which may mask other components in the dst + * register */ static GLuint -get_src_arg_mask(const struct prog_instruction *inst, int arg) +get_src_arg_mask(const struct prog_instruction *inst, + GLuint arg, GLuint dst_mask) { - int writemask = inst->DstReg.WriteMask; + GLuint read_mask, channel_mask; + GLuint comp; - if (inst->CondUpdate) - writemask = WRITEMASK_XYZW; + ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode)); - switch (inst->Opcode) { - case OPCODE_MOV: - case OPCODE_ABS: - case OPCODE_ADD: - case OPCODE_MUL: - case OPCODE_SUB: - return writemask; - case OPCODE_RCP: - case OPCODE_SIN: - case OPCODE_COS: - case OPCODE_RSQ: - case OPCODE_POW: - case OPCODE_EX2: - return WRITEMASK_X; - case OPCODE_DP2: - return WRITEMASK_XY; - case OPCODE_DP3: - case OPCODE_XPD: - return WRITEMASK_XYZ; - default: - return WRITEMASK_XYZW; + /* Form the dst register, find the written channels */ + if (inst->CondUpdate) { + channel_mask = WRITEMASK_XYZW; + } + else { + switch (inst->Opcode) { + case OPCODE_MOV: + case OPCODE_MIN: + case OPCODE_MAX: + case OPCODE_ABS: + case OPCODE_ADD: + case OPCODE_MAD: + case OPCODE_MUL: + case OPCODE_SUB: + channel_mask = inst->DstReg.WriteMask & dst_mask; + break; + case OPCODE_RCP: + case OPCODE_SIN: + case OPCODE_COS: + case OPCODE_RSQ: + case OPCODE_POW: + case OPCODE_EX2: + case OPCODE_LOG: + channel_mask = WRITEMASK_X; + break; + case OPCODE_DP2: + channel_mask = WRITEMASK_XY; + break; + case OPCODE_DP3: + case OPCODE_XPD: + channel_mask = WRITEMASK_XYZ; + break; + default: + channel_mask = WRITEMASK_XYZW; + break; + } } + + /* Now, given the src swizzle and the written channels, find which + * components are actually read + */ + read_mask = 0x0; + for (comp = 0; comp < 4; ++comp) { + const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp); + ASSERT(coord < 4); + if (channel_mask & (1 << comp) && coord <= SWIZZLE_W) + read_mask |= 1 << coord; + } + + return read_mask; +} + + +/** + * For a MOV instruction, compute a write mask when src register also has + * a mask + */ +static GLuint +get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask) +{ + const GLuint mask = mov->DstReg.WriteMask; + GLuint comp; + GLuint updated_mask = 0x0; + + ASSERT(mov->Opcode == OPCODE_MOV); + + for (comp = 0; comp < 4; ++comp) { + GLuint src_comp; + if ((mask & (1 << comp)) == 0) + continue; + src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp); + if ((src_mask & (1 << src_comp)) == 0) + continue; + updated_mask |= 1 << comp; + } + + return updated_mask; +} + + +/** + * Ensure that the swizzle is regular. That is, all of the swizzle + * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE. + */ +static GLboolean +is_swizzle_regular(GLuint swz) +{ + return GET_SWZ(swz,0) <= SWIZZLE_W && + GET_SWZ(swz,1) <= SWIZZLE_W && + GET_SWZ(swz,2) <= SWIZZLE_W && + GET_SWZ(swz,3) <= SWIZZLE_W; } + /** * In 'prog' remove instruction[i] if removeFlags[i] == TRUE. * \return number of instructions removed @@ -153,82 +230,13 @@ replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) /** - * Consolidate temporary registers to use low numbers. For example, if the - * shader only uses temps 4, 5, 8, replace them with 0, 1, 2. - */ -static void -_mesa_consolidate_registers(struct gl_program *prog) -{ - GLboolean tempUsed[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; - GLint tempMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS]; - GLuint tempMax = 0, i; - - if (dbg) { - printf("Optimize: Begin register consolidation\n"); - } - - memset(tempUsed, 0, sizeof(tempUsed)); - - for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) { - tempMap[i] = -1; - } - - /* set tempUsed[i] if temporary [i] is referenced */ - for (i = 0; i < prog->NumInstructions; i++) { - const struct prog_instruction *inst = prog->Instructions + i; - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - const GLuint index = inst->SrcReg[j].Index; - ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); - tempUsed[index] = GL_TRUE; - tempMax = MAX2(tempMax, index); - break; - } - } - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - const GLuint index = inst->DstReg.Index; - ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); - tempUsed[index] = GL_TRUE; - tempMax = MAX2(tempMax, index); - } - } - - /* allocate a new index for each temp that's used */ - { - GLuint freeTemp = 0; - for (i = 0; i <= tempMax; i++) { - if (tempUsed[i]) { - tempMap[i] = freeTemp++; - /*printf("replace %u with %u\n", i, tempMap[i]);*/ - } - } - if (freeTemp == tempMax + 1) { - /* no consolidation possible */ - return; - } - if (dbg) { - printf("Replace regs 0..%u with 0..%u\n", tempMax, freeTemp-1); - } - } - - replace_regs(prog, PROGRAM_TEMPORARY, tempMap); - - if (dbg) { - printf("Optimize: End register consolidation\n"); - } -} - - -/** * Remove dead instructions from the given program. * This is very primitive for now. Basically look for temp registers * that are written to but never read. Remove any instructions that * write to such registers. Be careful with condition code setters. */ -static void -_mesa_remove_dead_code(struct gl_program *prog) +static GLboolean +_mesa_remove_dead_code_global(struct gl_program *prog) { GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4]; GLboolean *removeInst; /* per-instruction removal flag */ @@ -256,7 +264,7 @@ _mesa_remove_dead_code(struct gl_program *prog) const GLuint index = inst->SrcReg[j].Index; GLuint read_mask; ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS); - read_mask = get_src_arg_mask(inst, j); + read_mask = get_src_arg_mask(inst, j, NO_MASK); if (inst->SrcReg[j].RelAddr) { if (dbg) @@ -265,25 +273,12 @@ _mesa_remove_dead_code(struct gl_program *prog) } for (comp = 0; comp < 4; comp++) { - GLuint swz = (inst->SrcReg[j].Swizzle >> (3 * comp)) & 0x7; - - if ((read_mask & (1 << comp)) == 0) + const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp); + ASSERT(swz < 4); + if ((read_mask & (1 << swz)) == 0) continue; - - switch (swz) { - case SWIZZLE_X: - tempRead[index][0] = GL_TRUE; - break; - case SWIZZLE_Y: - tempRead[index][1] = GL_TRUE; - break; - case SWIZZLE_Z: - tempRead[index][2] = GL_TRUE; - break; - case SWIZZLE_W: - tempRead[index][3] = GL_TRUE; - break; - } + if (swz <= SWIZZLE_W) + tempRead[index][swz] = GL_TRUE; } } } @@ -353,10 +348,11 @@ _mesa_remove_dead_code(struct gl_program *prog) done: free(removeInst); + return rem != 0; } -enum temp_use +enum inst_use { READ, WRITE, @@ -364,13 +360,19 @@ enum temp_use END }; + /** - * Scan forward in program from 'start' for the next occurance of TEMP[index]. + * Scan forward in program from 'start' for the next occurances of TEMP[index]. + * We look if an instruction reads the component given by the masks and if they + * are overwritten. * Return READ, WRITE, FLOW or END to indicate the next usage or an indicator * that we can't look further. */ -static enum temp_use -find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index) +static enum inst_use +find_next_use(const struct gl_program *prog, + GLuint start, + GLuint index, + GLuint mask) { GLuint i; @@ -378,30 +380,50 @@ find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index) const struct prog_instruction *inst = prog->Instructions + i; switch (inst->Opcode) { case OPCODE_BGNLOOP: - case OPCODE_ENDLOOP: case OPCODE_BGNSUB: + case OPCODE_BRA: + case OPCODE_CAL: + case OPCODE_CONT: + case OPCODE_IF: + case OPCODE_ELSE: + case OPCODE_ENDIF: + case OPCODE_ENDLOOP: case OPCODE_ENDSUB: + case OPCODE_RET: return FLOW; + case OPCODE_END: + return END; default: { const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); GLuint j; for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY && - inst->SrcReg[j].Index == index) + if (inst->SrcReg[j].RelAddr || + (inst->SrcReg[j].File == PROGRAM_TEMPORARY && + inst->SrcReg[j].Index == index && + (get_src_arg_mask(inst,j,NO_MASK) & mask))) return READ; } - if (inst->DstReg.File == PROGRAM_TEMPORARY && - inst->DstReg.Index == index) - return WRITE; + if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 && + inst->DstReg.File == PROGRAM_TEMPORARY && + inst->DstReg.Index == index) { + mask &= ~inst->DstReg.WriteMask; + if (mask == 0) + return WRITE; + } } } } - return END; } -static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode) + +/** + * Is the given instruction opcode a flow-control opcode? + * XXX maybe move this into prog_instruction.[ch] + */ +static GLboolean +_mesa_is_flow_control_opcode(enum prog_opcode opcode) { switch (opcode) { case OPCODE_BGNLOOP: @@ -422,6 +444,37 @@ static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode) } } + +/** + * Test if the given instruction is a simple MOV (no conditional updating, + * not relative addressing, no negation/abs, etc). + */ +static GLboolean +can_downward_mov_be_modifed(const struct prog_instruction *mov) +{ + return + mov->Opcode == OPCODE_MOV && + mov->CondUpdate == GL_FALSE && + mov->SrcReg[0].RelAddr == 0 && + mov->SrcReg[0].Negate == 0 && + mov->SrcReg[0].Abs == 0 && + mov->SrcReg[0].HasIndex2 == 0 && + mov->SrcReg[0].RelAddr2 == 0 && + mov->DstReg.RelAddr == 0 && + mov->DstReg.CondMask == COND_TR && + mov->SaturateMode == SATURATE_OFF; +} + + +static GLboolean +can_upward_mov_be_modifed(const struct prog_instruction *mov) +{ + return + can_downward_mov_be_modifed(mov) && + mov->DstReg.File == PROGRAM_TEMPORARY; +} + + /** * Try to remove use of extraneous MOV instructions, to free them up for dead * code removal. @@ -449,14 +502,15 @@ _mesa_remove_extra_move_use(struct gl_program *prog) for (i = 0; i + 1 < prog->NumInstructions; i++) { const struct prog_instruction *mov = prog->Instructions + i; + GLuint dst_mask, src_mask; + if (can_upward_mov_be_modifed(mov) == GL_FALSE) + continue; - if (mov->Opcode != OPCODE_MOV || - mov->DstReg.File != PROGRAM_TEMPORARY || - mov->DstReg.RelAddr || - mov->DstReg.CondMask != COND_TR || - mov->SaturateMode != SATURATE_OFF || - mov->SrcReg[0].RelAddr) - continue; + /* Scanning the code, we maintain the components which are still active in + * these two masks + */ + dst_mask = mov->DstReg.WriteMask; + src_mask = get_src_arg_mask(mov, 0, NO_MASK); /* Walk through remaining instructions until the or src reg gets * rewritten or we get into some flow-control, eliminating the use of @@ -464,61 +518,60 @@ _mesa_remove_extra_move_use(struct gl_program *prog) */ for (j = i + 1; j < prog->NumInstructions; j++) { struct prog_instruction *inst2 = prog->Instructions + j; - GLuint arg; + GLuint arg; if (_mesa_is_flow_control_opcode(inst2->Opcode)) break; /* First rewrite this instruction's args if appropriate. */ for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) { - int comp; - int read_mask = get_src_arg_mask(inst2, arg); + GLuint comp, read_mask; if (inst2->SrcReg[arg].File != mov->DstReg.File || inst2->SrcReg[arg].Index != mov->DstReg.Index || inst2->SrcReg[arg].RelAddr || inst2->SrcReg[arg].Abs) continue; + read_mask = get_src_arg_mask(inst2, arg, NO_MASK); - /* Check that all the sources for this arg of inst2 come from inst1 - * or constants. - */ - for (comp = 0; comp < 4; comp++) { - int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); - - /* If the MOV didn't write that channel, can't use it. */ - if ((read_mask & (1 << comp)) && - src_swz <= SWIZZLE_W && - (mov->DstReg.WriteMask & (1 << src_swz)) == 0) - break; - } - if (comp != 4) - continue; - - /* Adjust the swizzles of inst2 to point at MOV's source */ - for (comp = 0; comp < 4; comp++) { - int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); - - if (inst2_swz <= SWIZZLE_W) { - GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); - inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); - inst2->SrcReg[arg].Swizzle |= s << (3 * comp); - inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> - inst2_swz) & 0x1) << comp); - } - } - inst2->SrcReg[arg].File = mov->SrcReg[0].File; - inst2->SrcReg[arg].Index = mov->SrcReg[0].Index; + /* Adjust the swizzles of inst2 to point at MOV's source if ALL the + * components read still come from the mov instructions + */ + if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) && + (read_mask & dst_mask) == read_mask) { + for (comp = 0; comp < 4; comp++) { + const GLuint inst2_swz = + GET_SWZ(inst2->SrcReg[arg].Swizzle, comp); + const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz); + inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp)); + inst2->SrcReg[arg].Swizzle |= s << (3 * comp); + inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >> + inst2_swz) & 0x1) << comp); + } + inst2->SrcReg[arg].File = mov->SrcReg[0].File; + inst2->SrcReg[arg].Index = mov->SrcReg[0].Index; + } } - /* If this instruction overwrote part of the move, our time is up. */ - if ((inst2->DstReg.File == mov->DstReg.File && - (inst2->DstReg.RelAddr || - inst2->DstReg.Index == mov->DstReg.Index)) || - (inst2->DstReg.File == mov->SrcReg[0].File && - (inst2->DstReg.RelAddr || - inst2->DstReg.Index == mov->SrcReg[0].Index))) - break; + /* The source of MOV is written. This potentially deactivates some + * components from the src and dst of the MOV instruction + */ + if (inst2->DstReg.File == mov->DstReg.File && + (inst2->DstReg.RelAddr || + inst2->DstReg.Index == mov->DstReg.Index)) { + dst_mask &= ~inst2->DstReg.WriteMask; + src_mask = get_src_arg_mask(mov, 0, dst_mask); + } + + /* Idem when the destination of mov is written */ + if (inst2->DstReg.File == mov->SrcReg[0].File && + (inst2->DstReg.RelAddr || + inst2->DstReg.Index == mov->SrcReg[0].Index)) { + src_mask &= ~inst2->DstReg.WriteMask; + dst_mask &= get_dst_mask_for_mov(mov, src_mask); + } + if (dst_mask == 0) + break; } } @@ -528,14 +581,151 @@ _mesa_remove_extra_move_use(struct gl_program *prog) } } + +/** + * Complements dead_code_global. Try to remove code in block of code by + * carefully monitoring the swizzles. Both functions should be merged into one + * with a proper control flow graph + */ +static GLboolean +_mesa_remove_dead_code_local(struct gl_program *prog) +{ + GLboolean *removeInst; + GLuint i, arg, rem = 0; + + removeInst = (GLboolean *) + calloc(1, prog->NumInstructions * sizeof(GLboolean)); + + for (i = 0; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + const GLuint index = inst->DstReg.Index; + const GLuint mask = inst->DstReg.WriteMask; + enum inst_use use; + + /* We must deactivate the pass as soon as some indirection is used */ + if (inst->DstReg.RelAddr) + goto done; + for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) + if (inst->SrcReg[arg].RelAddr) + goto done; + + if (_mesa_is_flow_control_opcode(inst->Opcode) || + _mesa_num_inst_dst_regs(inst->Opcode) == 0 || + inst->DstReg.File != PROGRAM_TEMPORARY || + inst->DstReg.RelAddr) + continue; + + use = find_next_use(prog, i+1, index, mask); + if (use == WRITE || use == END) + removeInst[i] = GL_TRUE; + } + + rem = remove_instructions(prog, removeInst); + +done: + free(removeInst); + return rem != 0; +} + + +/** + * Try to inject the destination of mov as the destination of inst and recompute + * the swizzles operators for the sources of inst if required. Return GL_TRUE + * of the substitution was possible, GL_FALSE otherwise + */ +static GLboolean +_mesa_merge_mov_into_inst(struct prog_instruction *inst, + const struct prog_instruction *mov) +{ + /* Indirection table which associates destination and source components for + * the mov instruction + */ + const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK); + + /* Some components are not written by inst. We cannot remove the mov */ + if (mask != (inst->DstReg.WriteMask & mask)) + return GL_FALSE; + + /* Depending on the instruction, we may need to recompute the swizzles. + * Also, some other instructions (like TEX) are not linear. We will only + * consider completely active sources and destinations + */ + switch (inst->Opcode) { + + /* Carstesian instructions: we compute the swizzle */ + case OPCODE_MOV: + case OPCODE_MIN: + case OPCODE_MAX: + case OPCODE_ABS: + case OPCODE_ADD: + case OPCODE_MAD: + case OPCODE_MUL: + case OPCODE_SUB: + { + GLuint dst_to_src_comp[4] = {0,0,0,0}; + GLuint dst_comp, arg; + for (dst_comp = 0; dst_comp < 4; ++dst_comp) { + if (mov->DstReg.WriteMask & (1 << dst_comp)) { + const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp); + ASSERT(src_comp < 4); + dst_to_src_comp[dst_comp] = src_comp; + } + } + + /* Patch each source of the instruction */ + for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) { + const GLuint arg_swz = inst->SrcReg[arg].Swizzle; + inst->SrcReg[arg].Swizzle = 0; + + /* Reset each active component of the swizzle */ + for (dst_comp = 0; dst_comp < 4; ++dst_comp) { + GLuint src_comp, arg_comp; + if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0) + continue; + src_comp = dst_to_src_comp[dst_comp]; + ASSERT(src_comp < 4); + arg_comp = GET_SWZ(arg_swz, src_comp); + ASSERT(arg_comp < 4); + inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp); + } + } + inst->DstReg = mov->DstReg; + return GL_TRUE; + } + + /* Dot products and scalar instructions: we only change the destination */ + case OPCODE_RCP: + case OPCODE_SIN: + case OPCODE_COS: + case OPCODE_RSQ: + case OPCODE_POW: + case OPCODE_EX2: + case OPCODE_LOG: + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + inst->DstReg = mov->DstReg; + return GL_TRUE; + + /* All other instructions require fully active components with no swizzle */ + default: + if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW || + inst->DstReg.WriteMask != WRITEMASK_XYZW) + return GL_FALSE; + inst->DstReg = mov->DstReg; + return GL_TRUE; + } +} + + /** * Try to remove extraneous MOV instructions from the given program. */ -static void +static GLboolean _mesa_remove_extra_moves(struct gl_program *prog) { GLboolean *removeInst; /* per-instruction removal flag */ - GLuint i, rem, loopNesting = 0, subroutineNesting = 0; + GLuint i, rem = 0, nesting = 0; if (dbg) { printf("Optimize: Begin remove extra moves\n"); @@ -554,29 +744,24 @@ _mesa_remove_extra_moves(struct gl_program *prog) */ for (i = 0; i < prog->NumInstructions; i++) { - const struct prog_instruction *inst = prog->Instructions + i; + const struct prog_instruction *mov = prog->Instructions + i; - switch (inst->Opcode) { + switch (mov->Opcode) { case OPCODE_BGNLOOP: - loopNesting++; - break; - case OPCODE_ENDLOOP: - loopNesting--; - break; case OPCODE_BGNSUB: - subroutineNesting++; + case OPCODE_IF: + nesting++; break; + case OPCODE_ENDLOOP: case OPCODE_ENDSUB: - subroutineNesting--; + case OPCODE_ENDIF: + nesting--; break; case OPCODE_MOV: - if (i > 0 && - loopNesting == 0 && - subroutineNesting == 0 && - inst->SrcReg[0].File == PROGRAM_TEMPORARY && - inst->SrcReg[0].Swizzle == SWIZZLE_XYZW) { + if (i > 0 && can_downward_mov_be_modifed(mov) && nesting == 0) { + /* see if this MOV can be removed */ - const GLuint tempIndex = inst->SrcReg[0].Index; + const GLuint id = mov->SrcReg[0].Index; struct prog_instruction *prevInst; GLuint prevI; @@ -587,11 +772,13 @@ _mesa_remove_extra_moves(struct gl_program *prog) prevInst = prog->Instructions + prevI; if (prevInst->DstReg.File == PROGRAM_TEMPORARY && - prevInst->DstReg.Index == tempIndex && - prevInst->DstReg.WriteMask == WRITEMASK_XYZW) { + prevInst->DstReg.Index == id && + prevInst->DstReg.RelAddr == 0 && + prevInst->DstReg.CondSrc == 0 && + prevInst->DstReg.CondMask == COND_TR) { - enum temp_use next_use = - find_next_temp_use(prog, i + 1, tempIndex); + const GLuint dst_mask = prevInst->DstReg.WriteMask; + enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask); if (next_use == WRITE || next_use == END) { /* OK, we can safely remove this MOV instruction. @@ -601,18 +788,13 @@ _mesa_remove_extra_moves(struct gl_program *prog) * Into: * prevI: FOO z, x, y; */ - - /* patch up prev inst */ - prevInst->DstReg.File = inst->DstReg.File; - prevInst->DstReg.Index = inst->DstReg.Index; - - /* flag this instruction for removal */ - removeInst[i] = GL_TRUE; - - if (dbg) { - printf("Remove MOV at %u\n", i); - printf("new prev inst %u: ", prevI); - _mesa_print_instruction(prevInst); + if (_mesa_merge_mov_into_inst(prevInst, mov)) { + removeInst[i] = GL_TRUE; + if (dbg) { + printf("Remove MOV at %u\n", i); + printf("new prev inst %u: ", prevI); + _mesa_print_instruction(prevInst); + } } } } @@ -632,6 +814,8 @@ _mesa_remove_extra_moves(struct gl_program *prog) printf("Optimize: End remove extra moves. %u instructions removed\n", rem); /*_mesa_print_program(prog);*/ } + + return rem != 0; } @@ -718,6 +902,7 @@ compare_start(const void *a, const void *b) return 0; } + /** sort the interval list according to interval starts */ static void sort_interval_list_by_start(struct interval_list *list) @@ -1025,6 +1210,17 @@ _mesa_reallocate_registers(struct gl_program *prog) } +#if 0 +static void +print_it(GLcontext *ctx, struct gl_program *program, const char *txt) { + fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions); + _mesa_print_program(program); + _mesa_print_program_parameters(ctx, program); + fprintf(stderr, "\n\n"); +} +#endif + + /** * Apply optimizations to the given program to eliminate unnecessary * instructions, temp regs, etc. @@ -1032,16 +1228,19 @@ _mesa_reallocate_registers(struct gl_program *prog) void _mesa_optimize_program(GLcontext *ctx, struct gl_program *program) { - _mesa_remove_extra_move_use(program); - - if (1) - _mesa_remove_dead_code(program); - - if (0) /* not tested much yet */ - _mesa_remove_extra_moves(program); - - if (0) - _mesa_consolidate_registers(program); - else + GLboolean any_change; + + /* Stop when no modifications were output */ + do { + any_change = GL_FALSE; + _mesa_remove_extra_move_use(program); + if (_mesa_remove_dead_code_global(program)) + any_change = GL_TRUE; + if (_mesa_remove_extra_moves(program)) + any_change = GL_TRUE; + if (_mesa_remove_dead_code_local(program)) + any_change = GL_TRUE; _mesa_reallocate_registers(program); + } while (any_change); } + diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h index 43894a2723..06cd9cb2c2 100644 --- a/src/mesa/program/prog_optimize.h +++ b/src/mesa/program/prog_optimize.h @@ -27,6 +27,7 @@ #include "main/config.h" +#include "main/mtypes.h" struct gl_program; diff --git a/src/mesa/program/prog_print.h b/src/mesa/program/prog_print.h index 4ffd5ab96c..78b90aeb4d 100644 --- a/src/mesa/program/prog_print.h +++ b/src/mesa/program/prog_print.h @@ -26,6 +26,16 @@ #ifndef PROG_PRINT_H #define PROG_PRINT_H +#include <stdio.h> + +#include "main/glheader.h" +#include "main/mtypes.h" + +struct gl_program; +struct gl_program_parameter_list; +struct gl_shader; +struct prog_instruction; + /** * The output style to use when printing programs. diff --git a/src/mesa/program/prog_uniform.h b/src/mesa/program/prog_uniform.h index a671d30bfe..7988d534a7 100644 --- a/src/mesa/program/prog_uniform.h +++ b/src/mesa/program/prog_uniform.h @@ -31,8 +31,7 @@ #ifndef PROG_UNIFORM_H #define PROG_UNIFORM_H -#include "main/mtypes.h" -#include "prog_statevars.h" +#include "main/glheader.h" /** diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c index d99584d63b..3b6d682744 100644 --- a/src/mesa/program/program.c +++ b/src/mesa/program/program.c @@ -55,13 +55,21 @@ _mesa_init_program(GLcontext *ctx) /* * If this assertion fails, we need to increase the field - * size for register indexes. + * size for register indexes (see INST_INDEX_BITS). */ ASSERT(ctx->Const.VertexProgram.MaxUniformComponents / 4 <= (1 << INST_INDEX_BITS)); ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents / 4 <= (1 << INST_INDEX_BITS)); + ASSERT(ctx->Const.VertexProgram.MaxTemps <= (1 << INST_INDEX_BITS)); + ASSERT(ctx->Const.VertexProgram.MaxLocalParams <= (1 << INST_INDEX_BITS)); + ASSERT(ctx->Const.FragmentProgram.MaxTemps <= (1 << INST_INDEX_BITS)); + ASSERT(ctx->Const.FragmentProgram.MaxLocalParams <= (1 << INST_INDEX_BITS)); + + ASSERT(ctx->Const.VertexProgram.MaxUniformComponents <= 4 * MAX_UNIFORMS); + ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents <= 4 * MAX_UNIFORMS); + /* If this fails, increase prog_instruction::TexSrcUnit size */ ASSERT(MAX_TEXTURE_UNITS < (1 << 5)); diff --git a/src/mesa/program/programopt.h b/src/mesa/program/programopt.h index 21fac07849..4af6357f97 100644 --- a/src/mesa/program/programopt.h +++ b/src/mesa/program/programopt.h @@ -26,6 +26,7 @@ #ifndef PROGRAMOPT_H #define PROGRAMOPT_H 1 +#include "main/mtypes.h" extern void _mesa_insert_mvp_code(GLcontext *ctx, struct gl_vertex_program *vprog); diff --git a/src/mesa/slang/slang_builtin.c b/src/mesa/slang/slang_builtin.c index a7e0efcb7b..179571fab4 100644 --- a/src/mesa/slang/slang_builtin.c +++ b/src/mesa/slang/slang_builtin.c @@ -35,8 +35,10 @@ #include "program/prog_instruction.h" #include "program/prog_parameter.h" #include "program/prog_statevars.h" -#include "slang/slang_ir.h" #include "slang/slang_builtin.h" +#include "slang/slang_compile_struct.h" +#include "slang/slang_ir.h" +#include "slang/slang_typeinfo.h" /** special state token (see below) */ diff --git a/src/mesa/slang/slang_builtin.h b/src/mesa/slang/slang_builtin.h index ed9ae80b3c..dc92f83f8e 100644 --- a/src/mesa/slang/slang_builtin.h +++ b/src/mesa/slang/slang_builtin.h @@ -26,8 +26,8 @@ #ifndef SLANG_BUILTIN_H #define SLANG_BUILTIN_H -#include "program/prog_parameter.h" -#include "slang_utility.h" +#include "main/glheader.h" +#include "main/mtypes.h" #include "slang_ir.h" diff --git a/src/mesa/slang/slang_codegen.h b/src/mesa/slang/slang_codegen.h index 461633fe34..376a8cc264 100644 --- a/src/mesa/slang/slang_codegen.h +++ b/src/mesa/slang/slang_codegen.h @@ -27,9 +27,14 @@ #define SLANG_CODEGEN_H -#include "main/imports.h" +#include "main/glheader.h" #include "slang_compile.h" +#include "slang_compile_variable.h" +#include "slang_typeinfo.h" +#include "slang_utility.h" +#include "slang_vartable.h" +struct slang_function_; #define MAX_LOOP_DEPTH 30 diff --git a/src/mesa/slang/slang_compile.c b/src/mesa/slang/slang_compile.c index 12ab4666ae..de1bb56cd9 100644 --- a/src/mesa/slang/slang_compile.c +++ b/src/mesa/slang/slang_compile.c @@ -36,6 +36,7 @@ #include "program/prog_print.h" #include "program/prog_parameter.h" #include "../../glsl/pp/sl_pp_public.h" +#include "../../glsl/pp/sl_pp_purify.h" #include "../../glsl/cl/sl_cl_parse.h" #include "slang_codegen.h" #include "slang_compile.h" diff --git a/src/mesa/slang/slang_compile.h b/src/mesa/slang/slang_compile.h index 71fcaa3993..6061f878e7 100644 --- a/src/mesa/slang/slang_compile.h +++ b/src/mesa/slang/slang_compile.h @@ -25,13 +25,14 @@ #if !defined SLANG_COMPILE_H #define SLANG_COMPILE_H -#include "main/imports.h" +#include "main/glheader.h" #include "main/mtypes.h" -#include "slang_typeinfo.h" -#include "slang_compile_variable.h" -#include "slang_compile_struct.h" -#include "slang_compile_operation.h" #include "slang_compile_function.h" +#include "slang_compile_struct.h" +#include "slang_compile_variable.h" +#include "slang_utility.h" + +struct slang_code_object_; #if defined __cplusplus extern "C" { diff --git a/src/mesa/slang/slang_compile_function.h b/src/mesa/slang/slang_compile_function.h index a5445ec253..0eced3ca1a 100644 --- a/src/mesa/slang/slang_compile_function.h +++ b/src/mesa/slang/slang_compile_function.h @@ -25,6 +25,14 @@ #ifndef SLANG_COMPILE_FUNCTION_H #define SLANG_COMPILE_FUNCTION_H +#include "main/glheader.h" +#include "slang_compile_operation.h" +#include "slang_compile_variable.h" +#include "slang_log.h" +#include "slang_utility.h" + +struct slang_name_space_; +struct slang_operation_; /** * Types of functions. diff --git a/src/mesa/slang/slang_compile_operation.h b/src/mesa/slang/slang_compile_operation.h index 1f15c19896..b32573e022 100644 --- a/src/mesa/slang/slang_compile_operation.h +++ b/src/mesa/slang/slang_compile_operation.h @@ -26,6 +26,11 @@ #define SLANG_COMPILE_OPERATION_H +#include "main/compiler.h" +#include "main/glheader.h" +#include "slang_compile_variable.h" +#include "slang_utility.h" + /** * Types of slang operations. * These are the types of the AST (abstract syntax tree) nodes. diff --git a/src/mesa/slang/slang_compile_struct.h b/src/mesa/slang/slang_compile_struct.h index 90c5512f4d..7be6f204e1 100644 --- a/src/mesa/slang/slang_compile_struct.h +++ b/src/mesa/slang/slang_compile_struct.h @@ -29,6 +29,9 @@ extern "C" { #endif +#include "main/glheader.h" +#include "slang_utility.h" + struct slang_function_; typedef struct slang_struct_scope_ diff --git a/src/mesa/slang/slang_compile_variable.h b/src/mesa/slang/slang_compile_variable.h index 5c9d248b35..48dc6efca4 100644 --- a/src/mesa/slang/slang_compile_variable.h +++ b/src/mesa/slang/slang_compile_variable.h @@ -26,7 +26,9 @@ #define SLANG_COMPILE_VARIABLE_H -struct slang_ir_storage_; +#include "main/glheader.h" +#include "slang_typeinfo.h" +#include "slang_utility.h" /** diff --git a/src/mesa/slang/slang_emit.h b/src/mesa/slang/slang_emit.h index ab4c202d67..f93d6b00d6 100644 --- a/src/mesa/slang/slang_emit.h +++ b/src/mesa/slang/slang_emit.h @@ -25,11 +25,9 @@ #ifndef SLANG_EMIT_H #define SLANG_EMIT_H - -#include "main/imports.h" -#include "slang_compile.h" +#include "main/glheader.h" #include "slang_ir.h" -#include "main/mtypes.h" +#include "slang_vartable.h" extern GLuint diff --git a/src/mesa/slang/slang_ir.h b/src/mesa/slang/slang_ir.h index b7a373746b..a010efcb34 100644 --- a/src/mesa/slang/slang_ir.h +++ b/src/mesa/slang/slang_ir.h @@ -33,10 +33,11 @@ #define SLANG_IR_H -#include "main/imports.h" -#include "slang_compile.h" -#include "slang_label.h" +#include "main/glheader.h" #include "main/mtypes.h" +#include "program/prog_instruction.h" +#include "slang_compile_variable.h" +#include "slang_label.h" /** diff --git a/src/mesa/slang/slang_label.c b/src/mesa/slang/slang_label.c index 8e3a8ebc1a..a161139800 100644 --- a/src/mesa/slang/slang_label.c +++ b/src/mesa/slang/slang_label.c @@ -7,6 +7,9 @@ */ +#include "main/imports.h" +#include "main/mtypes.h" +#include "program/prog_instruction.h" #include "slang_label.h" #include "slang_mem.h" diff --git a/src/mesa/slang/slang_label.h b/src/mesa/slang/slang_label.h index 4d04df18d2..b0cff3a8e8 100644 --- a/src/mesa/slang/slang_label.h +++ b/src/mesa/slang/slang_label.h @@ -1,10 +1,9 @@ #ifndef SLANG_LABEL_H #define SLANG_LABEL_H 1 -#include "main/imports.h" -#include "main/mtypes.h" -#include "program/prog_instruction.h" +#include "main/glheader.h" +struct gl_program; struct slang_label_ { diff --git a/src/mesa/slang/slang_link.c b/src/mesa/slang/slang_link.c index 94db6f918d..28ad0b74e8 100644 --- a/src/mesa/slang/slang_link.c +++ b/src/mesa/slang/slang_link.c @@ -42,6 +42,7 @@ #include "program/prog_statevars.h" #include "program/prog_uniform.h" #include "slang_builtin.h" +#include "slang_compile.h" #include "slang_link.h" @@ -1206,11 +1207,11 @@ _slang_link(GLcontext *ctx, vertNotify = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB, &shProg->FragmentProgram->Base); if (ctx->Shader.Flags & GLSL_DUMP) { - printf("Mesa pre-link fragment program:\n"); + fprintf(stderr, "Mesa pre-link fragment program:\n"); _mesa_print_program(&fragProg->Base); _mesa_print_program_parameters(ctx, &fragProg->Base); - printf("Mesa post-link fragment program:\n"); + fprintf(stderr, "Mesa post-link fragment program:\n"); _mesa_print_program(&shProg->FragmentProgram->Base); _mesa_print_program_parameters(ctx, &shProg->FragmentProgram->Base); } @@ -1229,11 +1230,11 @@ _slang_link(GLcontext *ctx, geomNotify = ctx->Driver.ProgramStringNotify(ctx, MESA_GEOMETRY_PROGRAM, &shProg->GeometryProgram->Base); if (ctx->Shader.Flags & GLSL_DUMP) { - printf("Mesa pre-link geometry program:\n"); + fprintf(stderr, "Mesa pre-link geometry program:\n"); _mesa_print_program(&geomProg->Base); _mesa_print_program_parameters(ctx, &geomProg->Base); - printf("Mesa post-link geometry program:\n"); + fprintf(stderr, "Mesa post-link geometry program:\n"); _mesa_print_program(&shProg->GeometryProgram->Base); _mesa_print_program_parameters(ctx, &shProg->GeometryProgram->Base); } @@ -1247,11 +1248,11 @@ _slang_link(GLcontext *ctx, fragNotify = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, &shProg->VertexProgram->Base); if (ctx->Shader.Flags & GLSL_DUMP) { - printf("Mesa pre-link vertex program:\n"); + fprintf(stderr, "Mesa pre-link vertex program:\n"); _mesa_print_program(&vertProg->Base); _mesa_print_program_parameters(ctx, &vertProg->Base); - printf("Mesa post-link vertex program:\n"); + fprintf(stderr, "Mesa post-link vertex program:\n"); _mesa_print_program(&shProg->VertexProgram->Base); _mesa_print_program_parameters(ctx, &shProg->VertexProgram->Base); } @@ -1266,10 +1267,10 @@ _slang_link(GLcontext *ctx, } if (ctx->Shader.Flags & GLSL_DUMP) { - printf("Varying vars:\n"); + fprintf(stderr, "Varying vars:\n"); _mesa_print_parameter_list(shProg->Varying); if (shProg->InfoLog) { - printf("Info Log: %s\n", shProg->InfoLog); + fprintf(stderr, "Info Log: %s\n", shProg->InfoLog); } } diff --git a/src/mesa/slang/slang_link.h b/src/mesa/slang/slang_link.h index 2b44d20787..3e9fa2d743 100644 --- a/src/mesa/slang/slang_link.h +++ b/src/mesa/slang/slang_link.h @@ -25,7 +25,7 @@ #ifndef SLANG_LINK_H #define SLANG_LINK_H 1 -#include "slang_compile.h" +#include "main/mtypes.h" extern void diff --git a/src/mesa/slang/slang_log.h b/src/mesa/slang/slang_log.h index dcaba0285a..544a26654e 100644 --- a/src/mesa/slang/slang_log.h +++ b/src/mesa/slang/slang_log.h @@ -27,6 +27,8 @@ #define SLANG_LOG_H +#include "main/glheader.h" + typedef struct slang_info_log_ { char *text; diff --git a/src/mesa/slang/slang_mem.h b/src/mesa/slang/slang_mem.h index b5bfae2479..0f06df3c0c 100644 --- a/src/mesa/slang/slang_mem.h +++ b/src/mesa/slang/slang_mem.h @@ -27,7 +27,7 @@ #define SLANG_MEM_H -#include "main/imports.h" +#include "main/glheader.h" typedef struct slang_mempool_ slang_mempool; diff --git a/src/mesa/slang/slang_print.h b/src/mesa/slang/slang_print.h index 46605c8061..99da304143 100644 --- a/src/mesa/slang/slang_print.h +++ b/src/mesa/slang/slang_print.h @@ -3,6 +3,12 @@ #ifndef SLANG_PRINT #define SLANG_PRINT +#include "main/glheader.h" +#include "slang_compile_function.h" +#include "slang_compile_operation.h" +#include "slang_compile_variable.h" +#include "slang_typeinfo.h" + extern void slang_print_function(const slang_function *f, GLboolean body); diff --git a/src/mesa/slang/slang_simplify.h b/src/mesa/slang/slang_simplify.h index 8689c23b1a..37fb938d4f 100644 --- a/src/mesa/slang/slang_simplify.h +++ b/src/mesa/slang/slang_simplify.h @@ -26,6 +26,13 @@ #define SLANG_SIMPLIFY_H +#include "main/glheader.h" +#include "slang_compile.h" +#include "slang_compile_function.h" +#include "slang_compile_operation.h" +#include "slang_log.h" +#include "slang_utility.h" + extern GLint _slang_lookup_constant(const char *name); diff --git a/src/mesa/slang/slang_storage.h b/src/mesa/slang/slang_storage.h index 1876a36dd6..de1f1841a3 100644 --- a/src/mesa/slang/slang_storage.h +++ b/src/mesa/slang/slang_storage.h @@ -25,7 +25,12 @@ #ifndef SLANG_STORAGE_H #define SLANG_STORAGE_H -#include "slang_compile.h" +#include "main/glheader.h" +#include "slang_compile_function.h" +#include "slang_compile_struct.h" +#include "slang_compile_variable.h" +#include "slang_typeinfo.h" +#include "slang_utility.h" /* diff --git a/src/mesa/slang/slang_typeinfo.h b/src/mesa/slang/slang_typeinfo.h index 2251b06325..5ddfe9612c 100644 --- a/src/mesa/slang/slang_typeinfo.h +++ b/src/mesa/slang/slang_typeinfo.h @@ -25,11 +25,9 @@ #ifndef SLANG_TYPEINFO_H #define SLANG_TYPEINFO_H 1 -#include "main/imports.h" -#include "main/mtypes.h" +#include "main/glheader.h" #include "slang_log.h" #include "slang_utility.h" -#include "slang_vartable.h" struct slang_operation_; diff --git a/src/mesa/slang/slang_utility.h b/src/mesa/slang/slang_utility.h index 2c0d0bcbb2..cb9b6d2aaa 100644 --- a/src/mesa/slang/slang_utility.h +++ b/src/mesa/slang/slang_utility.h @@ -26,6 +26,8 @@ #define SLANG_UTILITY_H +#include "main/glheader.h" + /* Compile-time assertions. If the expression is zero, try to declare an * array of size [-1] to cause compilation error. */ diff --git a/src/mesa/slang/slang_vartable.h b/src/mesa/slang/slang_vartable.h index 94bcd63f45..97945b89d0 100644 --- a/src/mesa/slang/slang_vartable.h +++ b/src/mesa/slang/slang_vartable.h @@ -2,6 +2,9 @@ #ifndef SLANG_VARTABLE_H #define SLANG_VARTABLE_H +#include "main/glheader.h" +#include "slang_utility.h" + struct slang_ir_storage_; typedef struct slang_var_table_ slang_var_table; diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index cebaad5f00..05442ef91b 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -40,7 +40,6 @@ #include "program/program.h" #include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" #include "util/u_simple_shaders.h" diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 13119ce203..86bb788903 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -543,6 +543,7 @@ st_ReadBuffer(GLcontext *ctx, GLenum buffer) void st_init_fbo_functions(struct dd_function_table *functions) { +#if FEATURE_EXT_framebuffer_object functions->NewFramebuffer = st_new_framebuffer; functions->NewRenderbuffer = st_new_renderbuffer; functions->BindFramebuffer = st_bind_framebuffer; @@ -550,6 +551,7 @@ void st_init_fbo_functions(struct dd_function_table *functions) functions->RenderTexture = st_render_texture; functions->FinishRenderTexture = st_finish_render_texture; functions->ValidateFramebuffer = st_validate_framebuffer; +#endif /* no longer needed by core Mesa, drivers handle resizes... functions->ResizeBuffers = st_resize_buffers; */ diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 3eb4708688..2ce5f08753 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -63,7 +63,7 @@ #include "cso_cache/cso_context.h" -DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", FALSE); +DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", FALSE) /** diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 1260fe04b1..df32491d04 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -56,7 +56,7 @@ static const struct debug_named_value st_debug_flags[] = { DEBUG_NAMED_VALUE_END }; -DEBUG_GET_ONCE_FLAGS_OPTION(st_debug, "ST_DEBUG", st_debug_flags, 0); +DEBUG_GET_ONCE_FLAGS_OPTION(st_debug, "ST_DEBUG", st_debug_flags, 0) #endif diff --git a/src/mesa/swrast/s_alpha.h b/src/mesa/swrast/s_alpha.h index 7a5b72e650..239484a974 100644 --- a/src/mesa/swrast/s_alpha.h +++ b/src/mesa/swrast/s_alpha.h @@ -28,7 +28,8 @@ #define S_ALPHA_H -#include "s_context.h" +#include "main/mtypes.h" +#include "s_span.h" extern GLint diff --git a/src/mesa/swrast/s_atifragshader.c b/src/mesa/swrast/s_atifragshader.c index 336415d57a..1338b6802d 100644 --- a/src/mesa/swrast/s_atifragshader.c +++ b/src/mesa/swrast/s_atifragshader.c @@ -24,6 +24,7 @@ #include "main/macros.h" #include "main/atifragshader.h" #include "swrast/s_atifragshader.h" +#include "swrast/s_context.h" /** diff --git a/src/mesa/swrast/s_atifragshader.h b/src/mesa/swrast/s_atifragshader.h index 871a0c0455..cce455a046 100644 --- a/src/mesa/swrast/s_atifragshader.h +++ b/src/mesa/swrast/s_atifragshader.h @@ -27,7 +27,8 @@ #define S_ATIFRAGSHADER_H -#include "s_context.h" +#include "main/mtypes.h" +#include "s_span.h" extern void diff --git a/src/mesa/swrast/s_blend.h b/src/mesa/swrast/s_blend.h index 8d5a81635d..9cedde3bf2 100644 --- a/src/mesa/swrast/s_blend.h +++ b/src/mesa/swrast/s_blend.h @@ -27,7 +27,8 @@ #define S_BLEND_H -#include "s_context.h" +#include "main/mtypes.h" +#include "s_span.h" extern void diff --git a/src/mesa/swrast/s_context.h b/src/mesa/swrast/s_context.h index c9755e6da1..6d81f74768 100644 --- a/src/mesa/swrast/s_context.h +++ b/src/mesa/swrast/s_context.h @@ -43,6 +43,7 @@ #ifndef S_CONTEXT_H #define S_CONTEXT_H +#include "main/compiler.h" #include "main/mtypes.h" #include "program/prog_execute.h" #include "swrast.h" diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c index ed637cac12..f952fd6baa 100644 --- a/src/mesa/swrast/s_depth.c +++ b/src/mesa/swrast/s_depth.c @@ -30,7 +30,6 @@ #include "main/imports.h" #include "s_depth.h" -#include "s_context.h" #include "s_span.h" diff --git a/src/mesa/swrast/s_depth.h b/src/mesa/swrast/s_depth.h index 7eae366742..878d242f5e 100644 --- a/src/mesa/swrast/s_depth.h +++ b/src/mesa/swrast/s_depth.h @@ -27,7 +27,8 @@ #define S_DEPTH_H -#include "s_context.h" +#include "main/mtypes.h" +#include "s_span.h" extern GLuint diff --git a/src/mesa/swrast/s_fog.h b/src/mesa/swrast/s_fog.h index 06107de3f9..a496746d10 100644 --- a/src/mesa/swrast/s_fog.h +++ b/src/mesa/swrast/s_fog.h @@ -28,7 +28,8 @@ #define S_FOG_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" extern GLfloat diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index a2c2a10c3d..9facb44d9b 100644 --- a/src/mesa/swrast/s_fragprog.c +++ b/src/mesa/swrast/s_fragprog.c @@ -26,6 +26,7 @@ #include "main/colormac.h" #include "program/prog_instruction.h" +#include "s_context.h" #include "s_fragprog.h" #include "s_span.h" diff --git a/src/mesa/swrast/s_fragprog.h b/src/mesa/swrast/s_fragprog.h index e1b7e67918..92b9d01e17 100644 --- a/src/mesa/swrast/s_fragprog.h +++ b/src/mesa/swrast/s_fragprog.h @@ -27,7 +27,8 @@ #define S_FRAGPROG_H -#include "s_context.h" +#include "main/mtypes.h" +#include "s_span.h" extern void diff --git a/src/mesa/swrast/s_logic.h b/src/mesa/swrast/s_logic.h index e8cfae33f2..d609513348 100644 --- a/src/mesa/swrast/s_logic.h +++ b/src/mesa/swrast/s_logic.h @@ -27,7 +27,8 @@ #define S_LOGIC_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" extern void _swrast_logicop_rgba_span(GLcontext *ctx, struct gl_renderbuffer *rb, diff --git a/src/mesa/swrast/s_masking.h b/src/mesa/swrast/s_masking.h index 3ba4f8356c..cb000da0fd 100644 --- a/src/mesa/swrast/s_masking.h +++ b/src/mesa/swrast/s_masking.h @@ -27,7 +27,8 @@ #define S_MASKING_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" extern void diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c index 3b3929a41b..8931cdec1b 100644 --- a/src/mesa/swrast/s_span.c +++ b/src/mesa/swrast/s_span.c @@ -970,6 +970,10 @@ shade_texture_span(GLcontext *ctx, SWspan *span) if (span->primitive == GL_BITMAP && span->array->ChanType != GL_FLOAT) { convert_color_type(span, GL_FLOAT, 0); } + else { + span->array->rgba = (void *) span->array->attribs[FRAG_ATTRIB_COL0]; + } + if (span->primitive != GL_POINT || (span->interpMask & SPAN_RGBA) || ctx->Point.PointSprite) { @@ -1221,9 +1225,22 @@ _swrast_write_rgba_span( GLcontext *ctx, SWspan *span) GLchan rgbaSave[MAX_WIDTH][4]; const GLuint fragOutput = multiFragOutputs ? buf : 0; + /* set span->array->rgba to colors for render buffer's datatype */ if (rb->DataType != span->array->ChanType || fragOutput > 0) { convert_color_type(span, rb->DataType, fragOutput); } + else { + if (rb->DataType == GL_UNSIGNED_BYTE) { + span->array->rgba = span->array->rgba8; + } + else if (rb->DataType == GL_UNSIGNED_SHORT) { + span->array->rgba = (void *) span->array->rgba16; + } + else { + span->array->rgba = (void *) + span->array->attribs[FRAG_ATTRIB_COL0]; + } + } if (!multiFragOutputs && numBuffers > 1) { /* save colors for second, third renderbuffer writes */ diff --git a/src/mesa/swrast/s_stencil.h b/src/mesa/swrast/s_stencil.h index cd6cbc57b0..c076ebbe2a 100644 --- a/src/mesa/swrast/s_stencil.h +++ b/src/mesa/swrast/s_stencil.h @@ -27,7 +27,8 @@ #define S_STENCIL_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" diff --git a/src/mesa/swrast/s_texcombine.h b/src/mesa/swrast/s_texcombine.h index 9ed96efb87..4f5dfbe1af 100644 --- a/src/mesa/swrast/s_texcombine.h +++ b/src/mesa/swrast/s_texcombine.h @@ -27,7 +27,8 @@ #define S_TEXCOMBINE_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" extern void _swrast_texture_span( GLcontext *ctx, SWspan *span ); diff --git a/src/mesa/swrast/s_texfilter.h b/src/mesa/swrast/s_texfilter.h index 2e265d685c..eceab59658 100644 --- a/src/mesa/swrast/s_texfilter.h +++ b/src/mesa/swrast/s_texfilter.h @@ -27,7 +27,8 @@ #define S_TEXFILTER_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_context.h" extern texture_sample_func diff --git a/src/mesa/swrast/s_zoom.h b/src/mesa/swrast/s_zoom.h index 43917be65f..09f624efad 100644 --- a/src/mesa/swrast/s_zoom.h +++ b/src/mesa/swrast/s_zoom.h @@ -25,7 +25,8 @@ #ifndef S_ZOOM_H #define S_ZOOM_H -#include "swrast.h" +#include "main/mtypes.h" +#include "s_span.h" extern void diff --git a/src/mesa/swrast_setup/ss_context.h b/src/mesa/swrast_setup/ss_context.h index 1ec293fade..56551ab273 100644 --- a/src/mesa/swrast_setup/ss_context.h +++ b/src/mesa/swrast_setup/ss_context.h @@ -28,9 +28,8 @@ #ifndef SS_CONTEXT_H #define SS_CONTEXT_H -#include "main/mtypes.h" +#include "main/glheader.h" #include "swrast/swrast.h" -#include "swrast_setup.h" #include "tnl/t_context.h" typedef struct { diff --git a/src/mesa/swrast_setup/ss_triangle.h b/src/mesa/swrast_setup/ss_triangle.h index 007fa2e914..ac553cbd01 100644 --- a/src/mesa/swrast_setup/ss_triangle.h +++ b/src/mesa/swrast_setup/ss_triangle.h @@ -29,7 +29,7 @@ #ifndef SS_TRIANGLE_H #define SS_TRIANGLE_H -#include "ss_context.h" +#include "main/mtypes.h" void _swsetup_trifuncs_init( GLcontext *ctx ); diff --git a/src/mesa/swrast_setup/ss_vb.h b/src/mesa/swrast_setup/ss_vb.h index 2ad1f56f39..944a3b78d8 100644 --- a/src/mesa/swrast_setup/ss_vb.h +++ b/src/mesa/swrast_setup/ss_vb.h @@ -30,7 +30,6 @@ #define SS_VB_H #include "main/mtypes.h" -#include "swrast_setup.h" void _swsetup_vb_init( GLcontext *ctx ); void _swsetup_choose_rastersetup_func( GLcontext *ctx ); diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h index ebaae6335b..258906f795 100644 --- a/src/mesa/tnl/t_context.h +++ b/src/mesa/tnl/t_context.h @@ -53,9 +53,7 @@ #include "main/bitset.h" #include "main/mtypes.h" -#include "math/m_matrix.h" #include "math/m_vector.h" -#include "math/m_xform.h" #include "vbo/vbo.h" diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c index 3973df9a67..f3a338ef1e 100644 --- a/src/mesa/tnl/t_vb_program.c +++ b/src/mesa/tnl/t_vb_program.c @@ -35,6 +35,7 @@ #include "main/colormac.h" #include "main/macros.h" #include "main/imports.h" +#include "math/m_xform.h" #include "program/prog_instruction.h" #include "program/prog_statevars.h" #include "program/prog_execute.h" diff --git a/src/mesa/tnl/t_vb_render.c b/src/mesa/tnl/t_vb_render.c index c1bebc9942..7d991009a1 100644 --- a/src/mesa/tnl/t_vb_render.c +++ b/src/mesa/tnl/t_vb_render.c @@ -44,6 +44,7 @@ #include "main/macros.h" #include "main/imports.h" #include "main/mtypes.h" +#include "math/m_xform.h" #include "t_pipeline.h" diff --git a/src/mesa/vf/vf.h b/src/mesa/vf/vf.h index 83d7547619..5fe392bbe5 100644 --- a/src/mesa/vf/vf.h +++ b/src/mesa/vf/vf.h @@ -28,7 +28,7 @@ #ifndef VF_VERTEX_H #define VF_VERTEX_H -#include "main/mtypes.h" +#include "main/glheader.h" #include "math/m_vector.h" enum { diff --git a/src/mesa/x86/3dnow.h b/src/mesa/x86/3dnow.h index df9f2638d7..1c1fedcd4f 100644 --- a/src/mesa/x86/3dnow.h +++ b/src/mesa/x86/3dnow.h @@ -31,8 +31,6 @@ #ifndef __3DNOW_H__ #define __3DNOW_H__ -#include "math/m_xform.h" - void _mesa_init_3dnow_transform_asm( void ); #endif diff --git a/src/mesa/x86/mmx.h b/src/mesa/x86/mmx.h index 5641936bdb..47a0d4b54d 100644 --- a/src/mesa/x86/mmx.h +++ b/src/mesa/x86/mmx.h @@ -26,6 +26,9 @@ #ifndef ASM_MMX_H #define ASM_MMX_H +#include "main/compiler.h" +#include "main/mtypes.h" + extern void _ASMAPI _mesa_mmx_blend_transparency( GLcontext *ctx, GLuint n, const GLubyte mask[], GLvoid *rgba, const GLvoid *dest, diff --git a/src/mesa/x86/sse.h b/src/mesa/x86/sse.h index 521f91e411..e92ddc1394 100644 --- a/src/mesa/x86/sse.h +++ b/src/mesa/x86/sse.h @@ -31,8 +31,6 @@ #ifndef __SSE_H__ #define __SSE_H__ -#include "math/m_xform.h" - void _mesa_init_sse_transform_asm( void ); #endif |