summaryrefslogtreecommitdiff
path: root/src/mesa
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa')
-rw-r--r--src/mesa/SConscript1
-rw-r--r--src/mesa/drivers/dri/i810/i810render.c2
-rw-r--r--src/mesa/drivers/dri/i915/intel_render.c2
-rw-r--r--src/mesa/drivers/dri/mga/mgarender.c2
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_driver.c2
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_fbo.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c263
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c26
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c20
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h19
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c43
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c29
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c21
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c38
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c117
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c8
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c2
-rw-r--r--src/mesa/drivers/dri/r300/r300_reg.h21
-rw-r--r--src/mesa/drivers/dri/r300/r300_texstate.c4
-rw-r--r--src/mesa/drivers/dri/r600/r600_blit.c26
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c4
-rw-r--r--src/mesa/drivers/dri/r600/r600_tex.c2
-rw-r--r--src/mesa/drivers/dri/r600/r600_texstate.c10
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.c20
-rw-r--r--src/mesa/drivers/dri/r600/r700_render.c38
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_chipset.h2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_fbo.c4
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_swtcl.c3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_texture.c2
-rw-r--r--src/mesa/drivers/dri/savage/savagerender.c2
-rw-r--r--src/mesa/drivers/dri/unichrome/via_render.c2
-rw-r--r--src/mesa/main/config.h2
-rw-r--r--src/mesa/main/context.c15
-rw-r--r--src/mesa/main/imports.c2
-rw-r--r--src/mesa/main/mtypes.h18
-rw-r--r--src/mesa/main/querymatrix.c6
-rw-r--r--src/mesa/program/hash_table.h2
-rw-r--r--src/mesa/program/nvfragparse.h1
-rw-r--r--src/mesa/program/nvvertparse.h1
-rw-r--r--src/mesa/program/prog_cache.h3
-rw-r--r--src/mesa/program/prog_execute.h1
-rw-r--r--src/mesa/program/prog_instruction.h2
-rw-r--r--src/mesa/program/prog_noise.h2
-rw-r--r--src/mesa/program/prog_optimize.c637
-rw-r--r--src/mesa/program/prog_optimize.h1
-rw-r--r--src/mesa/program/prog_print.h10
-rw-r--r--src/mesa/program/prog_uniform.h3
-rw-r--r--src/mesa/program/program.c10
-rw-r--r--src/mesa/program/programopt.h1
-rw-r--r--src/mesa/slang/slang_builtin.c4
-rw-r--r--src/mesa/slang/slang_builtin.h4
-rw-r--r--src/mesa/slang/slang_codegen.h7
-rw-r--r--src/mesa/slang/slang_compile.c1
-rw-r--r--src/mesa/slang/slang_compile.h11
-rw-r--r--src/mesa/slang/slang_compile_function.h8
-rw-r--r--src/mesa/slang/slang_compile_operation.h5
-rw-r--r--src/mesa/slang/slang_compile_struct.h3
-rw-r--r--src/mesa/slang/slang_compile_variable.h4
-rw-r--r--src/mesa/slang/slang_emit.h6
-rw-r--r--src/mesa/slang/slang_ir.h7
-rw-r--r--src/mesa/slang/slang_label.c3
-rw-r--r--src/mesa/slang/slang_label.h5
-rw-r--r--src/mesa/slang/slang_link.c17
-rw-r--r--src/mesa/slang/slang_link.h2
-rw-r--r--src/mesa/slang/slang_log.h2
-rw-r--r--src/mesa/slang/slang_mem.h2
-rw-r--r--src/mesa/slang/slang_print.h6
-rw-r--r--src/mesa/slang/slang_simplify.h7
-rw-r--r--src/mesa/slang/slang_storage.h7
-rw-r--r--src/mesa/slang/slang_typeinfo.h4
-rw-r--r--src/mesa/slang/slang_utility.h2
-rw-r--r--src/mesa/slang/slang_vartable.h3
-rw-r--r--src/mesa/state_tracker/st_atom_shader.c1
-rw-r--r--src/mesa/state_tracker/st_cb_fbo.c2
-rw-r--r--src/mesa/state_tracker/st_context.c2
-rw-r--r--src/mesa/state_tracker/st_debug.c2
-rw-r--r--src/mesa/swrast/s_alpha.h3
-rw-r--r--src/mesa/swrast/s_atifragshader.c1
-rw-r--r--src/mesa/swrast/s_atifragshader.h3
-rw-r--r--src/mesa/swrast/s_blend.h3
-rw-r--r--src/mesa/swrast/s_context.h1
-rw-r--r--src/mesa/swrast/s_depth.c1
-rw-r--r--src/mesa/swrast/s_depth.h3
-rw-r--r--src/mesa/swrast/s_fog.h3
-rw-r--r--src/mesa/swrast/s_fragprog.c1
-rw-r--r--src/mesa/swrast/s_fragprog.h3
-rw-r--r--src/mesa/swrast/s_logic.h3
-rw-r--r--src/mesa/swrast/s_masking.h3
-rw-r--r--src/mesa/swrast/s_span.c17
-rw-r--r--src/mesa/swrast/s_stencil.h3
-rw-r--r--src/mesa/swrast/s_texcombine.h3
-rw-r--r--src/mesa/swrast/s_texfilter.h3
-rw-r--r--src/mesa/swrast/s_zoom.h3
-rw-r--r--src/mesa/swrast_setup/ss_context.h3
-rw-r--r--src/mesa/swrast_setup/ss_triangle.h2
-rw-r--r--src/mesa/swrast_setup/ss_vb.h1
-rw-r--r--src/mesa/tnl/t_context.h2
-rw-r--r--src/mesa/tnl/t_vb_program.c1
-rw-r--r--src/mesa/tnl/t_vb_render.c1
-rw-r--r--src/mesa/vf/vf.h2
-rw-r--r--src/mesa/x86/3dnow.h2
-rw-r--r--src/mesa/x86/mmx.h3
-rw-r--r--src/mesa/x86/sse.h2
109 files changed, 1262 insertions, 417 deletions
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
index 5200a244f2..5c02abc914 100644
--- a/src/mesa/SConscript
+++ b/src/mesa/SConscript
@@ -82,6 +82,7 @@ if env['platform'] != 'winddk':
'main/pixelstore.c',
'main/points.c',
'main/polygon.c',
+ 'main/querymatrix.c',
'main/queryobj.c',
'main/rastpos.c',
'main/readpix.c',
diff --git a/src/mesa/drivers/dri/i810/i810render.c b/src/mesa/drivers/dri/i810/i810render.c
index b543d4f012..205f0cebc1 100644
--- a/src/mesa/drivers/dri/i810/i810render.c
+++ b/src/mesa/drivers/dri/i810/i810render.c
@@ -37,6 +37,8 @@
#include "main/imports.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "i810screen.h"
diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c
index ec209391ab..add0adacb5 100644
--- a/src/mesa/drivers/dri/i915/intel_render.c
+++ b/src/mesa/drivers/dri/i915/intel_render.c
@@ -37,6 +37,8 @@
#include "main/mtypes.h"
#include "main/enums.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "tnl/t_vertex.h"
#include "tnl/t_pipeline.h"
diff --git a/src/mesa/drivers/dri/mga/mgarender.c b/src/mesa/drivers/dri/mga/mgarender.c
index 8b8fc485d3..cc0cea618d 100644
--- a/src/mesa/drivers/dri/mga/mgarender.c
+++ b/src/mesa/drivers/dri/mga/mgarender.c
@@ -44,6 +44,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/imports.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "mgacontext.h"
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
index 4ec864c181..6452fe218e 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
@@ -138,5 +138,7 @@ nouveau_driver_functions_init(struct dd_function_table *functions)
functions->DrawPixels = _mesa_meta_DrawPixels;
functions->CopyPixels = _mesa_meta_CopyPixels;
functions->Bitmap = _mesa_meta_Bitmap;
+#if FEATURE_EXT_framebuffer_blit
functions->BlitFramebuffer = _mesa_meta_BlitFramebuffer;
+#endif
}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
index bd1273beea..32d8f2d0f9 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
@@ -262,10 +262,12 @@ nouveau_finish_render_texture(GLcontext *ctx,
void
nouveau_fbo_functions_init(struct dd_function_table *functions)
{
+#if FEATURE_EXT_framebuffer_object
functions->NewFramebuffer = nouveau_framebuffer_new;
functions->NewRenderbuffer = nouveau_renderbuffer_new;
functions->BindFramebuffer = nouveau_bind_framebuffer;
functions->FramebufferRenderbuffer = nouveau_framebuffer_renderbuffer;
functions->RenderTexture = nouveau_render_texture;
functions->FinishRenderTexture = nouveau_finish_render_texture;
+#endif
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index c6246a81a2..d2fa816894 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -113,7 +113,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "after unroll loops");
}
else{
- rc_transform_loops(&c->Base, &loop_state);
+ rc_transform_loops(&c->Base, &loop_state, -1);
debug_program_log(c, "after transform loops");
rc_emulate_branches(&c->Base);
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index e940fedec2..666c9c2a7a 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -32,6 +32,11 @@
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
+struct loop {
+ int BgnLoop;
+
+};
+
/*
* Take an already-setup and valid source then swizzle it appropriately to
* obtain a constant ZERO or ONE source.
@@ -332,11 +337,140 @@ static void ei_pow(struct r300_vertex_program_code *vp,
inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
}
+static void mark_write(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ unsigned int * writemasks = userdata;
+
+ if (file != RC_FILE_TEMPORARY)
+ return;
+
+ if (index >= R300_VS_MAX_TEMPS)
+ return;
+
+ writemasks[index] |= mask;
+}
+
+static unsigned long t_pred_src(struct r300_vertex_program_compiler * compiler)
+{
+ return PVS_SRC_OPERAND(compiler->PredicateIndex,
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_ZERO),
+ t_swizzle(RC_SWIZZLE_W),
+ t_src_class(RC_FILE_TEMPORARY),
+ 0);
+}
+
+static unsigned long t_pred_dst(struct r300_vertex_program_compiler * compiler,
+ unsigned int hw_opcode, int is_math)
+{
+ return PVS_OP_DST_OPERAND(hw_opcode,
+ is_math,
+ 0,
+ compiler->PredicateIndex,
+ RC_MASK_W,
+ t_dst_class(RC_FILE_TEMPORARY));
+
+}
+
+static void ei_if(struct r300_vertex_program_compiler * compiler,
+ struct rc_instruction *rci,
+ unsigned int * inst,
+ unsigned int branch_depth)
+{
+ unsigned int predicate_opcode;
+ int is_math = 0;
+
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode IF not supported\n");
+ return;
+ }
+
+ /* Reserve a temporary to use as our predicate stack counter, if we
+ * don't already have one. */
+ if (!compiler->PredicateMask) {
+ unsigned int writemasks[R300_VS_MAX_TEMPS];
+ memset(writemasks, 0, sizeof(writemasks));
+ struct rc_instruction * inst;
+ unsigned int i;
+ for(inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_writes_mask(inst, mark_write, writemasks);
+ }
+ for(i = 0; i < R300_VS_MAX_TEMPS; i++) {
+ unsigned int mask = ~writemasks[i] & RC_MASK_XYZW;
+ /* Only the W component can be used fo the predicate
+ * stack counter. */
+ if (mask & RC_MASK_W) {
+ compiler->PredicateMask = RC_MASK_W;
+ compiler->PredicateIndex = i;
+ break;
+ }
+ }
+ if (i == R300_VS_MAX_TEMPS) {
+ rc_error(&compiler->Base, "No free temporary to use for"
+ " predicate stack counter.\n");
+ return;
+ }
+ }
+ predicate_opcode =
+ branch_depth ? VE_PRED_SET_NEQ_PUSH : ME_PRED_SET_NEQ;
+
+ rci->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(rci->U.I.SrcReg[0].Swizzle,0));
+ if (branch_depth == 0) {
+ is_math = 1;
+ predicate_opcode = ME_PRED_SET_NEQ;
+ inst[1] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+ inst[2] = 0;
+ } else {
+ predicate_opcode = VE_PRED_SET_NEQ_PUSH;
+ inst[1] = t_pred_src(compiler);
+ inst[2] = t_src(compiler->code, &rci->U.I.SrcReg[0]);
+ }
+
+ inst[0] = t_pred_dst(compiler, predicate_opcode, is_math);
+ inst[3] = 0;
+
+}
+
+static void ei_else(struct r300_vertex_program_compiler * compiler,
+ unsigned int * inst)
+{
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode ELSE not supported\n");
+ return;
+ }
+ inst[0] = t_pred_dst(compiler, ME_PRED_SET_INV, 1);
+ inst[1] = t_pred_src(compiler);
+ inst[2] = 0;
+ inst[3] = 0;
+}
+
+static void ei_endif(struct r300_vertex_program_compiler *compiler,
+ unsigned int * inst)
+{
+ if (!compiler->Base.is_r500) {
+ rc_error(&compiler->Base,"Opcode ENDIF not supported\n");
+ return;
+ }
+ inst[0] = t_pred_dst(compiler, ME_PRED_SET_POP, 1);
+ inst[1] = t_pred_src(compiler);
+ inst[2] = 0;
+ inst[3] = 0;
+}
static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *rci;
+ struct loop * loops;
+ int current_loop_depth = 0;
+ int loops_reserved = 0;
+
+ unsigned int branch_depth = 0;
+
compiler->code->pos_end = 0; /* Not supported yet */
compiler->code->length = 0;
@@ -366,9 +500,12 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case RC_OPCODE_ELSE: ei_else(compiler, inst); break;
+ case RC_OPCODE_ENDIF: ei_endif(compiler, inst); branch_depth--; break;
case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case RC_OPCODE_IF: ei_if(compiler, rci, inst, branch_depth); branch_depth++; break;
case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
@@ -385,11 +522,86 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
+ case RC_OPCODE_BGNLOOP:
+ {
+ struct loop * l;
+
+ if ((!compiler->Base.is_r500
+ && loops_reserved >= R300_VS_MAX_LOOP_DEPTH)
+ || loops_reserved >= R500_VS_MAX_FC_DEPTH) {
+ rc_error(&compiler->Base,
+ "Loops are nested too deep.");
+ return;
+ }
+ memory_pool_array_reserve(&compiler->Base.Pool,
+ struct loop, loops, current_loop_depth,
+ loops_reserved, 1);
+ l = &loops[current_loop_depth++];
+ memset(l , 0, sizeof(struct loop));
+ l->BgnLoop = (compiler->code->length / 4);
+ continue;
+ }
+ case RC_OPCODE_ENDLOOP:
+ {
+ struct loop * l = &loops[current_loop_depth - 1];
+ unsigned int act_addr = l->BgnLoop - 1;
+ unsigned int last_addr = (compiler->code->length / 4) - 1;
+ unsigned int ret_addr = l->BgnLoop;
+
+ if (loops_reserved >= R300_VS_MAX_FC_OPS) {
+ rc_error(&compiler->Base,
+ "Too many flow control instructions.");
+ return;
+ }
+ if (compiler->Base.is_r500) {
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].lw =
+ R500_PVS_FC_ACT_ADRS(act_addr)
+ | R500_PVS_FC_LOOP_CNT_JMP_INST(0xffff)
+ ;
+ compiler->code->fc_op_addrs.r500
+ [compiler->code->num_fc_ops].uw =
+ R500_PVS_FC_LAST_INST(last_addr)
+ | R500_PVS_FC_RTN_INST(ret_addr)
+ ;
+ } else {
+ compiler->code->fc_op_addrs.r300
+ [compiler->code->num_fc_ops] =
+ R300_PVS_FC_ACT_ADRS(act_addr)
+ | R300_PVS_FC_LOOP_CNT_JMP_INST(0xff)
+ | R300_PVS_FC_LAST_INST(last_addr)
+ | R300_PVS_FC_RTN_INST(ret_addr)
+ ;
+ }
+ compiler->code->fc_loop_index[compiler->code->num_fc_ops] =
+ R300_PVS_FC_LOOP_INIT_VAL(0x0)
+ | R300_PVS_FC_LOOP_STEP_VAL(0x1)
+ ;
+ compiler->code->fc_ops |= R300_VAP_PVS_FC_OPC_LOOP(
+ compiler->code->num_fc_ops);
+ compiler->code->num_fc_ops++;
+ current_loop_depth--;
+ continue;
+ }
+
default:
rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
return;
}
+ /* Non-flow control instructions that are inside an if statement
+ * need to pay attention to the predicate bit. */
+ if (branch_depth
+ && vpi->Opcode != RC_OPCODE_IF
+ && vpi->Opcode != RC_OPCODE_ELSE
+ && vpi->Opcode != RC_OPCODE_ENDIF) {
+
+ inst[0] |= (PVS_DST_PRED_ENABLE_MASK
+ << PVS_DST_PRED_ENABLE_SHIFT);
+ inst[0] |= (PVS_DST_PRED_SENSE_MASK
+ << PVS_DST_PRED_SENSE_SHIFT);
+ }
+
compiler->code->length += 4;
if (compiler->Base.Error)
@@ -406,6 +618,7 @@ struct temporary_allocation {
static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
{
struct rc_instruction *inst;
+ struct rc_instruction *end_loop = NULL;
unsigned int num_orig_temps = 0;
char hwtemps[R300_VS_MAX_TEMPS];
struct temporary_allocation * ta;
@@ -440,10 +653,35 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
/* Pass 2: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ /* Instructions inside of loops need to use the ENDLOOP
+ * instruction as their LastRead. */
+ if (!end_loop && inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ int endloops = 1;
+ struct rc_instruction * ptr;
+ for(ptr = inst->Next;
+ ptr != &compiler->Base.Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ endloops++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ endloops--;
+ if (endloops <= 0) {
+ end_loop = ptr;
+ break;
+ }
+ }
+ }
+ }
+
+ if (inst == end_loop) {
+ end_loop = NULL;
+ continue;
+ }
for (i = 0; i < opcode->NumSrcRegs; ++i) {
if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
- ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
+ ta[inst->U.I.SrcReg[i].Index].LastRead =
+ end_loop ? end_loop : inst;
}
}
@@ -640,22 +878,17 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
debug_program_log(compiler, "before compilation");
- /* XXX Ideally this should be done only for r3xx, but since
- * we don't have branching support for r5xx, we use the emulation
- * on all chipsets. */
+ if (compiler->Base.is_r500)
+ rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU);
+ else
+ rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU);
- if (compiler->Base.is_r500){
- rc_transform_loops(&compiler->Base, &loop_state);
- rc_emulate_loops(&loop_state, R500_VS_MAX_ALU);
- } else {
- rc_transform_loops(&compiler->Base, &loop_state);
- rc_emulate_loops(&loop_state, R300_VS_MAX_ALU);
- }
debug_program_log(compiler, "after emulate loops");
- rc_emulate_branches(&compiler->Base);
-
- debug_program_log(compiler, "after emulate branches");
+ if (!compiler->Base.is_r500) {
+ rc_emulate_branches(&compiler->Base);
+ debug_program_log(compiler, "after emulate branches");
+ }
if (compiler->Base.is_r500) {
struct radeon_program_transformation transformations[] = {
@@ -717,6 +950,6 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
if (compiler->Base.Debug) {
fprintf(stderr, "Final vertex program code:\n");
- r300_vertex_program_dump(compiler->code);
+ r300_vertex_program_dump(compiler);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
index 5800f1a78e..e6009338e2 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
@@ -20,7 +20,9 @@
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE. */
+#include "radeon_compiler.h"
#include "radeon_code.h"
+#include "../r300_reg.h"
#include <stdio.h>
@@ -133,6 +135,10 @@ static void r300_vs_op_dump(uint32_t op)
{
fprintf(stderr, " dst: %d%s op: ",
(op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+ if ((op >> PVS_DST_PRED_ENABLE_SHIFT) & 0x1) {
+ fprintf(stderr, "PRED %u",
+ (op >> PVS_DST_PRED_SENSE_SHIFT) & 0x1);
+ }
if (op & 0x80) {
if (op & 0x1) {
fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
@@ -160,8 +166,9 @@ static void r300_vs_src_dump(uint32_t src)
r300_vs_swiz_debug[(src >> 22) & 0x7]);
}
-void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
+void r300_vertex_program_dump(struct r300_vertex_program_compiler * c)
{
+ struct r300_vertex_program_code * vs = c->code;
unsigned instrcount = vs->length / 4;
unsigned i;
@@ -177,4 +184,21 @@ void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
r300_vs_src_dump(vs->body.d[offset+1+src]);
}
}
+
+ fprintf(stderr, "Flow Control Ops: 0x%08x\n",vs->fc_ops);
+ for(i = 0; i < vs->num_fc_ops; i++) {
+ switch((vs->fc_ops >> (i * 2)) & 0x3 ) {
+ case 0: fprintf(stderr, "NOP"); break;
+ case 1: fprintf(stderr, "JUMP"); break;
+ case 2: fprintf(stderr, "LOOP"); break;
+ case 3: fprintf(stderr, "JSR"); break;
+ }
+ if (c->Base.is_r500) {
+ fprintf(stderr,": uw-> 0x%08x lw-> 0x%08x\n",
+ vs->fc_op_addrs.r500[i].uw,
+ vs->fc_op_addrs.r500[i].lw);
+ } else {
+ fprintf(stderr,": 0x%08x\n", vs->fc_op_addrs.r300[i]);
+ }
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index c3f817ad4e..9b60e30f58 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -70,6 +70,10 @@ struct loop_info {
int * Brks;
int BrkCount;
int BrkReserved;
+
+ int * Conts;
+ int ContCount;
+ int ContReserved;
};
struct emit_state {
@@ -413,20 +417,22 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
;
break;
- case RC_OPCODE_CONTINUE:
+ case RC_OPCODE_CONT:
loop = &s->Loops[s->CurrentLoopDepth - 1];
- s->Code->inst[newip].inst2 = R500_FC_OP_JUMP
+ memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
+ loop->ContCount, loop->ContReserved, 1);
+ loop->Conts[loop->ContCount++] = newip;
+ s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
| R500_FC_JUMP_FUNC(0xff)
| R500_FC_B_OP1_DECR
| R500_FC_B_POP_CNT(
s->CurrentBranchDepth - loop->BranchDepth)
+ | R500_FC_IGNORE_UNCOVERED
;
- s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->BgnLoop);
break;
case RC_OPCODE_ENDLOOP:
{
- unsigned int i;
loop = &s->Loops[s->CurrentLoopDepth - 1];
/* Emit ENDLOOP */
s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
@@ -449,6 +455,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
R500_FC_JUMP_ADDR(newip + 1);
}
+
+ /* Set jump address for CONT instructions. */
+ while(loop->ContCount--) {
+ s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
+ R500_FC_JUMP_ADDR(newip);
+ }
s->CurrentLoopDepth--;
break;
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index e14a3520dd..896246d203 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -243,6 +243,12 @@ struct rX00_fragment_program_code {
#define R500_VS_MAX_ALU 1024
#define R500_VS_MAX_ALU_DWORDS (R500_VS_MAX_ALU * 4)
#define R300_VS_MAX_TEMPS 32
+/* This is the max for all chipsets (r300-r500) */
+#define R300_VS_MAX_FC_OPS 16
+/* The r500 maximum depth is not just for loops, but any combination of loops
+ * and subroutine jumps. */
+#define R500_VS_MAX_FC_DEPTH 8
+#define R300_VS_MAX_LOOP_DEPTH 1
#define VSF_MAX_INPUTS 32
#define VSF_MAX_OUTPUTS 32
@@ -263,9 +269,18 @@ struct r300_vertex_program_code {
uint32_t InputsRead;
uint32_t OutputsWritten;
-};
-void r300_vertex_program_dump(struct r300_vertex_program_code * vs);
+ unsigned int num_fc_ops;
+ uint32_t fc_ops;
+ union {
+ uint32_t r300[R300_VS_MAX_FC_OPS];
+ struct {
+ uint32_t lw;
+ uint32_t uw;
+ } r500[R300_VS_MAX_FC_OPS];
+ } fc_op_addrs;
+ int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
+};
#endif /* RADEON_CODE_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
index 1c8ba864a4..935dc9b0a8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -307,3 +307,46 @@ void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsig
}
}
+
+/**
+ * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
+ * Gallium and OpenGL define it the other way around.
+ *
+ * So let's just negate FACE at the beginning of the shader and rewrite the rest
+ * of the shader to read from the newly allocated temporary.
+ */
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
+{
+ unsigned tempregi = rc_find_free_temporary(c);
+ struct rc_instruction *inst_add;
+ struct rc_instruction *inst;
+
+ /* perspective divide */
+ inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
+ inst_add->U.I.Opcode = RC_OPCODE_ADD;
+
+ inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_add->U.I.DstReg.Index = tempregi;
+ inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
+
+ inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+
+ inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
+ inst_add->U.I.SrcReg[1].Index = face;
+ inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
+ inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+ for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+ inst->U.I.SrcReg[i].Index == face) {
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = tempregi;
+ }
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index f15905d79d..7c42eb3ae5 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -81,6 +81,7 @@ void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_ou
void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
int full_vtransform);
+void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face);
struct r300_fragment_program_compiler {
struct radeon_compiler Base;
@@ -110,8 +111,12 @@ struct r300_vertex_program_compiler {
void * UserData;
void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
+
+ int PredicateIndex;
+ unsigned int PredicateMask;
};
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+void r300_vertex_program_dump(struct r300_vertex_program_compiler * c);
#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index 31566a937f..faf531b412 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -274,7 +274,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
}
break;
}
- case RC_OPCODE_CONTINUE:
+ case RC_OPCODE_CONT:
break;
case RC_OPCODE_ENDIF:
push_branch(&s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
index 24c3ae57b6..32d4b45dd6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -421,12 +421,9 @@ static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
* ENDLOOP; -> ENDLOOP
*
* @param inst A pointer to a BGNLOOP instruction.
- * @return If the loop can be unrolled, a pointer to the first instruction of
- * the unrolled loop.
- * Otherwise, A pointer to the ENDLOOP instruction.
- * Null if there is an error.
+ * @return 1 for success, 0 for failure
*/
-static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+static int transform_loop(struct emulate_loop_state * s,
struct rc_instruction * inst)
{
struct loop_info * loop;
@@ -437,10 +434,10 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
loop = &s->Loops[s->LoopCount++];
if (!build_loop_info(s->C, loop, inst))
- return NULL;
+ return 0;
- if(try_unroll_loop(s->C, loop, -1)){
- return loop->BeginLoop->Next;
+ if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){
+ return 1;
}
/* Reverse the conditional instruction */
@@ -465,33 +462,31 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
break;
default:
rc_error(s->C, "loop->Cond is not a conditional.\n");
- return NULL;
+ return 0;
}
/* Prepare the loop to be emulated */
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
- return loop->EndLoop;
+ return 1;
}
void rc_transform_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s)
+ struct emulate_loop_state * s, int prog_inst_limit)
{
struct rc_instruction * ptr;
memset(s, 0, sizeof(struct emulate_loop_state));
s->C = c;
- ptr = s->C->Program.Instructions.Next;
- while(ptr != &s->C->Program.Instructions) {
+ s->prog_inst_limit = prog_inst_limit;
+ for(ptr = s->C->Program.Instructions.Next;
+ ptr != &s->C->Program.Instructions; ptr = ptr->Next) {
if(ptr->Type == RC_INSTRUCTION_NORMAL &&
ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
- ptr = transform_loop(s, ptr);
- if(!ptr){
+ if (!transform_loop(s, ptr))
return;
- }
}
- ptr = ptr->Next;
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
index 86d91ef14b..bba1f68e30 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -21,10 +21,11 @@ struct emulate_loop_state {
struct loop_info * Loops;
unsigned int LoopCount;
unsigned int LoopReserved;
+ int prog_inst_limit;
};
void rc_transform_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s);
+ struct emulate_loop_state * s, int prog_inst_limit);
void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index 04f234f11d..da495a3afa 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -95,6 +95,12 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsComponentwise = 1
},
{
+ .Opcode = RC_OPCODE_DP2,
+ .Name = "DP2",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
.Opcode = RC_OPCODE_DP3,
.Name = "DP3",
.NumSrcRegs = 2,
@@ -295,6 +301,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsComponentwise = 1
},
{
+ .Opcode = RC_OPCODE_SSG,
+ .Name = "SSG",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_SUB,
.Name = "SUB",
.NumSrcRegs = 2,
@@ -386,8 +399,8 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.NumSrcRegs = 0,
},
{
- .Opcode = RC_OPCODE_CONTINUE,
- .Name = "CONTINUE",
+ .Opcode = RC_OPCODE_CONT,
+ .Name = "CONT",
.IsFlowControl = 1,
.NumSrcRegs = 0
},
@@ -435,6 +448,10 @@ void rc_compute_sources_for_writemask(
case RC_OPCODE_ARL:
srcmasks[0] |= RC_MASK_X;
break;
+ case RC_OPCODE_DP2:
+ srcmasks[0] |= RC_MASK_XY;
+ srcmasks[1] |= RC_MASK_XY;
+ break;
case RC_OPCODE_DP3:
srcmasks[0] |= RC_MASK_XYZ;
srcmasks[1] |= RC_MASK_XYZ;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 8b9fa07dde..d3f639c870 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -64,6 +64,9 @@ typedef enum {
* dst.c = d src0.c / dy */
RC_OPCODE_DDY,
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
+ RC_OPCODE_DP2,
+
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
RC_OPCODE_DP3,
@@ -154,6 +157,9 @@ typedef enum {
/** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SNE,
+ /** vec4 instruction: dst.c = (src0.c < 0 ?) -1 : ((src0.c > 0) : 1 : 0) */
+ RC_OPCODE_SSG,
+
/** vec4 instruction: dst.c = src0.c - src1.c */
RC_OPCODE_SUB,
@@ -187,7 +193,7 @@ typedef enum {
RC_OPCODE_ENDLOOP,
- RC_OPCODE_CONTINUE,
+ RC_OPCODE_CONT,
/** special instruction, used in R300-R500 fragment program pair instructions
* indicates that the result of the alpha operation shall be replicated
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index 8a912da461..ce72cd97ab 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -65,6 +65,11 @@ struct regalloc_state {
struct hardware_register * HwTemporary;
unsigned int NumHwTemporaries;
+ /**
+ * If an instruction is inside of a loop, end_loop will be the
+ * IP of the ENDLOOP instruction, otherwise end_loop will be 0
+ */
+ int end_loop;
};
static void print_live_intervals(struct live_intervals * src)
@@ -178,10 +183,10 @@ static void scan_callback(void * data, struct rc_instruction * inst,
else
reg->Live.Start = inst->IP;
reg->Live.End = inst->IP;
- } else {
- if (inst->IP > reg->Live.End)
- reg->Live.End = inst->IP;
- }
+ } else if (s->end_loop)
+ reg->Live.End = s->end_loop;
+ else if (inst->IP > reg->Live.End)
+ reg->Live.End = inst->IP;
}
static void compute_live_intervals(struct regalloc_state * s)
@@ -191,6 +196,31 @@ static void compute_live_intervals(struct regalloc_state * s)
for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
inst != &s->C->Program.Instructions;
inst = inst->Next) {
+
+ /* For all instructions inside of a loop, the ENDLOOP
+ * instruction is used as the end of the live interval. */
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP && !s->end_loop) {
+ int loops = 1;
+ struct rc_instruction * tmp;
+ for(tmp = inst->Next;
+ tmp != &s->C->Program.Instructions;
+ tmp = tmp->Next) {
+ if (tmp->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ loops++;
+ break;
+ } else if (tmp->U.I.Opcode
+ == RC_OPCODE_ENDLOOP) {
+ if(!--loops) {
+ s->end_loop = tmp->IP;
+ break;
+ }
+ }
+ }
+ }
+
+ if (inst->IP == s->end_loop)
+ s->end_loop = 0;
+
rc_for_all_reads_mask(inst, scan_callback, s);
rc_for_all_writes_mask(inst, scan_callback, s);
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 857aae5514..704a7bb2d2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -216,18 +216,18 @@ static void transform_CEIL(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
-static void transform_DP3(struct radeon_compiler* c,
+static void transform_DP2(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_src_register src0 = inst->U.I.SrcReg[0];
struct rc_src_register src1 = inst->U.I.SrcReg[1];
- src0.Negate &= ~RC_MASK_W;
- src0.Swizzle &= ~(7 << (3 * 3));
- src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
- src1.Negate &= ~RC_MASK_W;
- src1.Swizzle &= ~(7 << (3 * 3));
- src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
- emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+ src0.Swizzle &= ~(63 << (3 * 2));
+ src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+ src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+ src1.Swizzle &= ~(63 << (3 * 2));
+ src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+ emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
@@ -464,6 +464,43 @@ static void transform_SNE(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+static void transform_SSG(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* result = sign(x)
+ *
+ * CMP tmp0, -x, 1, 0
+ * CMP tmp1, x, 1, 0
+ * ADD result, tmp0, -tmp1;
+ */
+ unsigned tmp0, tmp1;
+
+ /* 0 < x */
+ tmp0 = rc_find_free_temporary(c);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+ dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ negate(inst->U.I.SrcReg[0]),
+ builtin_one,
+ builtin_zero);
+
+ /* x < 0 */
+ tmp1 = rc_find_free_temporary(c);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+ dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ builtin_one,
+ builtin_zero);
+
+ /* Either both are zero, or one of them is one and the other is zero. */
+ /* result = tmp0 - tmp1 */
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp0),
+ negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+ rc_remove_instruction(inst);
+}
+
static void transform_SUB(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -516,6 +553,7 @@ int radeonTransformALU(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_DST: transform_DST(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
@@ -530,6 +568,7 @@ int radeonTransformALU(
case RC_OPCODE_SLE: transform_SLE(c, inst); return 1;
case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
+ case RC_OPCODE_SSG: transform_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
@@ -577,6 +616,29 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+static void transform_r300_vertex_DP2(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_instruction *next_inst = inst->Next;
+ transform_DP2(c, inst);
+ next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
+}
+
+static void transform_r300_vertex_DP3(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_src_register src0 = inst->U.I.SrcReg[0];
+ struct rc_src_register src1 = inst->U.I.SrcReg[1];
+ src0.Negate &= ~RC_MASK_W;
+ src0.Swizzle &= ~(7 << (3 * 3));
+ src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ src1.Negate &= ~RC_MASK_W;
+ src1.Swizzle &= ~(7 << (3 * 3));
+ src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ rc_remove_instruction(inst);
+}
+
static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -672,6 +734,41 @@ static void transform_r300_vertex_SLE(struct radeon_compiler* c,
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
}
+static void transform_r300_vertex_SSG(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* result = sign(x)
+ *
+ * SLT tmp0, 0, x;
+ * SLT tmp1, x, 0;
+ * ADD result, tmp0, -tmp1;
+ */
+ unsigned tmp0, tmp1;
+
+ /* 0 < x */
+ tmp0 = rc_find_free_temporary(c);
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ builtin_zero,
+ inst->U.I.SrcReg[0]);
+
+ /* x < 0 */
+ tmp1 = rc_find_free_temporary(c);
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ builtin_zero);
+
+ /* Either both are zero, or one of them is one and the other is zero. */
+ /* result = tmp0 - tmp1 */
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp0),
+ negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+ rc_remove_instruction(inst);
+}
+
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
@@ -685,7 +782,8 @@ int r300_transform_vertex_alu(
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
- case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
+ case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
+ case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
@@ -705,6 +803,7 @@ int r300_transform_vertex_alu(
return 1;
}
return 0;
+ case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index 9c4b65f4c0..ddce590ee6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -117,8 +117,8 @@ int radeonTransformTEX(
struct rc_instruction * inst_rcp = NULL;
struct rc_instruction * inst_mad;
struct rc_instruction * inst_cmp;
- unsigned tmp_texsample = rc_find_free_temporary(c);
- unsigned tmp_sum = rc_find_free_temporary(c);
+ unsigned tmp_texsample;
+ unsigned tmp_sum;
unsigned tmp_recip_w = 0;
int pass, fail, tex;
@@ -126,6 +126,7 @@ int radeonTransformTEX(
struct rc_dst_register output_reg = inst->U.I.DstReg;
/* Redirect TEX to a new temp. */
+ tmp_texsample = rc_find_free_temporary(c);
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst->U.I.DstReg.Index = tmp_texsample;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
@@ -144,6 +145,7 @@ int radeonTransformTEX(
}
/* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */
+ tmp_sum = rc_find_free_temporary(c);
inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mad->U.I.DstReg.Index = tmp_sum;
@@ -199,6 +201,8 @@ int radeonTransformTEX(
inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+
+ assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w);
}
}
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index e4b302bbad..3d2f8928fa 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -461,7 +461,7 @@ static void r300InitGLExtensions(GLcontext *ctx)
if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) {
_mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
}
- if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350)
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_R420)
_mesa_enable_extension(ctx, "GL_ARB_half_float_vertex");
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index f25264b6f2..f7705b0f6f 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -441,6 +441,12 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
+#define R300_VAP_PVS_FLOW_CNTL_ADDRS_0 0x2230
+#define R300_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 8)
+#define R300_PVS_FC_LAST_INST(x) ((x) << 16)
+#define R300_PVS_FC_RTN_INST(x) ((x) << 24)
+
/* gap */
/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
@@ -459,6 +465,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
# define R300_2288_R300 0x00750000 /* -- nh */
# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
+#define R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0 0x2290
+#define R300_PVS_FC_LOOP_INIT_VAL(x) ((x) << 0)
+#define R300_PVS_FC_LOOP_STEP_VAL(x) ((x) << 8)
+
/* gap */
/* Addresses are relative to the vertex program instruction area of the
@@ -489,6 +499,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
+#define R300_VAP_PVS_FC_OPC_JUMP(x) (1 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_LOOP(x) (2 << (2 * (x)))
+#define R300_VAP_PVS_FC_OPC_JSR(x) (3 << (2 * (x)))
/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for
* immediate vertices
@@ -505,6 +518,14 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* write 0 to indicate end of packet? */
#define R300_VAP_VTX_END_OF_PKT 0x24AC
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0 0x2500
+#define R500_PVS_FC_ACT_ADRS(x) ((x) << 0)
+#define R500_PVS_FC_LOOP_CNT_JMP_INST(x) ((x) << 16)
+
+#define R500_VAP_PVS_FLOW_CNTL_ADDRS_UW_0 0x2504
+#define R500_PVS_FC_LAST_INST(x) ((x) << 0)
+#define R500_PVS_FC_RTN_INST(x) ((x) << 16)
+
/* gap */
/* These are values from r300_reg/r300_reg.h - they are known to be correct
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
index 4ba6740e3d..9458869826 100644
--- a/src/mesa/drivers/dri/r300/r300_texstate.c
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -152,8 +152,8 @@ int32_t r300TranslateTexFormat(gl_format mesaFormat)
case MESA_FORMAT_Z32:
return R300_EASY_TX_FORMAT(X, X, X, X, X32);
/* EXT_texture_sRGB */
- case MESA_FORMAT_SRGBA8:
- return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA;
+ case MESA_FORMAT_SARGB8:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA;
case MESA_FORMAT_SLA8:
return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA;
case MESA_FORMAT_SL8:
diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c
index 619678214f..4fd425b809 100644
--- a/src/mesa/drivers/dri/r600/r600_blit.c
+++ b/src/mesa/drivers/dri/r600/r600_blit.c
@@ -72,7 +72,7 @@ unsigned r600_check_blit(gl_format mesa_format)
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z16:
case MESA_FORMAT_Z32:
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
case MESA_FORMAT_SLA8:
case MESA_FORMAT_SL8:
break;
@@ -320,9 +320,9 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
CLEARbit(cb_color0_info, SOURCE_FORMAT_bit);
SETfield(cb_color0_info, NUMBER_UNORM, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
break;
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
format = COLOR_8_8_8_8;
- comp_swap = SWAP_STD_REV;
+ comp_swap = SWAP_ALT;
SETbit(cb_color0_info, SOURCE_FORMAT_bit);
SETfield(cb_color0_info, NUMBER_SRGB, NUMBER_TYPE_shift, NUMBER_TYPE_mask);
break;
@@ -1050,17 +1050,17 @@ set_tex_resource(context_t * context,
SETfield(sq_tex_resource4, SQ_SEL_X,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
break;
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
SETfield(sq_tex_resource1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(sq_tex_resource4, SQ_SEL_W,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(sq_tex_resource4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(sq_tex_resource4, SQ_SEL_Y,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(sq_tex_resource4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(sq_tex_resource4, SQ_SEL_W,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
SETbit(sq_tex_resource4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
break;
@@ -1454,7 +1454,7 @@ set_default_state(context_t *context)
SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit);
}
- BEGIN_BATCH_NO_AUTOSTATE(114);
+ BEGIN_BATCH_NO_AUTOSTATE(117);
R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
R600_OUT_BATCH(sq_config);
R600_OUT_BATCH(sq_gpr_resource_mgmt_1);
@@ -1499,9 +1499,10 @@ set_default_state(context_t *context)
R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) |
(X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
(X_1_256TH << QUANT_MODE_shift));
+ R600_OUT_BATCH_REGVAL(PA_SC_AA_CONFIG, 0);
R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4);
- R600_OUT_BATCH(2048);
+ R600_OUT_BATCH(0xffffff);
R600_OUT_BATCH(0);
R600_OUT_BATCH(0);
R600_OUT_BATCH(0);
@@ -1532,6 +1533,7 @@ set_default_state(context_t *context)
R600_OUT_BATCH(0);
R600_OUT_BATCH_REGVAL(VGT_STRMOUT_BUFFER_EN, 0);
+ R600_OUT_BATCH_REGVAL(SX_ALPHA_TEST_CONTROL, 0);
END_BATCH();
COMMIT_BATCH();
@@ -1613,7 +1615,7 @@ unsigned r600_blit(GLcontext *ctx,
/* Flush is needed to make sure that source buffer has correct data */
radeonFlush(ctx);
- rcommonEnsureCmdBufSpace(&context->radeon, 305, __FUNCTION__);
+ rcommonEnsureCmdBufSpace(&context->radeon, 311, __FUNCTION__);
/* load shaders */
load_shaders(context->radeon.glCtx);
@@ -1622,7 +1624,7 @@ unsigned r600_blit(GLcontext *ctx,
return GL_FALSE;
/* set clear state */
- /* 114 */
+ /* 120 */
set_default_state(context);
/* shaders */
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index 84d9d42312..389b0412ba 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -72,6 +72,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R600_ENABLE_GLSL_TEST 1
#define need_GL_VERSION_2_0
+#define need_GL_VERSION_2_1
+#define need_GL_ARB_draw_elements_base_vertex
#define need_GL_ARB_occlusion_query
#define need_GL_ARB_point_parameters
#define need_GL_ARB_vertex_program
@@ -140,6 +142,7 @@ static const struct dri_extension card_extensions[] = {
{"GL_NV_vertex_program", GL_NV_vertex_program_functions},
{"GL_SGIS_generate_mipmap", NULL},
{"GL_ARB_pixel_buffer_object", NULL},
+ {"GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions },
{NULL, NULL}
/* *INDENT-ON* */
};
@@ -157,6 +160,7 @@ static const struct dri_extension mm_extensions[] = {
static const struct dri_extension gl_20_extension[] = {
#ifdef R600_ENABLE_GLSL_TEST
{"GL_ARB_shading_language_100", GL_VERSION_2_0_functions },
+ {"GL_ARB_shading_language_120", GL_VERSION_2_1_functions },
#else
{"GL_VERSION_2_0", GL_VERSION_2_0_functions },
#endif /* R600_ENABLE_GLSL_TEST */
diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c
index 41419f8460..512a52ede3 100644
--- a/src/mesa/drivers/dri/r600/r600_tex.c
+++ b/src/mesa/drivers/dri/r600/r600_tex.c
@@ -431,7 +431,7 @@ unsigned r600IsFormatRenderable(gl_format mesa_format)
case MESA_FORMAT_Z24_S8:
case MESA_FORMAT_Z16:
case MESA_FORMAT_Z32:
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
case MESA_FORMAT_SLA8:
case MESA_FORMAT_SL8:
return 1;
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index 1600033b9b..ba3690b70e 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -605,17 +605,17 @@ static GLboolean r600GetTexFormat(struct gl_texture_object *tObj, gl_format mesa
}
break;
/* EXT_texture_sRGB */
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
SETfield(t->SQ_TEX_RESOURCE1, FMT_8_8_8_8,
SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_shift, SQ_TEX_RESOURCE_WORD1_0__DATA_FORMAT_mask);
- SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
- SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
break;
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 1e955b93b2..bf8063391a 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -265,17 +265,6 @@ static void r700SendVTXState(GLcontext *ctx, struct radeon_state_atom *atom)
if (context->radeon.tcl.aos_count == 0)
return;
- BEGIN_BATCH_NO_AUTOSTATE(6);
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
- R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
- R600_OUT_BATCH(0);
-
- R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
- R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
- R600_OUT_BATCH(0);
- END_BATCH();
- COMMIT_BATCH();
-
for(i=0; i<VERT_ATTRIB_MAX; i++) {
if(vp->mesa_program->Base.InputsRead & (1 << i))
{
@@ -523,9 +512,9 @@ static void r700SetRenderTarget(context_t *context, int id)
CB_COLOR0_INFO__ARRAY_MODE_shift, CB_COLOR0_INFO__ARRAY_MODE_mask);
CLEARbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
break;
- case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
format = COLOR_8_8_8_8;
- comp_swap = SWAP_STD_REV;
+ comp_swap = SWAP_ALT;
number_type = NUMBER_SRGB;
SETbit(r700->render_target[id].CB_COLOR0_INFO.u32All, SOURCE_FORMAT_bit);
break;
@@ -1480,9 +1469,6 @@ static int check_vtx(GLcontext *ctx, struct radeon_state_atom *atom)
context_t *context = R700_CONTEXT(ctx);
int count = context->radeon.tcl.aos_count * 18;
- if (count)
- count += 6;
-
radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
return count;
}
@@ -1605,7 +1591,7 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(ps, always, 24, r700SendPSState);
ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
- ALLOC_STATE(vtx, vtx, (6 + (VERT_ATTRIB_MAX * 18)), r700SendVTXState);
+ ALLOC_STATE(vtx, vtx, (VERT_ATTRIB_MAX * 18), r700SendVTXState);
ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index ba55f38e05..c5771f9fd0 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -244,7 +244,8 @@ static int r700NumVerts(int num_verts, int prim)
return num_verts - verts_off;
}
-static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
+static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end,
+ int prim, GLint basevertex)
{
context_t *context = R700_CONTEXT(ctx);
BATCH_LOCALS(&context->radeon);
@@ -282,6 +283,7 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
total_emit = 3 /* VGT_PRIMITIVE_TYPE */
+ 2 /* VGT_INDEX_TYPE */
+ 2 /* NUM_INSTANCES */
+ + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */
+ 5 + 2; /* DRAW_INDEX */
BEGIN_BATCH_NO_AUTOSTATE(total_emit);
@@ -294,6 +296,11 @@ static void r700RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim
// num instances
R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
R600_OUT_BATCH(1);
+ /* offset */
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2));
+ R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(basevertex); //VTX_BASE_VTX_LOC
+ R600_OUT_BATCH(0); //VTX_START_INST_LOC
// draw packet
R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
R600_OUT_BATCH(context->ind_buf.bo_offset);
@@ -364,6 +371,7 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end,
total_emit += 3 /* VGT_PRIMITIVE_TYPE */
+ 2 /* VGT_INDEX_TYPE */
+ 2 /* NUM_INSTANCES */
+ + 4 /* VTX_BASE_VTX_LOC + VTX_START_INST_LOC */
+ 3; /* DRAW */
BEGIN_BATCH_NO_AUTOSTATE(total_emit);
@@ -376,6 +384,11 @@ static void r700RunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end,
// num instances
R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
R600_OUT_BATCH(1);
+ /* offset */
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 2));
+ R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(0); //VTX_BASE_VTX_LOC
+ R600_OUT_BATCH(0); //VTX_START_INST_LOC
// draw packet
if(start == 0)
{
@@ -433,16 +446,16 @@ static GLuint r700PredictRenderSize(GLcontext* ctx,
dwords = PRE_EMIT_STATE_BUFSZ;
if (ib)
- dwords += nr_prims * 14;
+ dwords += nr_prims * 18;
else {
for (i = 0; i < nr_prims; ++i)
{
if (prim[i].start == 0)
- dwords += 10;
+ dwords += 14;
else if (prim[i].count > 0xffff)
- dwords += prim[i].count + 10;
+ dwords += prim[i].count + 14;
else
- dwords += ((prim[i].count + 1) / 2) + 10;
+ dwords += ((prim[i].count + 1) / 2) + 14;
}
}
@@ -923,7 +936,8 @@ static GLboolean r700TryDrawPrims(GLcontext *ctx,
r700RunRenderPrimitive(ctx,
prim[i].start,
prim[i].start + prim[i].count,
- prim[i].mode);
+ prim[i].mode,
+ prim[i].basevertex);
else
r700RunRenderPrimitiveImmediate(ctx,
prim[i].start,
@@ -975,15 +989,17 @@ static void r700DrawPrims(GLcontext *ctx,
/* This check should get folded into just the places that
* min/max index are really needed.
*/
- if (!index_bounds_valid) {
- vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
- }
- if (min_index) {
+ if (!vbo_all_varyings_in_vbos(arrays)) {
+ if (!index_bounds_valid)
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+ /* do we want to rebase, minimizes the
+ * amount of data to upload? */
+ if (min_index) {
vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r700DrawPrims );
return;
+ }
}
-
/* Make an attempt at drawing */
retval = r700TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
index b7ee9a134b..7d54fabebb 100644
--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -414,9 +414,9 @@ enum {
CHIP_FAMILY_R350,
CHIP_FAMILY_RV350,
CHIP_FAMILY_RV380,
+ CHIP_FAMILY_RS400,
CHIP_FAMILY_R420,
CHIP_FAMILY_RV410,
- CHIP_FAMILY_RS400,
CHIP_FAMILY_RS600,
CHIP_FAMILY_RS690,
CHIP_FAMILY_RS740,
diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c
index 517485091a..0597d4250d 100644
--- a/src/mesa/drivers/dri/radeon/radeon_fbo.c
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -609,6 +609,7 @@ radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
void radeon_fbo_init(struct radeon_context *radeon)
{
+#if FEATURE_EXT_framebuffer_object
radeon->glCtx->Driver.NewFramebuffer = radeon_new_framebuffer;
radeon->glCtx->Driver.NewRenderbuffer = radeon_new_renderbuffer;
radeon->glCtx->Driver.BindFramebuffer = radeon_bind_framebuffer;
@@ -617,7 +618,10 @@ void radeon_fbo_init(struct radeon_context *radeon)
radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture;
radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers;
radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer;
+#endif
+#if FEATURE_EXT_framebuffer_blit
radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer;
+#endif
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
index c877e6c176..c6e5f110ea 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -133,7 +133,7 @@ static void compute_tex_image_offset(radeonContextPtr rmesa, radeon_mipmap_tree
height = _mesa_next_pow_two_32(lvl->height);
lvl->rowstride = get_texture_image_row_stride(rmesa, mt->mesaFormat, lvl->width, mt->tilebits);
- lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, lvl->height, lvl->depth, mt->tilebits);
+ lvl->size = get_texture_image_size(mt->mesaFormat, lvl->rowstride, height, lvl->depth, mt->tilebits);
assert(lvl->size > 0);
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index 67be466c3f..29defe73a7 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -40,7 +40,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "main/macros.h"
#include "main/simple_list.h"
+#include "math/m_xform.h"
+
#include "swrast_setup/swrast_setup.h"
+
#include "tnl/tnl.h"
#include "tnl/t_context.h"
#include "tnl/t_pipeline.h"
diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c
index d2b190e42e..8c6a50d2f0 100644
--- a/src/mesa/drivers/dri/radeon/radeon_texture.c
+++ b/src/mesa/drivers/dri/radeon/radeon_texture.c
@@ -551,7 +551,7 @@ gl_format radeonChooseTextureFormat(GLcontext * ctx,
case GL_SRGB8_ALPHA8:
case GL_COMPRESSED_SRGB:
case GL_COMPRESSED_SRGB_ALPHA:
- return MESA_FORMAT_SRGBA8;
+ return MESA_FORMAT_SARGB8;
case GL_SLUMINANCE:
case GL_SLUMINANCE8:
diff --git a/src/mesa/drivers/dri/savage/savagerender.c b/src/mesa/drivers/dri/savage/savagerender.c
index c369bb124c..2d9e80e29c 100644
--- a/src/mesa/drivers/dri/savage/savagerender.c
+++ b/src/mesa/drivers/dri/savage/savagerender.c
@@ -33,6 +33,8 @@
#include "main/imports.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "savagecontext.h"
diff --git a/src/mesa/drivers/dri/unichrome/via_render.c b/src/mesa/drivers/dri/unichrome/via_render.c
index 896c43db1b..4351f11955 100644
--- a/src/mesa/drivers/dri/unichrome/via_render.c
+++ b/src/mesa/drivers/dri/unichrome/via_render.c
@@ -33,6 +33,8 @@
#include "main/macros.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
+
#include "tnl/t_context.h"
#include "via_context.h"
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h
index 32f7d969d8..0f2d1a8f8d 100644
--- a/src/mesa/main/config.h
+++ b/src/mesa/main/config.h
@@ -177,7 +177,7 @@
/**
* Per-program constants (power of two)
*
- * \c MAX_PROGRAM_LOCAL_PARAMS and \c MAX_UNIFORMS are just the assmebly shader
+ * \c MAX_PROGRAM_LOCAL_PARAMS and \c MAX_UNIFORMS are just the assembly shader
* and GLSL shader names for the same thing. They should \b always have the
* same value. Each refers to the number of vec4 values supplied as
* per-program parameters.
diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c
index a369532e99..b01fed1781 100644
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -462,7 +462,7 @@ _mesa_init_current(GLcontext *ctx)
/**
- * Init vertex/fragment program limits.
+ * Init vertex/fragment/geometry program limits.
* Important: drivers should override these with actual limits.
*/
static void
@@ -477,16 +477,18 @@ init_program_limits(GLenum type, struct gl_program_constants *prog)
prog->MaxLocalParams = MAX_PROGRAM_LOCAL_PARAMS;
prog->MaxUniformComponents = 4 * MAX_UNIFORMS;
- if (type == GL_VERTEX_PROGRAM_ARB) {
+ switch (type) {
+ case GL_VERTEX_PROGRAM_ARB:
prog->MaxParameters = MAX_VERTEX_PROGRAM_PARAMS;
prog->MaxAttribs = MAX_NV_VERTEX_PROGRAM_INPUTS;
prog->MaxAddressRegs = MAX_VERTEX_PROGRAM_ADDRESS_REGS;
- }
- else if (type == GL_FRAGMENT_PROGRAM_ARB) {
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
prog->MaxParameters = MAX_NV_FRAGMENT_PROGRAM_PARAMS;
prog->MaxAttribs = MAX_NV_FRAGMENT_PROGRAM_INPUTS;
prog->MaxAddressRegs = MAX_FRAGMENT_PROGRAM_ADDRESS_REGS;
- } else {
+ break;
+ case MESA_GEOMETRY_PROGRAM:
prog->MaxParameters = MAX_NV_VERTEX_PROGRAM_PARAMS;
prog->MaxAttribs = MAX_NV_VERTEX_PROGRAM_INPUTS;
prog->MaxAddressRegs = MAX_VERTEX_PROGRAM_ADDRESS_REGS;
@@ -497,6 +499,9 @@ init_program_limits(GLenum type, struct gl_program_constants *prog)
prog->MaxGeometryUniformComponents = MAX_GEOMETRY_UNIFORM_COMPONENTS;
prog->MaxGeometryOutputVertices = MAX_GEOMETRY_OUTPUT_VERTICES;
prog->MaxGeometryTotalOutputComponents = MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS;
+ break;
+ default:
+ assert(0 && "Bad program type in init_program_limits()");
}
/* Set the native limits to zero. This implies that there is no native
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index c399351096..46e5c932d0 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -756,7 +756,7 @@ _mesa_strdup( const char *s )
float
_mesa_strtof( const char *s, char **end )
{
-#if defined(_GNU_SOURCE) && !defined(__CYGWIN__)
+#if defined(_GNU_SOURCE) && !defined(__CYGWIN__) && !defined(__FreeBSD__)
static locale_t loc = NULL;
if (!loc) {
loc = newlocale(LC_CTYPE_MASK, "C", NULL);
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index e89f55befe..b8bcda56bf 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2505,29 +2505,29 @@ struct gl_framebuffer
/**
- * Limits for vertex and fragment programs.
+ * Limits for vertex and fragment programs/shaders.
*/
struct gl_program_constants
{
/* logical limits */
GLuint MaxInstructions;
- GLuint MaxAluInstructions; /* fragment programs only, for now */
- GLuint MaxTexInstructions; /* fragment programs only, for now */
- GLuint MaxTexIndirections; /* fragment programs only, for now */
+ GLuint MaxAluInstructions;
+ GLuint MaxTexInstructions;
+ GLuint MaxTexIndirections;
GLuint MaxAttribs;
GLuint MaxTemps;
- GLuint MaxAddressRegs; /* vertex program only, for now */
+ GLuint MaxAddressRegs;
GLuint MaxParameters;
GLuint MaxLocalParams;
GLuint MaxEnvParams;
/* native/hardware limits */
GLuint MaxNativeInstructions;
- GLuint MaxNativeAluInstructions; /* fragment programs only, for now */
- GLuint MaxNativeTexInstructions; /* fragment programs only, for now */
- GLuint MaxNativeTexIndirections; /* fragment programs only, for now */
+ GLuint MaxNativeAluInstructions;
+ GLuint MaxNativeTexInstructions;
+ GLuint MaxNativeTexIndirections;
GLuint MaxNativeAttribs;
GLuint MaxNativeTemps;
- GLuint MaxNativeAddressRegs; /* vertex program only, for now */
+ GLuint MaxNativeAddressRegs;
GLuint MaxNativeParameters;
/* For shaders */
GLuint MaxUniformComponents;
diff --git a/src/mesa/main/querymatrix.c b/src/mesa/main/querymatrix.c
index 6f62415ba8..32aaa79f7f 100644
--- a/src/mesa/main/querymatrix.c
+++ b/src/mesa/main/querymatrix.c
@@ -36,9 +36,9 @@
#define INT_TO_FIXED(x) ((GLfixed) ((x) << 16))
#define FLOAT_TO_FIXED(x) ((GLfixed) ((x) * 65536.0))
-#if defined(WIN32) || defined(_WIN32_WCE)
+#if defined(_MSC_VER)
/* Oddly, the fpclassify() function doesn't exist in such a form
- * on Windows. This is an implementation using slightly different
+ * on MSVC. This is an implementation using slightly different
* lower-level Windows functions.
*/
#include <float.h>
@@ -72,7 +72,7 @@ fpclassify(double x)
#elif defined(__APPLE__) || defined(__CYGWIN__) || defined(__FreeBSD__) || \
defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) || \
- (defined(__sun) && defined(__C99FEATURES__))
+ (defined(__sun) && defined(__C99FEATURES__)) || defined(__MINGW32__)
/* fpclassify is available. */
diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h
index ec088c7dde..f1c4fdcd1f 100644
--- a/src/mesa/program/hash_table.h
+++ b/src/mesa/program/hash_table.h
@@ -31,8 +31,6 @@
#ifndef HASH_TABLE_H
#define HASH_TABLE_H
-#include <string.h>
-
struct hash_table;
typedef unsigned (*hash_func_t)(const void *key);
diff --git a/src/mesa/program/nvfragparse.h b/src/mesa/program/nvfragparse.h
index 544ab80c56..e28a6c4934 100644
--- a/src/mesa/program/nvfragparse.h
+++ b/src/mesa/program/nvfragparse.h
@@ -30,6 +30,7 @@
#ifndef NVFRAGPARSE_H
#define NVFRAGPARSE_H
+#include "main/mtypes.h"
extern void
_mesa_parse_nv_fragment_program(GLcontext *ctx, GLenum target,
diff --git a/src/mesa/program/nvvertparse.h b/src/mesa/program/nvvertparse.h
index 9919e22388..91ef79e6c3 100644
--- a/src/mesa/program/nvvertparse.h
+++ b/src/mesa/program/nvvertparse.h
@@ -29,6 +29,7 @@
#ifndef NVVERTPARSE_H
#define NVVERTPARSE_H
+#include "main/mtypes.h"
extern void
_mesa_parse_nv_vertex_program(GLcontext *ctx, GLenum target,
diff --git a/src/mesa/program/prog_cache.h b/src/mesa/program/prog_cache.h
index 4e1ccac03f..bfe8f99d44 100644
--- a/src/mesa/program/prog_cache.h
+++ b/src/mesa/program/prog_cache.h
@@ -30,6 +30,9 @@
#define PROG_CACHE_H
+#include "main/mtypes.h"
+
+
/** Opaque type */
struct gl_program_cache;
diff --git a/src/mesa/program/prog_execute.h b/src/mesa/program/prog_execute.h
index adefc5439d..f59b65176f 100644
--- a/src/mesa/program/prog_execute.h
+++ b/src/mesa/program/prog_execute.h
@@ -26,6 +26,7 @@
#define PROG_EXECUTE_H
#include "main/config.h"
+#include "main/mtypes.h"
typedef void (*FetchTexelLodFunc)(GLcontext *ctx, const GLfloat texcoord[4],
diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h
index 1d850c4d5d..ca90de7ce1 100644
--- a/src/mesa/program/prog_instruction.h
+++ b/src/mesa/program/prog_instruction.h
@@ -38,7 +38,7 @@
#define PROG_INSTRUCTION_H
-#include "main/mfeatures.h"
+#include "main/glheader.h"
/**
diff --git a/src/mesa/program/prog_noise.h b/src/mesa/program/prog_noise.h
index c4779479f9..dd7986efcd 100644
--- a/src/mesa/program/prog_noise.h
+++ b/src/mesa/program/prog_noise.h
@@ -25,6 +25,8 @@
#ifndef PROG_NOISE
#define PROG_NOISE
+#include "main/glheader.h"
+
extern GLfloat _mesa_noise1(GLfloat);
extern GLfloat _mesa_noise2(GLfloat, GLfloat);
extern GLfloat _mesa_noise3(GLfloat, GLfloat, GLfloat);
diff --git a/src/mesa/program/prog_optimize.c b/src/mesa/program/prog_optimize.c
index 457ace14c6..ab878755e2 100644
--- a/src/mesa/program/prog_optimize.c
+++ b/src/mesa/program/prog_optimize.c
@@ -43,40 +43,117 @@
static GLboolean dbg = GL_FALSE;
-/* Returns the mask of channels read from the given srcreg in this instruction.
+#define NO_MASK 0xf
+
+/**
+ * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
+ * are read from the given src in this instruction, We also provide
+ * one optional masks which may mask other components in the dst
+ * register
*/
static GLuint
-get_src_arg_mask(const struct prog_instruction *inst, int arg)
+get_src_arg_mask(const struct prog_instruction *inst,
+ GLuint arg, GLuint dst_mask)
{
- int writemask = inst->DstReg.WriteMask;
+ GLuint read_mask, channel_mask;
+ GLuint comp;
- if (inst->CondUpdate)
- writemask = WRITEMASK_XYZW;
+ ASSERT(arg < _mesa_num_inst_src_regs(inst->Opcode));
- switch (inst->Opcode) {
- case OPCODE_MOV:
- case OPCODE_ABS:
- case OPCODE_ADD:
- case OPCODE_MUL:
- case OPCODE_SUB:
- return writemask;
- case OPCODE_RCP:
- case OPCODE_SIN:
- case OPCODE_COS:
- case OPCODE_RSQ:
- case OPCODE_POW:
- case OPCODE_EX2:
- return WRITEMASK_X;
- case OPCODE_DP2:
- return WRITEMASK_XY;
- case OPCODE_DP3:
- case OPCODE_XPD:
- return WRITEMASK_XYZ;
- default:
- return WRITEMASK_XYZW;
+ /* Form the dst register, find the written channels */
+ if (inst->CondUpdate) {
+ channel_mask = WRITEMASK_XYZW;
+ }
+ else {
+ switch (inst->Opcode) {
+ case OPCODE_MOV:
+ case OPCODE_MIN:
+ case OPCODE_MAX:
+ case OPCODE_ABS:
+ case OPCODE_ADD:
+ case OPCODE_MAD:
+ case OPCODE_MUL:
+ case OPCODE_SUB:
+ channel_mask = inst->DstReg.WriteMask & dst_mask;
+ break;
+ case OPCODE_RCP:
+ case OPCODE_SIN:
+ case OPCODE_COS:
+ case OPCODE_RSQ:
+ case OPCODE_POW:
+ case OPCODE_EX2:
+ case OPCODE_LOG:
+ channel_mask = WRITEMASK_X;
+ break;
+ case OPCODE_DP2:
+ channel_mask = WRITEMASK_XY;
+ break;
+ case OPCODE_DP3:
+ case OPCODE_XPD:
+ channel_mask = WRITEMASK_XYZ;
+ break;
+ default:
+ channel_mask = WRITEMASK_XYZW;
+ break;
+ }
}
+
+ /* Now, given the src swizzle and the written channels, find which
+ * components are actually read
+ */
+ read_mask = 0x0;
+ for (comp = 0; comp < 4; ++comp) {
+ const GLuint coord = GET_SWZ(inst->SrcReg[arg].Swizzle, comp);
+ ASSERT(coord < 4);
+ if (channel_mask & (1 << comp) && coord <= SWIZZLE_W)
+ read_mask |= 1 << coord;
+ }
+
+ return read_mask;
+}
+
+
+/**
+ * For a MOV instruction, compute a write mask when src register also has
+ * a mask
+ */
+static GLuint
+get_dst_mask_for_mov(const struct prog_instruction *mov, GLuint src_mask)
+{
+ const GLuint mask = mov->DstReg.WriteMask;
+ GLuint comp;
+ GLuint updated_mask = 0x0;
+
+ ASSERT(mov->Opcode == OPCODE_MOV);
+
+ for (comp = 0; comp < 4; ++comp) {
+ GLuint src_comp;
+ if ((mask & (1 << comp)) == 0)
+ continue;
+ src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, comp);
+ if ((src_mask & (1 << src_comp)) == 0)
+ continue;
+ updated_mask |= 1 << comp;
+ }
+
+ return updated_mask;
+}
+
+
+/**
+ * Ensure that the swizzle is regular. That is, all of the swizzle
+ * terms are SWIZZLE_X,Y,Z,W and not SWIZZLE_ZERO or SWIZZLE_ONE.
+ */
+static GLboolean
+is_swizzle_regular(GLuint swz)
+{
+ return GET_SWZ(swz,0) <= SWIZZLE_W &&
+ GET_SWZ(swz,1) <= SWIZZLE_W &&
+ GET_SWZ(swz,2) <= SWIZZLE_W &&
+ GET_SWZ(swz,3) <= SWIZZLE_W;
}
+
/**
* In 'prog' remove instruction[i] if removeFlags[i] == TRUE.
* \return number of instructions removed
@@ -153,82 +230,13 @@ replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[])
/**
- * Consolidate temporary registers to use low numbers. For example, if the
- * shader only uses temps 4, 5, 8, replace them with 0, 1, 2.
- */
-static void
-_mesa_consolidate_registers(struct gl_program *prog)
-{
- GLboolean tempUsed[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
- GLint tempMap[REG_ALLOCATE_MAX_PROGRAM_TEMPS];
- GLuint tempMax = 0, i;
-
- if (dbg) {
- printf("Optimize: Begin register consolidation\n");
- }
-
- memset(tempUsed, 0, sizeof(tempUsed));
-
- for (i = 0; i < REG_ALLOCATE_MAX_PROGRAM_TEMPS; i++) {
- tempMap[i] = -1;
- }
-
- /* set tempUsed[i] if temporary [i] is referenced */
- for (i = 0; i < prog->NumInstructions; i++) {
- const struct prog_instruction *inst = prog->Instructions + i;
- const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
- GLuint j;
- for (j = 0; j < numSrc; j++) {
- if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
- const GLuint index = inst->SrcReg[j].Index;
- ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
- tempUsed[index] = GL_TRUE;
- tempMax = MAX2(tempMax, index);
- break;
- }
- }
- if (inst->DstReg.File == PROGRAM_TEMPORARY) {
- const GLuint index = inst->DstReg.Index;
- ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
- tempUsed[index] = GL_TRUE;
- tempMax = MAX2(tempMax, index);
- }
- }
-
- /* allocate a new index for each temp that's used */
- {
- GLuint freeTemp = 0;
- for (i = 0; i <= tempMax; i++) {
- if (tempUsed[i]) {
- tempMap[i] = freeTemp++;
- /*printf("replace %u with %u\n", i, tempMap[i]);*/
- }
- }
- if (freeTemp == tempMax + 1) {
- /* no consolidation possible */
- return;
- }
- if (dbg) {
- printf("Replace regs 0..%u with 0..%u\n", tempMax, freeTemp-1);
- }
- }
-
- replace_regs(prog, PROGRAM_TEMPORARY, tempMap);
-
- if (dbg) {
- printf("Optimize: End register consolidation\n");
- }
-}
-
-
-/**
* Remove dead instructions from the given program.
* This is very primitive for now. Basically look for temp registers
* that are written to but never read. Remove any instructions that
* write to such registers. Be careful with condition code setters.
*/
-static void
-_mesa_remove_dead_code(struct gl_program *prog)
+static GLboolean
+_mesa_remove_dead_code_global(struct gl_program *prog)
{
GLboolean tempRead[REG_ALLOCATE_MAX_PROGRAM_TEMPS][4];
GLboolean *removeInst; /* per-instruction removal flag */
@@ -256,7 +264,7 @@ _mesa_remove_dead_code(struct gl_program *prog)
const GLuint index = inst->SrcReg[j].Index;
GLuint read_mask;
ASSERT(index < REG_ALLOCATE_MAX_PROGRAM_TEMPS);
- read_mask = get_src_arg_mask(inst, j);
+ read_mask = get_src_arg_mask(inst, j, NO_MASK);
if (inst->SrcReg[j].RelAddr) {
if (dbg)
@@ -265,25 +273,12 @@ _mesa_remove_dead_code(struct gl_program *prog)
}
for (comp = 0; comp < 4; comp++) {
- GLuint swz = (inst->SrcReg[j].Swizzle >> (3 * comp)) & 0x7;
-
- if ((read_mask & (1 << comp)) == 0)
+ const GLuint swz = GET_SWZ(inst->SrcReg[j].Swizzle, comp);
+ ASSERT(swz < 4);
+ if ((read_mask & (1 << swz)) == 0)
continue;
-
- switch (swz) {
- case SWIZZLE_X:
- tempRead[index][0] = GL_TRUE;
- break;
- case SWIZZLE_Y:
- tempRead[index][1] = GL_TRUE;
- break;
- case SWIZZLE_Z:
- tempRead[index][2] = GL_TRUE;
- break;
- case SWIZZLE_W:
- tempRead[index][3] = GL_TRUE;
- break;
- }
+ if (swz <= SWIZZLE_W)
+ tempRead[index][swz] = GL_TRUE;
}
}
}
@@ -353,10 +348,11 @@ _mesa_remove_dead_code(struct gl_program *prog)
done:
free(removeInst);
+ return rem != 0;
}
-enum temp_use
+enum inst_use
{
READ,
WRITE,
@@ -364,13 +360,19 @@ enum temp_use
END
};
+
/**
- * Scan forward in program from 'start' for the next occurance of TEMP[index].
+ * Scan forward in program from 'start' for the next occurances of TEMP[index].
+ * We look if an instruction reads the component given by the masks and if they
+ * are overwritten.
* Return READ, WRITE, FLOW or END to indicate the next usage or an indicator
* that we can't look further.
*/
-static enum temp_use
-find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index)
+static enum inst_use
+find_next_use(const struct gl_program *prog,
+ GLuint start,
+ GLuint index,
+ GLuint mask)
{
GLuint i;
@@ -378,30 +380,50 @@ find_next_temp_use(const struct gl_program *prog, GLuint start, GLuint index)
const struct prog_instruction *inst = prog->Instructions + i;
switch (inst->Opcode) {
case OPCODE_BGNLOOP:
- case OPCODE_ENDLOOP:
case OPCODE_BGNSUB:
+ case OPCODE_BRA:
+ case OPCODE_CAL:
+ case OPCODE_CONT:
+ case OPCODE_IF:
+ case OPCODE_ELSE:
+ case OPCODE_ENDIF:
+ case OPCODE_ENDLOOP:
case OPCODE_ENDSUB:
+ case OPCODE_RET:
return FLOW;
+ case OPCODE_END:
+ return END;
default:
{
const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode);
GLuint j;
for (j = 0; j < numSrc; j++) {
- if (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
- inst->SrcReg[j].Index == index)
+ if (inst->SrcReg[j].RelAddr ||
+ (inst->SrcReg[j].File == PROGRAM_TEMPORARY &&
+ inst->SrcReg[j].Index == index &&
+ (get_src_arg_mask(inst,j,NO_MASK) & mask)))
return READ;
}
- if (inst->DstReg.File == PROGRAM_TEMPORARY &&
- inst->DstReg.Index == index)
- return WRITE;
+ if (_mesa_num_inst_dst_regs(inst->Opcode) == 1 &&
+ inst->DstReg.File == PROGRAM_TEMPORARY &&
+ inst->DstReg.Index == index) {
+ mask &= ~inst->DstReg.WriteMask;
+ if (mask == 0)
+ return WRITE;
+ }
}
}
}
-
return END;
}
-static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode)
+
+/**
+ * Is the given instruction opcode a flow-control opcode?
+ * XXX maybe move this into prog_instruction.[ch]
+ */
+static GLboolean
+_mesa_is_flow_control_opcode(enum prog_opcode opcode)
{
switch (opcode) {
case OPCODE_BGNLOOP:
@@ -422,6 +444,37 @@ static GLboolean _mesa_is_flow_control_opcode(enum prog_opcode opcode)
}
}
+
+/**
+ * Test if the given instruction is a simple MOV (no conditional updating,
+ * not relative addressing, no negation/abs, etc).
+ */
+static GLboolean
+can_downward_mov_be_modifed(const struct prog_instruction *mov)
+{
+ return
+ mov->Opcode == OPCODE_MOV &&
+ mov->CondUpdate == GL_FALSE &&
+ mov->SrcReg[0].RelAddr == 0 &&
+ mov->SrcReg[0].Negate == 0 &&
+ mov->SrcReg[0].Abs == 0 &&
+ mov->SrcReg[0].HasIndex2 == 0 &&
+ mov->SrcReg[0].RelAddr2 == 0 &&
+ mov->DstReg.RelAddr == 0 &&
+ mov->DstReg.CondMask == COND_TR &&
+ mov->SaturateMode == SATURATE_OFF;
+}
+
+
+static GLboolean
+can_upward_mov_be_modifed(const struct prog_instruction *mov)
+{
+ return
+ can_downward_mov_be_modifed(mov) &&
+ mov->DstReg.File == PROGRAM_TEMPORARY;
+}
+
+
/**
* Try to remove use of extraneous MOV instructions, to free them up for dead
* code removal.
@@ -449,14 +502,15 @@ _mesa_remove_extra_move_use(struct gl_program *prog)
for (i = 0; i + 1 < prog->NumInstructions; i++) {
const struct prog_instruction *mov = prog->Instructions + i;
+ GLuint dst_mask, src_mask;
+ if (can_upward_mov_be_modifed(mov) == GL_FALSE)
+ continue;
- if (mov->Opcode != OPCODE_MOV ||
- mov->DstReg.File != PROGRAM_TEMPORARY ||
- mov->DstReg.RelAddr ||
- mov->DstReg.CondMask != COND_TR ||
- mov->SaturateMode != SATURATE_OFF ||
- mov->SrcReg[0].RelAddr)
- continue;
+ /* Scanning the code, we maintain the components which are still active in
+ * these two masks
+ */
+ dst_mask = mov->DstReg.WriteMask;
+ src_mask = get_src_arg_mask(mov, 0, NO_MASK);
/* Walk through remaining instructions until the or src reg gets
* rewritten or we get into some flow-control, eliminating the use of
@@ -464,61 +518,60 @@ _mesa_remove_extra_move_use(struct gl_program *prog)
*/
for (j = i + 1; j < prog->NumInstructions; j++) {
struct prog_instruction *inst2 = prog->Instructions + j;
- GLuint arg;
+ GLuint arg;
if (_mesa_is_flow_control_opcode(inst2->Opcode))
break;
/* First rewrite this instruction's args if appropriate. */
for (arg = 0; arg < _mesa_num_inst_src_regs(inst2->Opcode); arg++) {
- int comp;
- int read_mask = get_src_arg_mask(inst2, arg);
+ GLuint comp, read_mask;
if (inst2->SrcReg[arg].File != mov->DstReg.File ||
inst2->SrcReg[arg].Index != mov->DstReg.Index ||
inst2->SrcReg[arg].RelAddr ||
inst2->SrcReg[arg].Abs)
continue;
+ read_mask = get_src_arg_mask(inst2, arg, NO_MASK);
- /* Check that all the sources for this arg of inst2 come from inst1
- * or constants.
- */
- for (comp = 0; comp < 4; comp++) {
- int src_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
-
- /* If the MOV didn't write that channel, can't use it. */
- if ((read_mask & (1 << comp)) &&
- src_swz <= SWIZZLE_W &&
- (mov->DstReg.WriteMask & (1 << src_swz)) == 0)
- break;
- }
- if (comp != 4)
- continue;
-
- /* Adjust the swizzles of inst2 to point at MOV's source */
- for (comp = 0; comp < 4; comp++) {
- int inst2_swz = GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
-
- if (inst2_swz <= SWIZZLE_W) {
- GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
- inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
- inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
- inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
- inst2_swz) & 0x1) << comp);
- }
- }
- inst2->SrcReg[arg].File = mov->SrcReg[0].File;
- inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
+ /* Adjust the swizzles of inst2 to point at MOV's source if ALL the
+ * components read still come from the mov instructions
+ */
+ if (is_swizzle_regular(inst2->SrcReg[arg].Swizzle) &&
+ (read_mask & dst_mask) == read_mask) {
+ for (comp = 0; comp < 4; comp++) {
+ const GLuint inst2_swz =
+ GET_SWZ(inst2->SrcReg[arg].Swizzle, comp);
+ const GLuint s = GET_SWZ(mov->SrcReg[0].Swizzle, inst2_swz);
+ inst2->SrcReg[arg].Swizzle &= ~(7 << (3 * comp));
+ inst2->SrcReg[arg].Swizzle |= s << (3 * comp);
+ inst2->SrcReg[arg].Negate ^= (((mov->SrcReg[0].Negate >>
+ inst2_swz) & 0x1) << comp);
+ }
+ inst2->SrcReg[arg].File = mov->SrcReg[0].File;
+ inst2->SrcReg[arg].Index = mov->SrcReg[0].Index;
+ }
}
- /* If this instruction overwrote part of the move, our time is up. */
- if ((inst2->DstReg.File == mov->DstReg.File &&
- (inst2->DstReg.RelAddr ||
- inst2->DstReg.Index == mov->DstReg.Index)) ||
- (inst2->DstReg.File == mov->SrcReg[0].File &&
- (inst2->DstReg.RelAddr ||
- inst2->DstReg.Index == mov->SrcReg[0].Index)))
- break;
+ /* The source of MOV is written. This potentially deactivates some
+ * components from the src and dst of the MOV instruction
+ */
+ if (inst2->DstReg.File == mov->DstReg.File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->DstReg.Index)) {
+ dst_mask &= ~inst2->DstReg.WriteMask;
+ src_mask = get_src_arg_mask(mov, 0, dst_mask);
+ }
+
+ /* Idem when the destination of mov is written */
+ if (inst2->DstReg.File == mov->SrcReg[0].File &&
+ (inst2->DstReg.RelAddr ||
+ inst2->DstReg.Index == mov->SrcReg[0].Index)) {
+ src_mask &= ~inst2->DstReg.WriteMask;
+ dst_mask &= get_dst_mask_for_mov(mov, src_mask);
+ }
+ if (dst_mask == 0)
+ break;
}
}
@@ -528,14 +581,151 @@ _mesa_remove_extra_move_use(struct gl_program *prog)
}
}
+
+/**
+ * Complements dead_code_global. Try to remove code in block of code by
+ * carefully monitoring the swizzles. Both functions should be merged into one
+ * with a proper control flow graph
+ */
+static GLboolean
+_mesa_remove_dead_code_local(struct gl_program *prog)
+{
+ GLboolean *removeInst;
+ GLuint i, arg, rem = 0;
+
+ removeInst = (GLboolean *)
+ calloc(1, prog->NumInstructions * sizeof(GLboolean));
+
+ for (i = 0; i < prog->NumInstructions; i++) {
+ const struct prog_instruction *inst = prog->Instructions + i;
+ const GLuint index = inst->DstReg.Index;
+ const GLuint mask = inst->DstReg.WriteMask;
+ enum inst_use use;
+
+ /* We must deactivate the pass as soon as some indirection is used */
+ if (inst->DstReg.RelAddr)
+ goto done;
+ for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++)
+ if (inst->SrcReg[arg].RelAddr)
+ goto done;
+
+ if (_mesa_is_flow_control_opcode(inst->Opcode) ||
+ _mesa_num_inst_dst_regs(inst->Opcode) == 0 ||
+ inst->DstReg.File != PROGRAM_TEMPORARY ||
+ inst->DstReg.RelAddr)
+ continue;
+
+ use = find_next_use(prog, i+1, index, mask);
+ if (use == WRITE || use == END)
+ removeInst[i] = GL_TRUE;
+ }
+
+ rem = remove_instructions(prog, removeInst);
+
+done:
+ free(removeInst);
+ return rem != 0;
+}
+
+
+/**
+ * Try to inject the destination of mov as the destination of inst and recompute
+ * the swizzles operators for the sources of inst if required. Return GL_TRUE
+ * of the substitution was possible, GL_FALSE otherwise
+ */
+static GLboolean
+_mesa_merge_mov_into_inst(struct prog_instruction *inst,
+ const struct prog_instruction *mov)
+{
+ /* Indirection table which associates destination and source components for
+ * the mov instruction
+ */
+ const GLuint mask = get_src_arg_mask(mov, 0, NO_MASK);
+
+ /* Some components are not written by inst. We cannot remove the mov */
+ if (mask != (inst->DstReg.WriteMask & mask))
+ return GL_FALSE;
+
+ /* Depending on the instruction, we may need to recompute the swizzles.
+ * Also, some other instructions (like TEX) are not linear. We will only
+ * consider completely active sources and destinations
+ */
+ switch (inst->Opcode) {
+
+ /* Carstesian instructions: we compute the swizzle */
+ case OPCODE_MOV:
+ case OPCODE_MIN:
+ case OPCODE_MAX:
+ case OPCODE_ABS:
+ case OPCODE_ADD:
+ case OPCODE_MAD:
+ case OPCODE_MUL:
+ case OPCODE_SUB:
+ {
+ GLuint dst_to_src_comp[4] = {0,0,0,0};
+ GLuint dst_comp, arg;
+ for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
+ if (mov->DstReg.WriteMask & (1 << dst_comp)) {
+ const GLuint src_comp = GET_SWZ(mov->SrcReg[0].Swizzle, dst_comp);
+ ASSERT(src_comp < 4);
+ dst_to_src_comp[dst_comp] = src_comp;
+ }
+ }
+
+ /* Patch each source of the instruction */
+ for (arg = 0; arg < _mesa_num_inst_src_regs(inst->Opcode); arg++) {
+ const GLuint arg_swz = inst->SrcReg[arg].Swizzle;
+ inst->SrcReg[arg].Swizzle = 0;
+
+ /* Reset each active component of the swizzle */
+ for (dst_comp = 0; dst_comp < 4; ++dst_comp) {
+ GLuint src_comp, arg_comp;
+ if ((mov->DstReg.WriteMask & (1 << dst_comp)) == 0)
+ continue;
+ src_comp = dst_to_src_comp[dst_comp];
+ ASSERT(src_comp < 4);
+ arg_comp = GET_SWZ(arg_swz, src_comp);
+ ASSERT(arg_comp < 4);
+ inst->SrcReg[arg].Swizzle |= arg_comp << (3*dst_comp);
+ }
+ }
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+ }
+
+ /* Dot products and scalar instructions: we only change the destination */
+ case OPCODE_RCP:
+ case OPCODE_SIN:
+ case OPCODE_COS:
+ case OPCODE_RSQ:
+ case OPCODE_POW:
+ case OPCODE_EX2:
+ case OPCODE_LOG:
+ case OPCODE_DP2:
+ case OPCODE_DP3:
+ case OPCODE_DP4:
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+
+ /* All other instructions require fully active components with no swizzle */
+ default:
+ if (mov->SrcReg[0].Swizzle != SWIZZLE_XYZW ||
+ inst->DstReg.WriteMask != WRITEMASK_XYZW)
+ return GL_FALSE;
+ inst->DstReg = mov->DstReg;
+ return GL_TRUE;
+ }
+}
+
+
/**
* Try to remove extraneous MOV instructions from the given program.
*/
-static void
+static GLboolean
_mesa_remove_extra_moves(struct gl_program *prog)
{
GLboolean *removeInst; /* per-instruction removal flag */
- GLuint i, rem, loopNesting = 0, subroutineNesting = 0;
+ GLuint i, rem = 0, nesting = 0;
if (dbg) {
printf("Optimize: Begin remove extra moves\n");
@@ -554,29 +744,24 @@ _mesa_remove_extra_moves(struct gl_program *prog)
*/
for (i = 0; i < prog->NumInstructions; i++) {
- const struct prog_instruction *inst = prog->Instructions + i;
+ const struct prog_instruction *mov = prog->Instructions + i;
- switch (inst->Opcode) {
+ switch (mov->Opcode) {
case OPCODE_BGNLOOP:
- loopNesting++;
- break;
- case OPCODE_ENDLOOP:
- loopNesting--;
- break;
case OPCODE_BGNSUB:
- subroutineNesting++;
+ case OPCODE_IF:
+ nesting++;
break;
+ case OPCODE_ENDLOOP:
case OPCODE_ENDSUB:
- subroutineNesting--;
+ case OPCODE_ENDIF:
+ nesting--;
break;
case OPCODE_MOV:
- if (i > 0 &&
- loopNesting == 0 &&
- subroutineNesting == 0 &&
- inst->SrcReg[0].File == PROGRAM_TEMPORARY &&
- inst->SrcReg[0].Swizzle == SWIZZLE_XYZW) {
+ if (i > 0 && can_downward_mov_be_modifed(mov) && nesting == 0) {
+
/* see if this MOV can be removed */
- const GLuint tempIndex = inst->SrcReg[0].Index;
+ const GLuint id = mov->SrcReg[0].Index;
struct prog_instruction *prevInst;
GLuint prevI;
@@ -587,11 +772,13 @@ _mesa_remove_extra_moves(struct gl_program *prog)
prevInst = prog->Instructions + prevI;
if (prevInst->DstReg.File == PROGRAM_TEMPORARY &&
- prevInst->DstReg.Index == tempIndex &&
- prevInst->DstReg.WriteMask == WRITEMASK_XYZW) {
+ prevInst->DstReg.Index == id &&
+ prevInst->DstReg.RelAddr == 0 &&
+ prevInst->DstReg.CondSrc == 0 &&
+ prevInst->DstReg.CondMask == COND_TR) {
- enum temp_use next_use =
- find_next_temp_use(prog, i + 1, tempIndex);
+ const GLuint dst_mask = prevInst->DstReg.WriteMask;
+ enum inst_use next_use = find_next_use(prog, i+1, id, dst_mask);
if (next_use == WRITE || next_use == END) {
/* OK, we can safely remove this MOV instruction.
@@ -601,18 +788,13 @@ _mesa_remove_extra_moves(struct gl_program *prog)
* Into:
* prevI: FOO z, x, y;
*/
-
- /* patch up prev inst */
- prevInst->DstReg.File = inst->DstReg.File;
- prevInst->DstReg.Index = inst->DstReg.Index;
-
- /* flag this instruction for removal */
- removeInst[i] = GL_TRUE;
-
- if (dbg) {
- printf("Remove MOV at %u\n", i);
- printf("new prev inst %u: ", prevI);
- _mesa_print_instruction(prevInst);
+ if (_mesa_merge_mov_into_inst(prevInst, mov)) {
+ removeInst[i] = GL_TRUE;
+ if (dbg) {
+ printf("Remove MOV at %u\n", i);
+ printf("new prev inst %u: ", prevI);
+ _mesa_print_instruction(prevInst);
+ }
}
}
}
@@ -632,6 +814,8 @@ _mesa_remove_extra_moves(struct gl_program *prog)
printf("Optimize: End remove extra moves. %u instructions removed\n", rem);
/*_mesa_print_program(prog);*/
}
+
+ return rem != 0;
}
@@ -718,6 +902,7 @@ compare_start(const void *a, const void *b)
return 0;
}
+
/** sort the interval list according to interval starts */
static void
sort_interval_list_by_start(struct interval_list *list)
@@ -1025,6 +1210,17 @@ _mesa_reallocate_registers(struct gl_program *prog)
}
+#if 0
+static void
+print_it(GLcontext *ctx, struct gl_program *program, const char *txt) {
+ fprintf(stderr, "%s (%u inst):\n", txt, program->NumInstructions);
+ _mesa_print_program(program);
+ _mesa_print_program_parameters(ctx, program);
+ fprintf(stderr, "\n\n");
+}
+#endif
+
+
/**
* Apply optimizations to the given program to eliminate unnecessary
* instructions, temp regs, etc.
@@ -1032,16 +1228,19 @@ _mesa_reallocate_registers(struct gl_program *prog)
void
_mesa_optimize_program(GLcontext *ctx, struct gl_program *program)
{
- _mesa_remove_extra_move_use(program);
-
- if (1)
- _mesa_remove_dead_code(program);
-
- if (0) /* not tested much yet */
- _mesa_remove_extra_moves(program);
-
- if (0)
- _mesa_consolidate_registers(program);
- else
+ GLboolean any_change;
+
+ /* Stop when no modifications were output */
+ do {
+ any_change = GL_FALSE;
+ _mesa_remove_extra_move_use(program);
+ if (_mesa_remove_dead_code_global(program))
+ any_change = GL_TRUE;
+ if (_mesa_remove_extra_moves(program))
+ any_change = GL_TRUE;
+ if (_mesa_remove_dead_code_local(program))
+ any_change = GL_TRUE;
_mesa_reallocate_registers(program);
+ } while (any_change);
}
+
diff --git a/src/mesa/program/prog_optimize.h b/src/mesa/program/prog_optimize.h
index 43894a2723..06cd9cb2c2 100644
--- a/src/mesa/program/prog_optimize.h
+++ b/src/mesa/program/prog_optimize.h
@@ -27,6 +27,7 @@
#include "main/config.h"
+#include "main/mtypes.h"
struct gl_program;
diff --git a/src/mesa/program/prog_print.h b/src/mesa/program/prog_print.h
index 4ffd5ab96c..78b90aeb4d 100644
--- a/src/mesa/program/prog_print.h
+++ b/src/mesa/program/prog_print.h
@@ -26,6 +26,16 @@
#ifndef PROG_PRINT_H
#define PROG_PRINT_H
+#include <stdio.h>
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+
+struct gl_program;
+struct gl_program_parameter_list;
+struct gl_shader;
+struct prog_instruction;
+
/**
* The output style to use when printing programs.
diff --git a/src/mesa/program/prog_uniform.h b/src/mesa/program/prog_uniform.h
index a671d30bfe..7988d534a7 100644
--- a/src/mesa/program/prog_uniform.h
+++ b/src/mesa/program/prog_uniform.h
@@ -31,8 +31,7 @@
#ifndef PROG_UNIFORM_H
#define PROG_UNIFORM_H
-#include "main/mtypes.h"
-#include "prog_statevars.h"
+#include "main/glheader.h"
/**
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index d99584d63b..3b6d682744 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -55,13 +55,21 @@ _mesa_init_program(GLcontext *ctx)
/*
* If this assertion fails, we need to increase the field
- * size for register indexes.
+ * size for register indexes (see INST_INDEX_BITS).
*/
ASSERT(ctx->Const.VertexProgram.MaxUniformComponents / 4
<= (1 << INST_INDEX_BITS));
ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents / 4
<= (1 << INST_INDEX_BITS));
+ ASSERT(ctx->Const.VertexProgram.MaxTemps <= (1 << INST_INDEX_BITS));
+ ASSERT(ctx->Const.VertexProgram.MaxLocalParams <= (1 << INST_INDEX_BITS));
+ ASSERT(ctx->Const.FragmentProgram.MaxTemps <= (1 << INST_INDEX_BITS));
+ ASSERT(ctx->Const.FragmentProgram.MaxLocalParams <= (1 << INST_INDEX_BITS));
+
+ ASSERT(ctx->Const.VertexProgram.MaxUniformComponents <= 4 * MAX_UNIFORMS);
+ ASSERT(ctx->Const.FragmentProgram.MaxUniformComponents <= 4 * MAX_UNIFORMS);
+
/* If this fails, increase prog_instruction::TexSrcUnit size */
ASSERT(MAX_TEXTURE_UNITS < (1 << 5));
diff --git a/src/mesa/program/programopt.h b/src/mesa/program/programopt.h
index 21fac07849..4af6357f97 100644
--- a/src/mesa/program/programopt.h
+++ b/src/mesa/program/programopt.h
@@ -26,6 +26,7 @@
#ifndef PROGRAMOPT_H
#define PROGRAMOPT_H 1
+#include "main/mtypes.h"
extern void
_mesa_insert_mvp_code(GLcontext *ctx, struct gl_vertex_program *vprog);
diff --git a/src/mesa/slang/slang_builtin.c b/src/mesa/slang/slang_builtin.c
index a7e0efcb7b..179571fab4 100644
--- a/src/mesa/slang/slang_builtin.c
+++ b/src/mesa/slang/slang_builtin.c
@@ -35,8 +35,10 @@
#include "program/prog_instruction.h"
#include "program/prog_parameter.h"
#include "program/prog_statevars.h"
-#include "slang/slang_ir.h"
#include "slang/slang_builtin.h"
+#include "slang/slang_compile_struct.h"
+#include "slang/slang_ir.h"
+#include "slang/slang_typeinfo.h"
/** special state token (see below) */
diff --git a/src/mesa/slang/slang_builtin.h b/src/mesa/slang/slang_builtin.h
index ed9ae80b3c..dc92f83f8e 100644
--- a/src/mesa/slang/slang_builtin.h
+++ b/src/mesa/slang/slang_builtin.h
@@ -26,8 +26,8 @@
#ifndef SLANG_BUILTIN_H
#define SLANG_BUILTIN_H
-#include "program/prog_parameter.h"
-#include "slang_utility.h"
+#include "main/glheader.h"
+#include "main/mtypes.h"
#include "slang_ir.h"
diff --git a/src/mesa/slang/slang_codegen.h b/src/mesa/slang/slang_codegen.h
index 461633fe34..376a8cc264 100644
--- a/src/mesa/slang/slang_codegen.h
+++ b/src/mesa/slang/slang_codegen.h
@@ -27,9 +27,14 @@
#define SLANG_CODEGEN_H
-#include "main/imports.h"
+#include "main/glheader.h"
#include "slang_compile.h"
+#include "slang_compile_variable.h"
+#include "slang_typeinfo.h"
+#include "slang_utility.h"
+#include "slang_vartable.h"
+struct slang_function_;
#define MAX_LOOP_DEPTH 30
diff --git a/src/mesa/slang/slang_compile.c b/src/mesa/slang/slang_compile.c
index 12ab4666ae..de1bb56cd9 100644
--- a/src/mesa/slang/slang_compile.c
+++ b/src/mesa/slang/slang_compile.c
@@ -36,6 +36,7 @@
#include "program/prog_print.h"
#include "program/prog_parameter.h"
#include "../../glsl/pp/sl_pp_public.h"
+#include "../../glsl/pp/sl_pp_purify.h"
#include "../../glsl/cl/sl_cl_parse.h"
#include "slang_codegen.h"
#include "slang_compile.h"
diff --git a/src/mesa/slang/slang_compile.h b/src/mesa/slang/slang_compile.h
index 71fcaa3993..6061f878e7 100644
--- a/src/mesa/slang/slang_compile.h
+++ b/src/mesa/slang/slang_compile.h
@@ -25,13 +25,14 @@
#if !defined SLANG_COMPILE_H
#define SLANG_COMPILE_H
-#include "main/imports.h"
+#include "main/glheader.h"
#include "main/mtypes.h"
-#include "slang_typeinfo.h"
-#include "slang_compile_variable.h"
-#include "slang_compile_struct.h"
-#include "slang_compile_operation.h"
#include "slang_compile_function.h"
+#include "slang_compile_struct.h"
+#include "slang_compile_variable.h"
+#include "slang_utility.h"
+
+struct slang_code_object_;
#if defined __cplusplus
extern "C" {
diff --git a/src/mesa/slang/slang_compile_function.h b/src/mesa/slang/slang_compile_function.h
index a5445ec253..0eced3ca1a 100644
--- a/src/mesa/slang/slang_compile_function.h
+++ b/src/mesa/slang/slang_compile_function.h
@@ -25,6 +25,14 @@
#ifndef SLANG_COMPILE_FUNCTION_H
#define SLANG_COMPILE_FUNCTION_H
+#include "main/glheader.h"
+#include "slang_compile_operation.h"
+#include "slang_compile_variable.h"
+#include "slang_log.h"
+#include "slang_utility.h"
+
+struct slang_name_space_;
+struct slang_operation_;
/**
* Types of functions.
diff --git a/src/mesa/slang/slang_compile_operation.h b/src/mesa/slang/slang_compile_operation.h
index 1f15c19896..b32573e022 100644
--- a/src/mesa/slang/slang_compile_operation.h
+++ b/src/mesa/slang/slang_compile_operation.h
@@ -26,6 +26,11 @@
#define SLANG_COMPILE_OPERATION_H
+#include "main/compiler.h"
+#include "main/glheader.h"
+#include "slang_compile_variable.h"
+#include "slang_utility.h"
+
/**
* Types of slang operations.
* These are the types of the AST (abstract syntax tree) nodes.
diff --git a/src/mesa/slang/slang_compile_struct.h b/src/mesa/slang/slang_compile_struct.h
index 90c5512f4d..7be6f204e1 100644
--- a/src/mesa/slang/slang_compile_struct.h
+++ b/src/mesa/slang/slang_compile_struct.h
@@ -29,6 +29,9 @@
extern "C" {
#endif
+#include "main/glheader.h"
+#include "slang_utility.h"
+
struct slang_function_;
typedef struct slang_struct_scope_
diff --git a/src/mesa/slang/slang_compile_variable.h b/src/mesa/slang/slang_compile_variable.h
index 5c9d248b35..48dc6efca4 100644
--- a/src/mesa/slang/slang_compile_variable.h
+++ b/src/mesa/slang/slang_compile_variable.h
@@ -26,7 +26,9 @@
#define SLANG_COMPILE_VARIABLE_H
-struct slang_ir_storage_;
+#include "main/glheader.h"
+#include "slang_typeinfo.h"
+#include "slang_utility.h"
/**
diff --git a/src/mesa/slang/slang_emit.h b/src/mesa/slang/slang_emit.h
index ab4c202d67..f93d6b00d6 100644
--- a/src/mesa/slang/slang_emit.h
+++ b/src/mesa/slang/slang_emit.h
@@ -25,11 +25,9 @@
#ifndef SLANG_EMIT_H
#define SLANG_EMIT_H
-
-#include "main/imports.h"
-#include "slang_compile.h"
+#include "main/glheader.h"
#include "slang_ir.h"
-#include "main/mtypes.h"
+#include "slang_vartable.h"
extern GLuint
diff --git a/src/mesa/slang/slang_ir.h b/src/mesa/slang/slang_ir.h
index b7a373746b..a010efcb34 100644
--- a/src/mesa/slang/slang_ir.h
+++ b/src/mesa/slang/slang_ir.h
@@ -33,10 +33,11 @@
#define SLANG_IR_H
-#include "main/imports.h"
-#include "slang_compile.h"
-#include "slang_label.h"
+#include "main/glheader.h"
#include "main/mtypes.h"
+#include "program/prog_instruction.h"
+#include "slang_compile_variable.h"
+#include "slang_label.h"
/**
diff --git a/src/mesa/slang/slang_label.c b/src/mesa/slang/slang_label.c
index 8e3a8ebc1a..a161139800 100644
--- a/src/mesa/slang/slang_label.c
+++ b/src/mesa/slang/slang_label.c
@@ -7,6 +7,9 @@
*/
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "program/prog_instruction.h"
#include "slang_label.h"
#include "slang_mem.h"
diff --git a/src/mesa/slang/slang_label.h b/src/mesa/slang/slang_label.h
index 4d04df18d2..b0cff3a8e8 100644
--- a/src/mesa/slang/slang_label.h
+++ b/src/mesa/slang/slang_label.h
@@ -1,10 +1,9 @@
#ifndef SLANG_LABEL_H
#define SLANG_LABEL_H 1
-#include "main/imports.h"
-#include "main/mtypes.h"
-#include "program/prog_instruction.h"
+#include "main/glheader.h"
+struct gl_program;
struct slang_label_
{
diff --git a/src/mesa/slang/slang_link.c b/src/mesa/slang/slang_link.c
index 94db6f918d..28ad0b74e8 100644
--- a/src/mesa/slang/slang_link.c
+++ b/src/mesa/slang/slang_link.c
@@ -42,6 +42,7 @@
#include "program/prog_statevars.h"
#include "program/prog_uniform.h"
#include "slang_builtin.h"
+#include "slang_compile.h"
#include "slang_link.h"
@@ -1206,11 +1207,11 @@ _slang_link(GLcontext *ctx,
vertNotify = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
&shProg->FragmentProgram->Base);
if (ctx->Shader.Flags & GLSL_DUMP) {
- printf("Mesa pre-link fragment program:\n");
+ fprintf(stderr, "Mesa pre-link fragment program:\n");
_mesa_print_program(&fragProg->Base);
_mesa_print_program_parameters(ctx, &fragProg->Base);
- printf("Mesa post-link fragment program:\n");
+ fprintf(stderr, "Mesa post-link fragment program:\n");
_mesa_print_program(&shProg->FragmentProgram->Base);
_mesa_print_program_parameters(ctx, &shProg->FragmentProgram->Base);
}
@@ -1229,11 +1230,11 @@ _slang_link(GLcontext *ctx,
geomNotify = ctx->Driver.ProgramStringNotify(ctx, MESA_GEOMETRY_PROGRAM,
&shProg->GeometryProgram->Base);
if (ctx->Shader.Flags & GLSL_DUMP) {
- printf("Mesa pre-link geometry program:\n");
+ fprintf(stderr, "Mesa pre-link geometry program:\n");
_mesa_print_program(&geomProg->Base);
_mesa_print_program_parameters(ctx, &geomProg->Base);
- printf("Mesa post-link geometry program:\n");
+ fprintf(stderr, "Mesa post-link geometry program:\n");
_mesa_print_program(&shProg->GeometryProgram->Base);
_mesa_print_program_parameters(ctx, &shProg->GeometryProgram->Base);
}
@@ -1247,11 +1248,11 @@ _slang_link(GLcontext *ctx,
fragNotify = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
&shProg->VertexProgram->Base);
if (ctx->Shader.Flags & GLSL_DUMP) {
- printf("Mesa pre-link vertex program:\n");
+ fprintf(stderr, "Mesa pre-link vertex program:\n");
_mesa_print_program(&vertProg->Base);
_mesa_print_program_parameters(ctx, &vertProg->Base);
- printf("Mesa post-link vertex program:\n");
+ fprintf(stderr, "Mesa post-link vertex program:\n");
_mesa_print_program(&shProg->VertexProgram->Base);
_mesa_print_program_parameters(ctx, &shProg->VertexProgram->Base);
}
@@ -1266,10 +1267,10 @@ _slang_link(GLcontext *ctx,
}
if (ctx->Shader.Flags & GLSL_DUMP) {
- printf("Varying vars:\n");
+ fprintf(stderr, "Varying vars:\n");
_mesa_print_parameter_list(shProg->Varying);
if (shProg->InfoLog) {
- printf("Info Log: %s\n", shProg->InfoLog);
+ fprintf(stderr, "Info Log: %s\n", shProg->InfoLog);
}
}
diff --git a/src/mesa/slang/slang_link.h b/src/mesa/slang/slang_link.h
index 2b44d20787..3e9fa2d743 100644
--- a/src/mesa/slang/slang_link.h
+++ b/src/mesa/slang/slang_link.h
@@ -25,7 +25,7 @@
#ifndef SLANG_LINK_H
#define SLANG_LINK_H 1
-#include "slang_compile.h"
+#include "main/mtypes.h"
extern void
diff --git a/src/mesa/slang/slang_log.h b/src/mesa/slang/slang_log.h
index dcaba0285a..544a26654e 100644
--- a/src/mesa/slang/slang_log.h
+++ b/src/mesa/slang/slang_log.h
@@ -27,6 +27,8 @@
#define SLANG_LOG_H
+#include "main/glheader.h"
+
typedef struct slang_info_log_
{
char *text;
diff --git a/src/mesa/slang/slang_mem.h b/src/mesa/slang/slang_mem.h
index b5bfae2479..0f06df3c0c 100644
--- a/src/mesa/slang/slang_mem.h
+++ b/src/mesa/slang/slang_mem.h
@@ -27,7 +27,7 @@
#define SLANG_MEM_H
-#include "main/imports.h"
+#include "main/glheader.h"
typedef struct slang_mempool_ slang_mempool;
diff --git a/src/mesa/slang/slang_print.h b/src/mesa/slang/slang_print.h
index 46605c8061..99da304143 100644
--- a/src/mesa/slang/slang_print.h
+++ b/src/mesa/slang/slang_print.h
@@ -3,6 +3,12 @@
#ifndef SLANG_PRINT
#define SLANG_PRINT
+#include "main/glheader.h"
+#include "slang_compile_function.h"
+#include "slang_compile_operation.h"
+#include "slang_compile_variable.h"
+#include "slang_typeinfo.h"
+
extern void
slang_print_function(const slang_function *f, GLboolean body);
diff --git a/src/mesa/slang/slang_simplify.h b/src/mesa/slang/slang_simplify.h
index 8689c23b1a..37fb938d4f 100644
--- a/src/mesa/slang/slang_simplify.h
+++ b/src/mesa/slang/slang_simplify.h
@@ -26,6 +26,13 @@
#define SLANG_SIMPLIFY_H
+#include "main/glheader.h"
+#include "slang_compile.h"
+#include "slang_compile_function.h"
+#include "slang_compile_operation.h"
+#include "slang_log.h"
+#include "slang_utility.h"
+
extern GLint
_slang_lookup_constant(const char *name);
diff --git a/src/mesa/slang/slang_storage.h b/src/mesa/slang/slang_storage.h
index 1876a36dd6..de1f1841a3 100644
--- a/src/mesa/slang/slang_storage.h
+++ b/src/mesa/slang/slang_storage.h
@@ -25,7 +25,12 @@
#ifndef SLANG_STORAGE_H
#define SLANG_STORAGE_H
-#include "slang_compile.h"
+#include "main/glheader.h"
+#include "slang_compile_function.h"
+#include "slang_compile_struct.h"
+#include "slang_compile_variable.h"
+#include "slang_typeinfo.h"
+#include "slang_utility.h"
/*
diff --git a/src/mesa/slang/slang_typeinfo.h b/src/mesa/slang/slang_typeinfo.h
index 2251b06325..5ddfe9612c 100644
--- a/src/mesa/slang/slang_typeinfo.h
+++ b/src/mesa/slang/slang_typeinfo.h
@@ -25,11 +25,9 @@
#ifndef SLANG_TYPEINFO_H
#define SLANG_TYPEINFO_H 1
-#include "main/imports.h"
-#include "main/mtypes.h"
+#include "main/glheader.h"
#include "slang_log.h"
#include "slang_utility.h"
-#include "slang_vartable.h"
struct slang_operation_;
diff --git a/src/mesa/slang/slang_utility.h b/src/mesa/slang/slang_utility.h
index 2c0d0bcbb2..cb9b6d2aaa 100644
--- a/src/mesa/slang/slang_utility.h
+++ b/src/mesa/slang/slang_utility.h
@@ -26,6 +26,8 @@
#define SLANG_UTILITY_H
+#include "main/glheader.h"
+
/* Compile-time assertions. If the expression is zero, try to declare an
* array of size [-1] to cause compilation error.
*/
diff --git a/src/mesa/slang/slang_vartable.h b/src/mesa/slang/slang_vartable.h
index 94bcd63f45..97945b89d0 100644
--- a/src/mesa/slang/slang_vartable.h
+++ b/src/mesa/slang/slang_vartable.h
@@ -2,6 +2,9 @@
#ifndef SLANG_VARTABLE_H
#define SLANG_VARTABLE_H
+#include "main/glheader.h"
+#include "slang_utility.h"
+
struct slang_ir_storage_;
typedef struct slang_var_table_ slang_var_table;
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index cebaad5f00..05442ef91b 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -40,7 +40,6 @@
#include "program/program.h"
#include "pipe/p_context.h"
-#include "pipe/p_shader_tokens.h"
#include "util/u_simple_shaders.h"
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 13119ce203..86bb788903 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -543,6 +543,7 @@ st_ReadBuffer(GLcontext *ctx, GLenum buffer)
void st_init_fbo_functions(struct dd_function_table *functions)
{
+#if FEATURE_EXT_framebuffer_object
functions->NewFramebuffer = st_new_framebuffer;
functions->NewRenderbuffer = st_new_renderbuffer;
functions->BindFramebuffer = st_bind_framebuffer;
@@ -550,6 +551,7 @@ void st_init_fbo_functions(struct dd_function_table *functions)
functions->RenderTexture = st_render_texture;
functions->FinishRenderTexture = st_finish_render_texture;
functions->ValidateFramebuffer = st_validate_framebuffer;
+#endif
/* no longer needed by core Mesa, drivers handle resizes...
functions->ResizeBuffers = st_resize_buffers;
*/
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 3eb4708688..2ce5f08753 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -63,7 +63,7 @@
#include "cso_cache/cso_context.h"
-DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", FALSE);
+DEBUG_GET_ONCE_BOOL_OPTION(mesa_mvp_dp4, "MESA_MVP_DP4", FALSE)
/**
diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c
index 1260fe04b1..df32491d04 100644
--- a/src/mesa/state_tracker/st_debug.c
+++ b/src/mesa/state_tracker/st_debug.c
@@ -56,7 +56,7 @@ static const struct debug_named_value st_debug_flags[] = {
DEBUG_NAMED_VALUE_END
};
-DEBUG_GET_ONCE_FLAGS_OPTION(st_debug, "ST_DEBUG", st_debug_flags, 0);
+DEBUG_GET_ONCE_FLAGS_OPTION(st_debug, "ST_DEBUG", st_debug_flags, 0)
#endif
diff --git a/src/mesa/swrast/s_alpha.h b/src/mesa/swrast/s_alpha.h
index 7a5b72e650..239484a974 100644
--- a/src/mesa/swrast/s_alpha.h
+++ b/src/mesa/swrast/s_alpha.h
@@ -28,7 +28,8 @@
#define S_ALPHA_H
-#include "s_context.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern GLint
diff --git a/src/mesa/swrast/s_atifragshader.c b/src/mesa/swrast/s_atifragshader.c
index 336415d57a..1338b6802d 100644
--- a/src/mesa/swrast/s_atifragshader.c
+++ b/src/mesa/swrast/s_atifragshader.c
@@ -24,6 +24,7 @@
#include "main/macros.h"
#include "main/atifragshader.h"
#include "swrast/s_atifragshader.h"
+#include "swrast/s_context.h"
/**
diff --git a/src/mesa/swrast/s_atifragshader.h b/src/mesa/swrast/s_atifragshader.h
index 871a0c0455..cce455a046 100644
--- a/src/mesa/swrast/s_atifragshader.h
+++ b/src/mesa/swrast/s_atifragshader.h
@@ -27,7 +27,8 @@
#define S_ATIFRAGSHADER_H
-#include "s_context.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
diff --git a/src/mesa/swrast/s_blend.h b/src/mesa/swrast/s_blend.h
index 8d5a81635d..9cedde3bf2 100644
--- a/src/mesa/swrast/s_blend.h
+++ b/src/mesa/swrast/s_blend.h
@@ -27,7 +27,8 @@
#define S_BLEND_H
-#include "s_context.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
diff --git a/src/mesa/swrast/s_context.h b/src/mesa/swrast/s_context.h
index c9755e6da1..6d81f74768 100644
--- a/src/mesa/swrast/s_context.h
+++ b/src/mesa/swrast/s_context.h
@@ -43,6 +43,7 @@
#ifndef S_CONTEXT_H
#define S_CONTEXT_H
+#include "main/compiler.h"
#include "main/mtypes.h"
#include "program/prog_execute.h"
#include "swrast.h"
diff --git a/src/mesa/swrast/s_depth.c b/src/mesa/swrast/s_depth.c
index ed637cac12..f952fd6baa 100644
--- a/src/mesa/swrast/s_depth.c
+++ b/src/mesa/swrast/s_depth.c
@@ -30,7 +30,6 @@
#include "main/imports.h"
#include "s_depth.h"
-#include "s_context.h"
#include "s_span.h"
diff --git a/src/mesa/swrast/s_depth.h b/src/mesa/swrast/s_depth.h
index 7eae366742..878d242f5e 100644
--- a/src/mesa/swrast/s_depth.h
+++ b/src/mesa/swrast/s_depth.h
@@ -27,7 +27,8 @@
#define S_DEPTH_H
-#include "s_context.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern GLuint
diff --git a/src/mesa/swrast/s_fog.h b/src/mesa/swrast/s_fog.h
index 06107de3f9..a496746d10 100644
--- a/src/mesa/swrast/s_fog.h
+++ b/src/mesa/swrast/s_fog.h
@@ -28,7 +28,8 @@
#define S_FOG_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern GLfloat
diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c
index a2c2a10c3d..9facb44d9b 100644
--- a/src/mesa/swrast/s_fragprog.c
+++ b/src/mesa/swrast/s_fragprog.c
@@ -26,6 +26,7 @@
#include "main/colormac.h"
#include "program/prog_instruction.h"
+#include "s_context.h"
#include "s_fragprog.h"
#include "s_span.h"
diff --git a/src/mesa/swrast/s_fragprog.h b/src/mesa/swrast/s_fragprog.h
index e1b7e67918..92b9d01e17 100644
--- a/src/mesa/swrast/s_fragprog.h
+++ b/src/mesa/swrast/s_fragprog.h
@@ -27,7 +27,8 @@
#define S_FRAGPROG_H
-#include "s_context.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
diff --git a/src/mesa/swrast/s_logic.h b/src/mesa/swrast/s_logic.h
index e8cfae33f2..d609513348 100644
--- a/src/mesa/swrast/s_logic.h
+++ b/src/mesa/swrast/s_logic.h
@@ -27,7 +27,8 @@
#define S_LOGIC_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
_swrast_logicop_rgba_span(GLcontext *ctx, struct gl_renderbuffer *rb,
diff --git a/src/mesa/swrast/s_masking.h b/src/mesa/swrast/s_masking.h
index 3ba4f8356c..cb000da0fd 100644
--- a/src/mesa/swrast/s_masking.h
+++ b/src/mesa/swrast/s_masking.h
@@ -27,7 +27,8 @@
#define S_MASKING_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
diff --git a/src/mesa/swrast/s_span.c b/src/mesa/swrast/s_span.c
index 3b3929a41b..8931cdec1b 100644
--- a/src/mesa/swrast/s_span.c
+++ b/src/mesa/swrast/s_span.c
@@ -970,6 +970,10 @@ shade_texture_span(GLcontext *ctx, SWspan *span)
if (span->primitive == GL_BITMAP && span->array->ChanType != GL_FLOAT) {
convert_color_type(span, GL_FLOAT, 0);
}
+ else {
+ span->array->rgba = (void *) span->array->attribs[FRAG_ATTRIB_COL0];
+ }
+
if (span->primitive != GL_POINT ||
(span->interpMask & SPAN_RGBA) ||
ctx->Point.PointSprite) {
@@ -1221,9 +1225,22 @@ _swrast_write_rgba_span( GLcontext *ctx, SWspan *span)
GLchan rgbaSave[MAX_WIDTH][4];
const GLuint fragOutput = multiFragOutputs ? buf : 0;
+ /* set span->array->rgba to colors for render buffer's datatype */
if (rb->DataType != span->array->ChanType || fragOutput > 0) {
convert_color_type(span, rb->DataType, fragOutput);
}
+ else {
+ if (rb->DataType == GL_UNSIGNED_BYTE) {
+ span->array->rgba = span->array->rgba8;
+ }
+ else if (rb->DataType == GL_UNSIGNED_SHORT) {
+ span->array->rgba = (void *) span->array->rgba16;
+ }
+ else {
+ span->array->rgba = (void *)
+ span->array->attribs[FRAG_ATTRIB_COL0];
+ }
+ }
if (!multiFragOutputs && numBuffers > 1) {
/* save colors for second, third renderbuffer writes */
diff --git a/src/mesa/swrast/s_stencil.h b/src/mesa/swrast/s_stencil.h
index cd6cbc57b0..c076ebbe2a 100644
--- a/src/mesa/swrast/s_stencil.h
+++ b/src/mesa/swrast/s_stencil.h
@@ -27,7 +27,8 @@
#define S_STENCIL_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
diff --git a/src/mesa/swrast/s_texcombine.h b/src/mesa/swrast/s_texcombine.h
index 9ed96efb87..4f5dfbe1af 100644
--- a/src/mesa/swrast/s_texcombine.h
+++ b/src/mesa/swrast/s_texcombine.h
@@ -27,7 +27,8 @@
#define S_TEXCOMBINE_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
_swrast_texture_span( GLcontext *ctx, SWspan *span );
diff --git a/src/mesa/swrast/s_texfilter.h b/src/mesa/swrast/s_texfilter.h
index 2e265d685c..eceab59658 100644
--- a/src/mesa/swrast/s_texfilter.h
+++ b/src/mesa/swrast/s_texfilter.h
@@ -27,7 +27,8 @@
#define S_TEXFILTER_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_context.h"
extern texture_sample_func
diff --git a/src/mesa/swrast/s_zoom.h b/src/mesa/swrast/s_zoom.h
index 43917be65f..09f624efad 100644
--- a/src/mesa/swrast/s_zoom.h
+++ b/src/mesa/swrast/s_zoom.h
@@ -25,7 +25,8 @@
#ifndef S_ZOOM_H
#define S_ZOOM_H
-#include "swrast.h"
+#include "main/mtypes.h"
+#include "s_span.h"
extern void
diff --git a/src/mesa/swrast_setup/ss_context.h b/src/mesa/swrast_setup/ss_context.h
index 1ec293fade..56551ab273 100644
--- a/src/mesa/swrast_setup/ss_context.h
+++ b/src/mesa/swrast_setup/ss_context.h
@@ -28,9 +28,8 @@
#ifndef SS_CONTEXT_H
#define SS_CONTEXT_H
-#include "main/mtypes.h"
+#include "main/glheader.h"
#include "swrast/swrast.h"
-#include "swrast_setup.h"
#include "tnl/t_context.h"
typedef struct {
diff --git a/src/mesa/swrast_setup/ss_triangle.h b/src/mesa/swrast_setup/ss_triangle.h
index 007fa2e914..ac553cbd01 100644
--- a/src/mesa/swrast_setup/ss_triangle.h
+++ b/src/mesa/swrast_setup/ss_triangle.h
@@ -29,7 +29,7 @@
#ifndef SS_TRIANGLE_H
#define SS_TRIANGLE_H
-#include "ss_context.h"
+#include "main/mtypes.h"
void _swsetup_trifuncs_init( GLcontext *ctx );
diff --git a/src/mesa/swrast_setup/ss_vb.h b/src/mesa/swrast_setup/ss_vb.h
index 2ad1f56f39..944a3b78d8 100644
--- a/src/mesa/swrast_setup/ss_vb.h
+++ b/src/mesa/swrast_setup/ss_vb.h
@@ -30,7 +30,6 @@
#define SS_VB_H
#include "main/mtypes.h"
-#include "swrast_setup.h"
void _swsetup_vb_init( GLcontext *ctx );
void _swsetup_choose_rastersetup_func( GLcontext *ctx );
diff --git a/src/mesa/tnl/t_context.h b/src/mesa/tnl/t_context.h
index ebaae6335b..258906f795 100644
--- a/src/mesa/tnl/t_context.h
+++ b/src/mesa/tnl/t_context.h
@@ -53,9 +53,7 @@
#include "main/bitset.h"
#include "main/mtypes.h"
-#include "math/m_matrix.h"
#include "math/m_vector.h"
-#include "math/m_xform.h"
#include "vbo/vbo.h"
diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c
index 3973df9a67..f3a338ef1e 100644
--- a/src/mesa/tnl/t_vb_program.c
+++ b/src/mesa/tnl/t_vb_program.c
@@ -35,6 +35,7 @@
#include "main/colormac.h"
#include "main/macros.h"
#include "main/imports.h"
+#include "math/m_xform.h"
#include "program/prog_instruction.h"
#include "program/prog_statevars.h"
#include "program/prog_execute.h"
diff --git a/src/mesa/tnl/t_vb_render.c b/src/mesa/tnl/t_vb_render.c
index c1bebc9942..7d991009a1 100644
--- a/src/mesa/tnl/t_vb_render.c
+++ b/src/mesa/tnl/t_vb_render.c
@@ -44,6 +44,7 @@
#include "main/macros.h"
#include "main/imports.h"
#include "main/mtypes.h"
+#include "math/m_xform.h"
#include "t_pipeline.h"
diff --git a/src/mesa/vf/vf.h b/src/mesa/vf/vf.h
index 83d7547619..5fe392bbe5 100644
--- a/src/mesa/vf/vf.h
+++ b/src/mesa/vf/vf.h
@@ -28,7 +28,7 @@
#ifndef VF_VERTEX_H
#define VF_VERTEX_H
-#include "main/mtypes.h"
+#include "main/glheader.h"
#include "math/m_vector.h"
enum {
diff --git a/src/mesa/x86/3dnow.h b/src/mesa/x86/3dnow.h
index df9f2638d7..1c1fedcd4f 100644
--- a/src/mesa/x86/3dnow.h
+++ b/src/mesa/x86/3dnow.h
@@ -31,8 +31,6 @@
#ifndef __3DNOW_H__
#define __3DNOW_H__
-#include "math/m_xform.h"
-
void _mesa_init_3dnow_transform_asm( void );
#endif
diff --git a/src/mesa/x86/mmx.h b/src/mesa/x86/mmx.h
index 5641936bdb..47a0d4b54d 100644
--- a/src/mesa/x86/mmx.h
+++ b/src/mesa/x86/mmx.h
@@ -26,6 +26,9 @@
#ifndef ASM_MMX_H
#define ASM_MMX_H
+#include "main/compiler.h"
+#include "main/mtypes.h"
+
extern void _ASMAPI
_mesa_mmx_blend_transparency( GLcontext *ctx, GLuint n, const GLubyte mask[],
GLvoid *rgba, const GLvoid *dest,
diff --git a/src/mesa/x86/sse.h b/src/mesa/x86/sse.h
index 521f91e411..e92ddc1394 100644
--- a/src/mesa/x86/sse.h
+++ b/src/mesa/x86/sse.h
@@ -31,8 +31,6 @@
#ifndef __SSE_H__
#define __SSE_H__
-#include "math/m_xform.h"
-
void _mesa_init_sse_transform_asm( void );
#endif