diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
43 files changed, 1571 insertions, 1896 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index e3ca863fe5..7c3ac0c14e 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -81,7 +81,6 @@ DRIVER_SOURCES = \ brw_wm_emit.c \ brw_wm_fp.c \ brw_wm_iz.c \ - brw_wm_glsl.c \ brw_wm_pass0.c \ brw_wm_pass1.c \ brw_wm_pass2.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index a8369b07c3..d3a1233aac 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -232,3 +232,28 @@ const struct brw_tracked_state brw_cc_unit = { .prepare = prepare_cc_unit, .emit = upload_cc_unit, }; + +static void upload_blend_constant_color(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + struct brw_blend_constant_color bcc; + + memset(&bcc, 0, sizeof(bcc)); + bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc.header.length = sizeof(bcc)/4-2; + bcc.blend_constant_color[0] = ctx->Color.BlendColor[0]; + bcc.blend_constant_color[1] = ctx->Color.BlendColor[1]; + bcc.blend_constant_color[2] = ctx->Color.BlendColor[2]; + bcc.blend_constant_color[3] = ctx->Color.BlendColor[3]; + + BRW_CACHED_BATCH_STRUCT(brw, &bcc); +} + +const struct brw_tracked_state brw_blend_constant_color = { + .dirty = { + .mesa = _NEW_COLOR, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .emit = upload_blend_constant_color +}; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index cb0a8b96c9..28549f2574 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -122,9 +122,6 @@ GLboolean brwCreateContext( int api, (i == MESA_SHADER_FRAGMENT); ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp = (i == MESA_SHADER_FRAGMENT); - - if (intel->gen == 6) - ctx->ShaderCompilerOptions[i].EmitNoIfs = (i == MESA_SHADER_VERTEX); } ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 335339515a..7069724466 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -171,7 +171,6 @@ struct brw_vertex_program { struct brw_fragment_program { struct gl_fragment_program program; GLuint id; /**< serial no. to identify frag progs, never re-used */ - GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ /** for debugging, which texture units are referenced */ GLbitfield tex_units_used; @@ -211,6 +210,7 @@ struct brw_wm_prog_data { GLuint nr_params; /**< number of float params/constants */ GLuint nr_pull_params; GLboolean error; + int dispatch_width; /* Pointer to tracked values (only valid once * _mesa_load_state_parameters has been called at runtime). diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 7b823eb201..877b22fec1 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -242,21 +242,13 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - if (vp->use_const_buffer) { - /* Load the subset of push constants that will get used when - * we also have a pull constant buffer. - */ - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - assert(brw->vs.constant_map[i] <= nr); - memcpy(buf + offset + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - } - } - } else { - for (i = 0; i < nr; i++) { - memcpy(buf + offset + i * 4, + /* Load the subset of push constants that will get used when + * we also have a pull constant buffer. + */ + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + assert(brw->vs.constant_map[i] <= nr); + memcpy(buf + offset + brw->vs.constant_map[i] * 4, vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); } diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 239586a036..b48a30d6be 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -462,6 +462,13 @@ #define BRW_COMPRESSION_2NDHALF 1 #define BRW_COMPRESSION_COMPRESSED 2 +#define GEN6_COMPRESSION_1Q 0 +#define GEN6_COMPRESSION_2Q 1 +#define GEN6_COMPRESSION_3Q 2 +#define GEN6_COMPRESSION_4Q 3 +#define GEN6_COMPRESSION_1H 0 +#define GEN6_COMPRESSION_2H 2 + #define BRW_CONDITIONAL_NONE 0 #define BRW_CONDITIONAL_Z 1 #define BRW_CONDITIONAL_NZ 2 @@ -899,6 +906,8 @@ # define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) # define GEN6_VS_SAMPLER_COUNT_SHIFT 27 # define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) /* DW4 */ # define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 # define GEN6_VS_URB_READ_LENGTH_SHIFT 11 @@ -1022,6 +1031,13 @@ # define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 # define ATTRIBUTE_0_SWIZZLE_SHIFT 6 # define ATTRIBUTE_0_SOURCE_SHIFT 0 + +# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 +# define ATTRIBUTE_SWIZZLE_SHIFT 6 + /* DW16: Point sprite texture coordinate enables */ /* DW17: Constant interpolation enables */ /* DW18: attr 0-7 wrap shortest enables */ @@ -1034,6 +1050,8 @@ # define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) # define GEN6_WM_SAMPLER_COUNT_SHIFT 27 # define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16) /* DW3: scratch space */ /* DW4 */ # define GEN6_WM_STATISTICS_ENABLE (1 << 31) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 962c04128b..6b61f7af15 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -899,7 +899,8 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) err |= dest (file, inst); } else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF || inst->header.opcode == BRW_OPCODE_ELSE || - inst->header.opcode == BRW_OPCODE_ENDIF)) { + inst->header.opcode == BRW_OPCODE_ENDIF || + inst->header.opcode == BRW_OPCODE_WHILE)) { format (file, " %d", inst->bits1.branch_gen6.jump_count); } diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index a1f403ca4e..7eb16b71f4 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -159,7 +159,7 @@ static void brw_emit_prim(struct brw_context *brw, } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, - sizeof(prim_packet)); + sizeof(prim_packet), false); } if (intel->always_flush_cache) { intel_batchbuffer_emit_mi_flush(intel->batch); @@ -351,7 +351,8 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx, * an upper bound of how much we might emit in a single * brw_try_draw_prims(). */ - intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4); + intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, + false); hw_prim = brw_set_prim(brw, &prim[i]); diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index 2ff39e8e64..3b5c4c071e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -72,7 +72,37 @@ void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control ) { - p->current->header.compression_control = compression_control; + p->compressed = (compression_control == BRW_COMPRESSION_COMPRESSED); + + if (p->brw->intel.gen >= 6) { + /* Since we don't use the 32-wide support in gen6, we translate + * the pre-gen6 compression control here. + */ + switch (compression_control) { + case BRW_COMPRESSION_NONE: + /* This is the "use the first set of bits of dmask/vmask/arf + * according to execsize" option. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1Q; + break; + case BRW_COMPRESSION_2NDHALF: + /* For 8-wide, this is "use the second set of 8 bits." */ + p->current->header.compression_control = GEN6_COMPRESSION_2Q; + break; + case BRW_COMPRESSION_COMPRESSED: + /* For 16-wide instruction compression, use the first set of 16 bits + * since we don't do 32-wide dispatch. + */ + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + default: + assert(!"not reached"); + p->current->header.compression_control = GEN6_COMPRESSION_1H; + break; + } + } else { + p->current->header.compression_control = compression_control; + } } void brw_set_mask_control( struct brw_compile *p, GLuint value ) @@ -95,6 +125,7 @@ void brw_push_insn_state( struct brw_compile *p ) { assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->compressed_stack[p->current - p->stack] = p->compressed; p->current++; } @@ -102,6 +133,7 @@ void brw_pop_insn_state( struct brw_compile *p ) { assert(p->current != p->stack); p->current--; + p->compressed = p->compressed_stack[p->current - p->stack]; } @@ -112,6 +144,7 @@ void brw_init_compile( struct brw_context *brw, struct brw_compile *p ) p->brw = brw; p->nr_insn = 0; p->current = p->stack; + p->compressed = false; memset(p->current, 0, sizeof(p->current[0])); /* Some defaults? diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index b4538e6e8a..4dbdc52210 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -33,6 +33,7 @@ #ifndef BRW_EU_H #define BRW_EU_H +#include <stdbool.h> #include "brw_structs.h" #include "brw_defines.h" #include "program/prog_instruction.h" @@ -106,10 +107,12 @@ struct brw_compile { /* Allow clients to push/pop instruction state: */ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + bool compressed_stack[BRW_EU_MAX_INSN_STACK]; struct brw_instruction *current; GLuint flag_value; GLboolean single_program_flow; + bool compressed; struct brw_context *brw; struct brw_glsl_label *first_label; /**< linked list of labels */ @@ -954,6 +957,8 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *patch_insn); struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count); +struct brw_instruction *brw_CONT_gen6(struct brw_compile *p, + struct brw_instruction *do_insn); struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count); /* Forward jumps: */ @@ -1009,6 +1014,7 @@ void brw_math_invert( struct brw_compile *p, void brw_set_src1( struct brw_instruction *insn, struct brw_reg reg ); +void brw_set_uip_jip(struct brw_compile *p); /* brw_optimize.c */ void brw_optimize(struct brw_compile *p); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 9cb941dacf..9c764fe779 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -41,19 +41,20 @@ * Internal helper for constructing instructions */ -static void guess_execution_size( struct brw_instruction *insn, - struct brw_reg reg ) +static void guess_execution_size(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) { - if (reg.width == BRW_WIDTH_8 && - insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + if (reg.width == BRW_WIDTH_8 && p->compressed) insn->header.execution_size = BRW_EXECUTE_16; else insn->header.execution_size = reg.width; /* note - definitions are compatible */ } -static void brw_set_dest( struct brw_instruction *insn, - struct brw_reg dest ) +static void brw_set_dest(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg dest) { if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) @@ -100,7 +101,7 @@ static void brw_set_dest( struct brw_instruction *insn, /* NEW: Set the execution size based on dest.width and * insn->compression_control: */ - guess_execution_size(insn, dest); + guess_execution_size(p, insn, dest); } extern int reg_type_size[]; @@ -629,7 +630,7 @@ static struct brw_instruction *brw_alu1( struct brw_compile *p, struct brw_reg src ) { struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); return insn; } @@ -641,7 +642,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p, struct brw_reg src1 ) { struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); return insn; @@ -680,7 +681,7 @@ void brw_##OP(struct brw_compile *p, \ { \ struct brw_instruction *rnd, *add; \ rnd = next_insn(p, BRW_OPCODE_##OP); \ - brw_set_dest(rnd, dest); \ + brw_set_dest(p, rnd, dest); \ brw_set_src0(rnd, src); \ rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ \ @@ -779,7 +780,7 @@ struct brw_instruction *brw_MUL(struct brw_compile *p, void brw_NOP(struct brw_compile *p) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_ud(0x0)); } @@ -840,11 +841,11 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) /* Override the defaults for this instruction: */ if (intel->gen < 6) { - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -870,7 +871,7 @@ brw_IF_gen6(struct brw_compile *p, uint32_t conditional, insn = next_insn(p, BRW_OPCODE_IF); - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->header.execution_size = BRW_EXECUTE_8; insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, src0); @@ -905,11 +906,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, } if (intel->gen < 6) { - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); @@ -965,11 +966,11 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); if (intel->gen < 6) { - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_set_src1(insn, brw_imm_d(0x0)); } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_imm_w(0)); brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); } @@ -1029,16 +1030,44 @@ void brw_ENDIF(struct brw_compile *p, struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); - brw_set_dest(insn, brw_ip_reg()); + if (intel->gen >= 6) { + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + + return insn; +} + +struct brw_instruction *brw_CONT_gen6(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + int br = 2; + + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); + + insn->bits3.break_cont.uip = br * (do_insn - insn); + insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = BRW_EXECUTE_8; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - insn->bits3.if_else.pad0 = 0; - insn->bits3.if_else.pop_count = pop_count; return insn; } @@ -1046,7 +1075,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_CONTINUE); - brw_set_dest(insn, brw_ip_reg()); + brw_set_dest(p, insn, brw_ip_reg()); brw_set_src0(insn, brw_ip_reg()); brw_set_src1(insn, brw_imm_d(0x0)); insn->header.compression_control = BRW_COMPRESSION_NONE; @@ -1058,17 +1087,33 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) } /* DO/WHILE loop: + * + * The DO/WHILE is just an unterminated loop -- break or continue are + * used for control within the loop. We have a few ways they can be + * done. + * + * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, + * jip and no DO instruction. + * + * For non-uniform control flow pre-gen6, there's a DO instruction to + * push the mask, and a WHILE to jump back, and BREAK to get out and + * pop the mask. + * + * For gen6, there's no more mask stack, so no need for DO. WHILE + * just points back to the first instruction of the loop. */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) { - if (p->single_program_flow) { + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6 || p->single_program_flow) { return &p->store[p->nr_insn]; } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); /* Override the defaults for this instruction: */ - brw_set_dest(insn, brw_null_reg()); + brw_set_dest(p, insn, brw_null_reg()); brw_set_src0(insn, brw_null_reg()); brw_set_src1(insn, brw_null_reg()); @@ -1094,34 +1139,42 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, if (intel->gen >= 5) br = 2; - if (p->single_program_flow) - insn = next_insn(p, BRW_OPCODE_ADD); - else + if (intel->gen >= 6) { insn = next_insn(p, BRW_OPCODE_WHILE); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + brw_set_dest(p, insn, brw_imm_w(0)); + insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); + brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = do_insn->header.execution_size; + assert(insn->header.execution_size == BRW_EXECUTE_8); + } else { + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); - if (p->single_program_flow) { - insn->header.execution_size = BRW_EXECUTE_1; + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d((do_insn - insn) * 16)); + insn->header.execution_size = BRW_EXECUTE_1; + } else { + insn = next_insn(p, BRW_OPCODE_WHILE); - insn->bits3.d = (do_insn - insn) * 16; - } else { - insn->header.execution_size = do_insn->header.execution_size; + assert(do_insn->header.opcode == BRW_OPCODE_DO); - assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; - } + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0)); -/* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.execution_size = do_insn->header.execution_size; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; } @@ -1159,7 +1212,7 @@ void brw_CMP(struct brw_compile *p, struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); insn->header.destreg__conditionalmod = conditional; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); @@ -1184,7 +1237,7 @@ void brw_WAIT (struct brw_compile *p) struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); struct brw_reg src = brw_notification_1_reg(); - brw_set_dest(insn, src); + brw_set_dest(p, insn, src); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); insn->header.execution_size = 0; /* must */ @@ -1219,6 +1272,10 @@ void brw_math( struct brw_compile *p, assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { assert(src.type == BRW_REGISTER_TYPE_F); @@ -1228,8 +1285,9 @@ void brw_math( struct brw_compile *p, * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); } else { @@ -1242,7 +1300,7 @@ void brw_math( struct brw_compile *p, insn->header.predicate_control = 0; insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1284,12 +1342,18 @@ void brw_math2(struct brw_compile *p, assert(src1.type == BRW_REGISTER_TYPE_F); } + /* Source modifiers are ignored for extended math instructions. */ + assert(!src0.negate); + assert(!src0.abs); + assert(!src1.negate); + assert(!src1.abs); + /* Math is the same ISA format as other opcodes, except that CondModifier * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, src1); } @@ -1318,8 +1382,13 @@ void brw_math_16( struct brw_compile *p, * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; + + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_src1(insn, brw_null_reg()); return; @@ -1334,7 +1403,7 @@ void brw_math_16( struct brw_compile *p, insn = next_insn(p, BRW_OPCODE_SEND); insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1351,7 +1420,7 @@ void brw_math_16( struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_2NDHALF; insn->header.destreg__conditionalmod = msg_reg_nr+1; - brw_set_dest(insn, offset(dest,1)); + brw_set_dest(p, insn, offset(dest,1)); brw_set_src0(insn, src); brw_set_math_message(p->brw, insn, @@ -1446,7 +1515,7 @@ void brw_oword_block_write_scratch(struct brw_compile *p, send_commit_msg = 1; } - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_null_reg()); brw_set_dp_write_message(p->brw, @@ -1516,7 +1585,7 @@ brw_oword_block_read_scratch(struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = mrf.nr; - brw_set_dest(insn, dest); /* UW? */ + brw_set_dest(p, insn, dest); /* UW? */ brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(p->brw, @@ -1569,7 +1638,7 @@ void brw_oword_block_read(struct brw_compile *p, /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); if (intel->gen >= 6) { brw_set_src0(insn, mrf); } else { @@ -1614,7 +1683,7 @@ void brw_dword_scattered_read(struct brw_compile *p, /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_null_reg()); brw_set_dp_read_message(p->brw, @@ -1639,29 +1708,21 @@ void brw_dp_READ_4_vs(struct brw_compile *p, GLuint location, GLuint bind_table_index) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_reg_nr = 1; - struct brw_reg b; - /* - printf("vs const read msg, location %u, msg_reg_nr %d\n", - location, msg_reg_nr); - */ + if (intel->gen >= 6) + location /= 16; /* Setup MRF[1] with location/offset into const buffer */ brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - /* XXX I think we're setting all the dwords of MRF[1] to 'location'. - * when the docs say only dword[2] should be set. Hmmm. But it works. - */ - b = brw_message_reg(msg_reg_nr); - b = retype(b, BRW_REGISTER_TYPE_UD); - /*b = get_element_ud(b, 2);*/ - brw_MOV(p, b, brw_imm_ud(location)); - + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2), + BRW_REGISTER_TYPE_UD), + brw_imm_ud(location)); brw_pop_insn_state(p); insn = next_insn(p, BRW_OPCODE_SEND); @@ -1671,8 +1732,12 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; - brw_set_dest(insn, dest); - brw_set_src0(insn, brw_null_reg()); + brw_set_dest(p, insn, dest); + if (intel->gen >= 6) { + brw_set_src0(insn, brw_message_reg(msg_reg_nr)); + } else { + brw_set_src0(insn, brw_null_reg()); + } brw_set_dp_read_message(p->brw, insn, @@ -1706,7 +1771,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, /* M1.0 is block offset 0, M1.4 is block offset 1, all other * fields ignored. */ - brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), + brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D), addr_reg, brw_imm_d(offset)); brw_pop_insn_state(p); @@ -1717,7 +1782,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, insn->header.destreg__conditionalmod = 0; insn->header.mask_control = BRW_MASK_DISABLE; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, brw_vec8_grf(0, 0)); if (intel->gen == 6) @@ -1782,7 +1847,7 @@ void brw_fb_WRITE(struct brw_compile *p, else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_dp_write_message(p->brw, insn, @@ -1860,7 +1925,7 @@ void brw_SAMPLE(struct brw_compile *p, struct brw_reg m1 = brw_message_reg(msg_reg_nr); - guess_execution_size(p->current, dest); + guess_execution_size(p, p->current, dest); if (p->current->header.execution_size == BRW_EXECUTE_16) dispatch_16 = GL_TRUE; @@ -1895,12 +1960,15 @@ void brw_SAMPLE(struct brw_compile *p, * and the first message register index comes from src0. */ if (intel->gen >= 6) { - brw_push_insn_state(p); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - /* m1 contains header? */ - brw_MOV(p, brw_message_reg(msg_reg_nr), src0); - brw_pop_insn_state(p); - src0 = brw_message_reg(msg_reg_nr); + if (src0.file != BRW_ARCHITECTURE_REGISTER_FILE || + src0.nr != BRW_ARF_NULL) { + brw_push_insn_state(p); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, retype(brw_message_reg(msg_reg_nr), src0.type), src0); + brw_pop_insn_state(p); + } + src0 = brw_message_reg(msg_reg_nr); } insn = next_insn(p, BRW_OPCODE_SEND); @@ -1909,7 +1977,7 @@ void brw_SAMPLE(struct brw_compile *p, if (intel->gen < 6) insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_sampler_message(p->brw, insn, binding_table_index, @@ -1970,7 +2038,7 @@ void brw_urb_WRITE(struct brw_compile *p, assert(msg_length < BRW_MAX_MRF); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); @@ -1989,6 +2057,80 @@ void brw_urb_WRITE(struct brw_compile *p, swizzle); } +static int +brw_find_next_block_end(struct brw_compile *p, int start) +{ + int ip; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* There is no DO instruction on gen6, so to find the end of the loop + * we have to see if the loop is jumping back before our start + * instruction. + */ +static int +brw_find_loop_end(struct brw_compile *p, int start) +{ + int ip; + int br = 2; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + if (insn->header.opcode == BRW_OPCODE_WHILE) { + if (ip + insn->bits1.branch_gen6.jump_count / br < start) + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* After program generation, go back and update the UIP and JIP of + * BREAK and CONT instructions to their correct locations. + */ +void +brw_set_uip_jip(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + int ip; + int br = 2; + + if (intel->gen < 6) + return; + + for (ip = 0; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_BREAK: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1); + break; + case BRW_OPCODE_CONTINUE: + /* JIP is set at CONTINUE emit time, since that's when we + * know where the start of the loop is. + */ + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; + } + } +} + void brw_ff_sync(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -2013,7 +2155,7 @@ void brw_ff_sync(struct brw_compile *p, } insn = next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(insn, dest); + brw_set_dest(p, insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 283d5aad49..4eead32cbb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -101,10 +101,12 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) clone_ir_list(mem_ctx, shader->ir, shader->base.ir); do_mat_op_to_vec(shader->ir); - do_mod_to_fract(shader->ir); - do_div_to_mul_rcp(shader->ir); - do_sub_to_add_neg(shader->ir); - do_explog_to_explog2(shader->ir); + lower_instructions(shader->ir, + MOD_TO_FRACT | + DIV_TO_MUL_RCP | + SUB_TO_ADD_NEG | + EXP_TO_EXP2 | + LOG_TO_LOG2); do_lower_texture_projection(shader->ir); brw_do_cubemap_normalize(shader->ir); @@ -130,6 +132,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) GL_TRUE, /* temp */ GL_TRUE /* uniform */ ) || progress; + progress = lower_quadop_vector(shader->ir, false) || progress; } while (progress); validate_ir_tree(shader->ir); @@ -174,6 +177,46 @@ type_size(const struct glsl_type *type) } } +/** + * Returns how many MRFs an FS opcode will write over. + * + * Note that this is not the 0 or 1 implied writes in an actual gen + * instruction -- the FS opcodes often generate MOVs in addition. + */ +int +fs_visitor::implied_mrf_writes(fs_inst *inst) +{ + if (inst->mlen == 0) + return 0; + + switch (inst->opcode) { + case FS_OPCODE_RCP: + case FS_OPCODE_RSQ: + case FS_OPCODE_SQRT: + case FS_OPCODE_EXP2: + case FS_OPCODE_LOG2: + case FS_OPCODE_SIN: + case FS_OPCODE_COS: + return 1; + case FS_OPCODE_POW: + return 2; + case FS_OPCODE_TEX: + case FS_OPCODE_TXB: + case FS_OPCODE_TXL: + return 1; + case FS_OPCODE_FB_WRITE: + return 2; + case FS_OPCODE_PULL_CONSTANT_LOAD: + case FS_OPCODE_UNSPILL: + return 1; + case FS_OPCODE_SPILL: + return 2; + default: + assert(!"not reached"); + return inst->mlen; + } +} + int fs_visitor::virtual_grf_alloc(int size) { @@ -299,6 +342,10 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) case GLSL_TYPE_BOOL: c->prog_data.param_convert[param] = PARAM_CONVERT_F2B; break; + default: + assert(!"not reached"); + c->prog_data.param_convert[param] = PARAM_NO_CONVERT; + break; } c->prog_data.param[param] = &vec_values[i]; @@ -400,6 +447,7 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) fs_reg wpos = *reg; fs_reg neg_y = this->pixel_y; neg_y.negate = true; + bool flip = !ir->origin_upper_left ^ c->key.render_to_fbo; /* gl_FragCoord.x */ if (ir->pixel_center_integer) { @@ -410,13 +458,13 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) wpos.reg_offset++; /* gl_FragCoord.y */ - if (ir->origin_upper_left && ir->pixel_center_integer) { + if (!flip && ir->pixel_center_integer) { emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); } else { fs_reg pixel_y = this->pixel_y; float offset = (ir->pixel_center_integer ? 0.0 : 0.5); - if (!ir->origin_upper_left) { + if (flip) { pixel_y.negate = true; offset += c->key.drawable_height - 1.0; } @@ -426,8 +474,13 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) wpos.reg_offset++; /* gl_FragCoord.z */ - emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, - interp_reg(FRAG_ATTRIB_WPOS, 2))); + if (intel->gen >= 6) { + emit(fs_inst(BRW_OPCODE_MOV, wpos, + fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); + } else { + emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, + interp_reg(FRAG_ATTRIB_WPOS, 2))); + } wpos.reg_offset++; /* gl_FragCoord.w: Already set up in emit_interpolation */ @@ -552,8 +605,13 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) * might be able to do better by doing execsize = 1 math and then * expanding that result out, but we would need to be careful with * masking. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. */ - if (intel->gen >= 6 && src.file == UNIFORM) { + if (intel->gen >= 6 && (src.file == UNIFORM || + src.abs || + src.negate)) { fs_reg expanded = fs_reg(this, glsl_type::float_type); emit(fs_inst(BRW_OPCODE_MOV, expanded, src)); src = expanded; @@ -696,6 +754,27 @@ fs_visitor::visit(ir_dereference_array *ir) } } +/* Instruction selection: Produce a MOV.sat instead of + * MIN(MAX(val, 0), 1) when possible. + */ +bool +fs_visitor::try_emit_saturate(ir_expression *ir) +{ + ir_rvalue *sat_val = ir->as_rvalue_to_saturate(); + + if (!sat_val) + return false; + + sat_val->accept(this); + fs_reg src = this->result; + + this->result = fs_reg(this, ir->type); + fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, src)); + inst->saturate = true; + + return true; +} + void fs_visitor::visit(ir_expression *ir) { @@ -703,6 +782,11 @@ fs_visitor::visit(ir_expression *ir) fs_reg op[2], temp; fs_inst *inst; + assert(ir->get_num_operands() <= 2); + + if (try_emit_saturate(ir)) + return; + for (operand = 0; operand < ir->get_num_operands(); operand++) { ir->operands[operand]->accept(this); if (this->result.file == BAD_FILE) { @@ -773,9 +857,11 @@ fs_visitor::visit(ir_expression *ir) assert(!"not reached: should be handled by ir_explog_to_explog2"); break; case ir_unop_sin: + case ir_unop_sin_reduced: emit_math(FS_OPCODE_SIN, this->result, op[0]); break; case ir_unop_cos: + case ir_unop_cos_reduced: emit_math(FS_OPCODE_COS, this->result, op[0]); break; @@ -849,7 +935,6 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_dot: - case ir_binop_cross: case ir_unop_any: assert(!"not reached: should be handled by brw_fs_channel_expressions"); break; @@ -858,6 +943,10 @@ fs_visitor::visit(ir_expression *ir) assert(!"not reached: should be handled by lower_noise"); break; + case ir_quadop_vector: + assert(!"not reached: should be handled by lower_quadop_vector"); + break; + case ir_unop_sqrt: emit_math(FS_OPCODE_SQRT, this->result, op[0]); break; @@ -1348,28 +1437,70 @@ fs_visitor::visit(ir_discard *ir) void fs_visitor::visit(ir_constant *ir) { - fs_reg reg(this, ir->type); - this->result = reg; + /* Set this->result to reg at the bottom of the function because some code + * paths will cause this visitor to be applied to other fields. This will + * cause the value stored in this->result to be modified. + * + * Make reg constant so that it doesn't get accidentally modified along the + * way. Yes, I actually had this problem. :( + */ + const fs_reg reg(this, ir->type); + fs_reg dst_reg = reg; - for (unsigned int i = 0; i < ir->type->vector_elements; i++) { - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i]))); - break; - case GLSL_TYPE_UINT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i]))); - break; - case GLSL_TYPE_INT: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i]))); - break; - case GLSL_TYPE_BOOL: - emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i]))); - break; - default: - assert(!"Non-float/uint/int/bool constant"); + if (ir->type->is_array()) { + const unsigned size = type_size(ir->type->fields.array); + + for (unsigned i = 0; i < ir->type->length; i++) { + ir->array_elements[i]->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else if (ir->type->is_record()) { + foreach_list(node, &ir->components) { + ir_instruction *const field = (ir_instruction *) node; + const unsigned size = type_size(field->type); + + field->accept(this); + fs_reg src_reg = this->result; + + dst_reg.type = src_reg.type; + for (unsigned j = 0; j < size; j++) { + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, src_reg)); + src_reg.reg_offset++; + dst_reg.reg_offset++; + } + } + } else { + const unsigned size = type_size(ir->type); + + for (unsigned i = 0; i < size; i++) { + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.f[i]))); + break; + case GLSL_TYPE_UINT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.u[i]))); + break; + case GLSL_TYPE_INT: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg(ir->value.i[i]))); + break; + case GLSL_TYPE_BOOL: + emit(fs_inst(BRW_OPCODE_MOV, dst_reg, fs_reg((int)ir->value.b[i]))); + break; + default: + assert(!"Non-float/uint/int/bool constant"); + } + dst_reg.reg_offset++; } - reg.reg_offset++; } + + this->result = reg; } void @@ -1381,6 +1512,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) fs_reg op[2]; fs_inst *inst; + assert(expr->get_num_operands() <= 2); for (unsigned int i = 0; i < expr->get_num_operands(); i++) { assert(expr->operands[i]->type->is_scalar()); @@ -1488,6 +1620,7 @@ fs_visitor::emit_if_gen6(ir_if *ir) fs_inst *inst; fs_reg temp; + assert(expr->get_num_operands() <= 2); for (unsigned int i = 0; i < expr->get_num_operands(); i++) { assert(expr->operands[i]->type->is_scalar()); @@ -1497,7 +1630,7 @@ fs_visitor::emit_if_gen6(ir_if *ir) switch (expr->operation) { case ir_unop_logic_not: - inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(1))); + inst = emit(fs_inst(BRW_OPCODE_IF, temp, op[0], fs_reg(0))); inst->conditional_mod = BRW_CONDITIONAL_Z; return; @@ -1874,7 +2007,7 @@ fs_visitor::emit_interpolation_setup_gen6() emit(fs_inst(BRW_OPCODE_MOV, this->pixel_y, int_pixel_y)); this->current_annotation = "compute 1/pos.w"; - this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0)); + this->wpos_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); this->pixel_w = fs_reg(this, glsl_type::float_type); emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); @@ -1902,17 +2035,17 @@ fs_visitor::emit_fb_writes() nr += 2; } - if (c->key.aa_dest_stencil_reg) { + if (c->aa_dest_stencil_reg) { emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0)))); + fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); } /* Reserve space for color. It'll be filled in per MRT below. */ int color_mrf = nr; nr += 4; - if (c->key.source_depth_to_render_target) { - if (c->key.computes_depth) { + if (c->source_depth_to_render_target) { + if (c->computes_depth) { /* Hand over gl_FragDepth. */ assert(this->frag_depth); fs_reg depth = *(variable_storage(this->frag_depth)); @@ -1921,20 +2054,22 @@ fs_visitor::emit_fb_writes() } else { /* Pass through the payload depth. */ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0)))); + fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); } } - if (c->key.dest_depth_reg) { + if (c->dest_depth_reg) { emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), - fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0)))); + fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); } fs_reg color = reg_undef; if (this->frag_color) color = *(variable_storage(this->frag_color)); - else if (this->frag_data) + else if (this->frag_data) { color = *(variable_storage(this->frag_data)); + color.type = BRW_REGISTER_TYPE_F; + } for (int target = 0; target < c->key.nr_color_regions; target++) { this->current_annotation = talloc_asprintf(this->mem_ctx, @@ -2375,7 +2510,7 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) void fs_visitor::assign_curb_setup() { - c->prog_data.first_curbe_grf = c->key.nr_payload_regs; + c->prog_data.first_curbe_grf = c->nr_payload_regs; c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8; /* Map the offsets in the UNIFORM file to fixed HW regs. */ @@ -2515,6 +2650,7 @@ fs_visitor::split_virtual_grfs() for (int j = 2; j < this->virtual_grf_sizes[i]; j++) { int reg = virtual_grf_alloc(1); assert(reg == new_virtual_grf[i] + j - 1); + (void) reg; } this->virtual_grf_sizes[i] = 1; } @@ -2768,6 +2904,7 @@ fs_visitor::propagate_constants() } break; case BRW_OPCODE_CMP: + case BRW_OPCODE_SEL: if (i == 1) { scan_inst->src[i] = inst->src[0]; progress = true; @@ -2796,26 +2933,17 @@ bool fs_visitor::dead_code_eliminate() { bool progress = false; - int num_vars = this->virtual_grf_next; - bool dead[num_vars]; - - for (int i = 0; i < num_vars; i++) { - dead[i] = this->virtual_grf_def[i] >= this->virtual_grf_use[i]; - - if (dead[i]) { - /* Mark off its interval so it won't interfere with anything. */ - this->virtual_grf_def[i] = -1; - this->virtual_grf_use[i] = -1; - } - } + int pc = 0; foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); - if (inst->dst.file == GRF && dead[inst->dst.reg]) { + if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) { inst->remove(); progress = true; } + + pc++; } return progress; @@ -2933,11 +3061,60 @@ fs_visitor::compute_to_mrf() /* Found a move of a GRF to a MRF. Let's see if we can go * rewrite the thing that made this GRF to write into the MRF. */ - bool found = false; fs_inst *scan_inst; for (scan_inst = (fs_inst *)inst->prev; scan_inst->prev != NULL; scan_inst = (fs_inst *)scan_inst->prev) { + if (scan_inst->dst.file == GRF && + scan_inst->dst.reg == inst->src[0].reg) { + /* Found the last thing to write our reg we want to turn + * into a compute-to-MRF. + */ + + if (scan_inst->opcode == FS_OPCODE_TEX) { + /* texturing writes several continuous regs, so we can't + * compute-to-mrf that. + */ + break; + } + + /* If it's predicated, it (probably) didn't populate all + * the channels. + */ + if (scan_inst->predicated) + break; + + /* SEND instructions can't have MRF as a destination. */ + if (scan_inst->mlen) + break; + + if (intel->gen >= 6) { + /* gen6 math instructions must have the destination be + * GRF, so no compute-to-MRF for them. + */ + if (scan_inst->opcode == FS_OPCODE_RCP || + scan_inst->opcode == FS_OPCODE_RSQ || + scan_inst->opcode == FS_OPCODE_SQRT || + scan_inst->opcode == FS_OPCODE_EXP2 || + scan_inst->opcode == FS_OPCODE_LOG2 || + scan_inst->opcode == FS_OPCODE_SIN || + scan_inst->opcode == FS_OPCODE_COS || + scan_inst->opcode == FS_OPCODE_POW) { + break; + } + } + + if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { + /* Found the creator of our MRF's source value. */ + scan_inst->dst.file = MRF; + scan_inst->dst.hw_reg = inst->dst.hw_reg; + scan_inst->saturate |= inst->saturate; + inst->remove(); + progress = true; + } + break; + } + /* We don't handle flow control here. Most computation of * values that end up in MRFs are shortly before the MRF * write anyway. @@ -2971,71 +3148,88 @@ fs_visitor::compute_to_mrf() } if (scan_inst->mlen > 0) { - /* Found a SEND instruction, which will do some amount of - * implied write that may overwrite our MRF that we were - * hoping to compute-to-MRF somewhere above it. Nothing - * we have implied-writes more than 2 MRFs from base_mrf, - * though. + /* Found a SEND instruction, which means that there are + * live values in MRFs from base_mrf to base_mrf + + * scan_inst->mlen - 1. Don't go pushing our MRF write up + * above it. */ - int implied_write_len = MIN2(scan_inst->mlen, 2); if (inst->dst.hw_reg >= scan_inst->base_mrf && - inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) { + inst->dst.hw_reg < scan_inst->base_mrf + scan_inst->mlen) { break; } } + } + } - if (scan_inst->dst.file == GRF && - scan_inst->dst.reg == inst->src[0].reg) { - /* Found the last thing to write our reg we want to turn - * into a compute-to-MRF. - */ + return progress; +} - if (scan_inst->opcode == FS_OPCODE_TEX) { - /* texturing writes several continuous regs, so we can't - * compute-to-mrf that. - */ - break; - } +/** + * Walks through basic blocks, locking for repeated MRF writes and + * removing the later ones. + */ +bool +fs_visitor::remove_duplicate_mrf_writes() +{ + fs_inst *last_mrf_move[16]; + bool progress = false; - /* If it's predicated, it (probably) didn't populate all - * the channels. - */ - if (scan_inst->predicated) - break; + memset(last_mrf_move, 0, sizeof(last_mrf_move)); - /* SEND instructions can't have MRF as a destination. */ - if (scan_inst->mlen) - break; + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); - if (intel->gen >= 6) { - /* gen6 math instructions must have the destination be - * GRF, so no compute-to-MRF for them. - */ - if (scan_inst->opcode == FS_OPCODE_RCP || - scan_inst->opcode == FS_OPCODE_RSQ || - scan_inst->opcode == FS_OPCODE_SQRT || - scan_inst->opcode == FS_OPCODE_EXP2 || - scan_inst->opcode == FS_OPCODE_LOG2 || - scan_inst->opcode == FS_OPCODE_SIN || - scan_inst->opcode == FS_OPCODE_COS || - scan_inst->opcode == FS_OPCODE_POW) { - break; - } - } + switch (inst->opcode) { + case BRW_OPCODE_DO: + case BRW_OPCODE_WHILE: + case BRW_OPCODE_IF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_ENDIF: + memset(last_mrf_move, 0, sizeof(last_mrf_move)); + continue; + default: + break; + } - if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { - /* Found the creator of our MRF's source value. */ - found = true; - break; + if (inst->opcode == BRW_OPCODE_MOV && + inst->dst.file == MRF) { + fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg]; + if (prev_inst && inst->equals(prev_inst)) { + inst->remove(); + progress = true; + continue; + } + } + + /* Clear out the last-write records for MRFs that were overwritten. */ + if (inst->dst.file == MRF) { + last_mrf_move[inst->dst.hw_reg] = NULL; + } + + if (inst->mlen > 0) { + /* Found a SEND instruction, which will include two of fewer + * implied MRF writes. We could do better here. + */ + for (int i = 0; i < implied_mrf_writes(inst); i++) { + last_mrf_move[inst->base_mrf + i] = NULL; + } + } + + /* Clear out any MRF move records whose sources got overwritten. */ + if (inst->dst.file == GRF) { + for (unsigned int i = 0; i < Elements(last_mrf_move); i++) { + if (last_mrf_move[i] && + last_mrf_move[i]->src[0].reg == inst->dst.reg) { + last_mrf_move[i] = NULL; } } } - if (found) { - scan_inst->dst.file = MRF; - scan_inst->dst.hw_reg = inst->dst.hw_reg; - scan_inst->saturate |= inst->saturate; - inst->remove(); - progress = true; + + if (inst->opcode == BRW_OPCODE_MOV && + inst->dst.file == MRF && + inst->src[0].file == GRF && + !inst->predicated) { + last_mrf_move[inst->dst.hw_reg] = inst; } } @@ -3091,6 +3285,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) break; default: assert(!"not reached"); + brw_reg = brw_null_reg(); break; } break; @@ -3105,6 +3300,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) assert(!"not reached"); brw_reg = brw_null_reg(); break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; } if (reg->abs) brw_reg = brw_abs(brw_reg); @@ -3159,6 +3358,7 @@ fs_visitor::generate_code() brw_set_conditionalmod(p, inst->conditional_mod); brw_set_predicate_control(p, inst->predicated); + brw_set_saturate(p, inst->saturate); switch (inst->opcode) { case BRW_OPCODE_MOV: @@ -3245,7 +3445,11 @@ fs_visitor::generate_code() brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case BRW_OPCODE_CONTINUE: - brw_CONT(p, if_depth_in_loop[loop_stack_depth]); + /* FINISHME: We need to write the loop instruction support still. */ + if (intel->gen >= 6) + brw_CONT_gen6(p, loop_stack[loop_stack_depth - 1]); + else + brw_CONT(p, if_depth_in_loop[loop_stack_depth]); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; @@ -3259,16 +3463,18 @@ fs_visitor::generate_code() assert(loop_stack_depth > 0); loop_stack_depth--; inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]); - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_stack[loop_stack_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_stack[loop_stack_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } } } } @@ -3340,12 +3546,31 @@ fs_visitor::generate_code() ((uint32_t *)&p->store[i])[0]); } brw_disasm(stdout, &p->store[i], intel->gen); - printf("\n"); } } last_native_inst = p->nr_insn; } + + brw_set_uip_jip(p); + + /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS + * emit issues, it doesn't get the jump distances into the output, + * which is often something we want to debug. So this is here in + * case you're doing that. + */ + if (0) { + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { + for (unsigned int i = 0; i < p->nr_insn; i++) { + printf("0x%08x 0x%08x 0x%08x 0x%08x ", + ((uint32_t *)&p->store[i])[3], + ((uint32_t *)&p->store[i])[2], + ((uint32_t *)&p->store[i])[1], + ((uint32_t *)&p->store[i])[0]); + brw_disasm(stdout, &p->store[i], intel->gen); + } + } + } } GLboolean @@ -3410,6 +3635,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) bool progress; do { progress = false; + + progress = v.remove_duplicate_mrf_writes() || progress; + v.calculate_live_intervals(); progress = v.propagate_constants() || progress; progress = v.register_coalesce() || progress; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 9b7fcde858..de7b15312a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -96,10 +96,7 @@ public: void init() { - this->reg = 0; - this->reg_offset = 0; - this->negate = 0; - this->abs = 0; + memset(this, 0, sizeof(*this)); this->hw_reg = -1; this->smear = -1; } @@ -151,6 +148,21 @@ public: fs_reg(enum register_file file, int hw_reg, uint32_t type); fs_reg(class fs_visitor *v, const struct glsl_type *type); + bool equals(fs_reg *r) + { + return (file == r->file && + reg == r->reg && + reg_offset == r->reg_offset && + hw_reg == r->hw_reg && + type == r->type && + negate == r->negate && + abs == r->abs && + memcmp(&fixed_hw_reg, &r->fixed_hw_reg, + sizeof(fixed_hw_reg)) == 0 && + smear == r->smear && + imm.u == r->imm.u); + } + /** Register file: ARF, GRF, MRF, IMM. */ enum register_file file; /** virtual register number. 0 = fixed hw reg */ @@ -174,6 +186,10 @@ public: } imm; }; +static const fs_reg reg_undef; +static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); +static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); + class fs_inst : public exec_node { public: /* Callers of this talloc-based new need not call delete. It's @@ -190,18 +206,14 @@ public: void init() { + memset(this, 0, sizeof(*this)); this->opcode = BRW_OPCODE_NOP; - this->saturate = false; this->conditional_mod = BRW_CONDITIONAL_NONE; - this->predicated = false; - this->sampler = 0; - this->target = 0; - this->eot = false; - this->header_present = false; - this->shadow_compare = false; - this->mlen = 0; - this->base_mrf = 0; - this->offset = 0; + + this->dst = reg_undef; + this->src[0] = reg_undef; + this->src[1] = reg_undef; + this->src[2] = reg_undef; } fs_inst() @@ -273,6 +285,26 @@ public: assert(src[2].reg_offset >= 0); } + bool equals(fs_inst *inst) + { + return (opcode == inst->opcode && + dst.equals(&inst->dst) && + src[0].equals(&inst->src[0]) && + src[1].equals(&inst->src[1]) && + src[2].equals(&inst->src[2]) && + saturate == inst->saturate && + predicated == inst->predicated && + conditional_mod == inst->conditional_mod && + mlen == inst->mlen && + base_mrf == inst->base_mrf && + sampler == inst->sampler && + target == inst->target && + eot == inst->eot && + header_present == inst->header_present && + shadow_compare == inst->shadow_compare && + offset == inst->offset); + } + int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ fs_reg dst; fs_reg src[3]; @@ -375,6 +407,7 @@ public: bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); + bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); void generate_code(); void generate_fb_write(fs_inst *inst); @@ -400,6 +433,7 @@ public: fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate); fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0); fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); + bool try_emit_saturate(ir_expression *ir); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); void emit_unspill(fs_inst *inst, fs_reg reg, uint32_t spill_offset); @@ -411,6 +445,7 @@ public: struct brw_reg interp_reg(int location, int channel); int setup_uniform_values(int loc, const glsl_type *type); void setup_builtin_uniform_values(ir_variable *ir); + int implied_mrf_writes(fs_inst *inst); struct brw_context *brw; const struct gl_fragment_program *fp; @@ -454,9 +489,5 @@ public: int grf_used; }; -static const fs_reg reg_undef; -static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F); -static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); - GLboolean brw_do_channel_expressions(struct exec_list *instructions); GLboolean brw_do_vector_splitting(struct exec_list *instructions); diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 2a6da4058b..20bfa4c3ea 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -205,6 +205,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_round_even: case ir_unop_sin: case ir_unop_cos: + case ir_unop_sin_reduced: + case ir_unop_cos_reduced: case ir_unop_dFdx: case ir_unop_dFdy: for (i = 0; i < vector_elements; i++) { @@ -288,34 +290,6 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) break; } - case ir_binop_cross: { - for (i = 0; i < vector_elements; i++) { - int swiz0 = (i + 1) % 3; - int swiz1 = (i + 2) % 3; - ir_expression *temp1, *temp2; - - temp1 = new(mem_ctx) ir_expression(ir_binop_mul, - element_type, - get_element(op_var[0], swiz0), - get_element(op_var[1], swiz1)); - - temp2 = new(mem_ctx) ir_expression(ir_binop_mul, - element_type, - get_element(op_var[1], swiz0), - get_element(op_var[0], swiz1)); - - temp2 = new(mem_ctx) ir_expression(ir_unop_neg, - element_type, - temp2, - NULL); - - assign(ir, i, new(mem_ctx) ir_expression(ir_binop_add, - element_type, - temp1, temp2)); - } - break; - } - case ir_binop_logic_and: case ir_binop_logic_xor: case ir_binop_logic_or: @@ -356,6 +330,9 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_noise: assert(!"noise should have been broken down to function call"); break; + case ir_quadop_vector: + assert(!"should have been lowered"); + break; } ir->remove(); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index b0c76f4094..73b41fdbce 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -166,6 +166,9 @@ static void populate_key( struct brw_context *brw, struct brw_gs_prog_key *key ) { struct gl_context *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + int prim_gs_always; + memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ @@ -185,10 +188,14 @@ static void populate_key( struct brw_context *brw, key->pv_first = GL_TRUE; } - key->need_gs_prog = (key->hint_gs_always || - brw->primitive == GL_QUADS || + if (intel->gen == 6) + prim_gs_always = brw->primitive == GL_LINE_LOOP; + else + prim_gs_always = brw->primitive == GL_QUADS || brw->primitive == GL_QUAD_STRIP || - brw->primitive == GL_LINE_LOOP); + brw->primitive == GL_LINE_LOOP; + + key->need_gs_prog = (key->hint_gs_always || prim_gs_always); } /* Calculate interpolants for triangle and line rasterization. @@ -205,8 +212,10 @@ static void prepare_gs_prog(struct brw_context *brw) brw->gs.prog_active = key.need_gs_prog; } + drm_intel_bo_unreference(brw->gs.prog_bo); + brw->gs.prog_bo = NULL; + if (brw->gs.prog_active) { - drm_intel_bo_unreference(brw->gs.prog_bo); brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, &key, sizeof(key), NULL, 0, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 1d350bc041..a91b0528fa 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -38,40 +38,6 @@ #include "brw_state.h" #include "brw_defines.h" - - - - -/*********************************************************************** - * Blend color - */ - -static void upload_blend_constant_color(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->intel.ctx; - struct brw_blend_constant_color bcc; - - memset(&bcc, 0, sizeof(bcc)); - bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; - bcc.header.length = sizeof(bcc)/4-2; - bcc.blend_constant_color[0] = ctx->Color.BlendColor[0]; - bcc.blend_constant_color[1] = ctx->Color.BlendColor[1]; - bcc.blend_constant_color[2] = ctx->Color.BlendColor[2]; - bcc.blend_constant_color[3] = ctx->Color.BlendColor[3]; - - BRW_CACHED_BATCH_STRUCT(brw, &bcc); -} - - -const struct brw_tracked_state brw_blend_constant_color = { - .dirty = { - .mesa = _NEW_COLOR, - .brw = BRW_NEW_CONTEXT, - .cache = 0 - }, - .emit = upload_blend_constant_color -}; - /* Constant single cliprect for framebuffer object or DRI2 drawing */ static void upload_drawing_rect(struct brw_context *brw) { @@ -339,6 +305,9 @@ static void upload_polygon_stipple(struct brw_context *brw) struct brw_polygon_stipple bps; GLuint i; + if (!ctx->Polygon.StippleFlag) + return; + memset(&bps, 0, sizeof(bps)); bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; bps.header.length = sizeof(bps)/4-2; @@ -381,6 +350,9 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) struct gl_context *ctx = &brw->intel.ctx; struct brw_polygon_stipple_offset bpso; + if (!ctx->Polygon.StippleFlag) + return; + memset(&bpso, 0, sizeof(bpso)); bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; @@ -409,7 +381,7 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) const struct brw_tracked_state brw_polygon_stipple_offset = { .dirty = { - .mesa = _NEW_WINDOW_POS, + .mesa = _NEW_WINDOW_POS | _NEW_POLYGONSTIPPLE, .brw = BRW_NEW_CONTEXT, .cache = 0 }, @@ -421,9 +393,10 @@ const struct brw_tracked_state brw_polygon_stipple_offset = { */ static void upload_aa_line_parameters(struct brw_context *brw) { + struct gl_context *ctx = &brw->intel.ctx; struct brw_aa_line_parameters balp; - if (!brw->has_aa_line_parameters) + if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters) return; /* use legacy aa line coverage computation */ @@ -436,7 +409,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) const struct brw_tracked_state brw_aa_line_parameters = { .dirty = { - .mesa = 0, + .mesa = _NEW_LINE, .brw = BRW_NEW_CONTEXT, .cache = 0 }, @@ -454,6 +427,9 @@ static void upload_line_stipple(struct brw_context *brw) GLfloat tmp; GLint tmpi; + if (!ctx->Line.StippleFlag) + return; + memset(&bls, 0, sizeof(bls)); bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; bls.header.length = sizeof(bls)/4 - 2; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 1367d81469..94efa79109 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -142,7 +142,6 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx, if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; - newFP->isGLSL = brw_wm_is_glsl(fprog); /* Don't reject fragment shaders for their Mesa IR state when we're * using the new FS backend. diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 3beed16945..4bb93e7336 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -164,7 +164,8 @@ void brw_destroy_caches( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s))) +#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data(brw->intel.batch, (s), \ + sizeof(*(s)), false) #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) GLboolean brw_cached_batch_struct( struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index be3989eb7d..a21af13caa 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -48,7 +48,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct header *newheader = (struct header *)data; if (brw->emit_state_always) { - intel_batchbuffer_data(brw->intel.batch, data, sz); + intel_batchbuffer_data(brw->intel.batch, data, sz, false); return GL_TRUE; } @@ -75,7 +75,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, emit: memcpy(item->header, newheader, sz); - intel_batchbuffer_data(brw->intel.batch, data, sz); + intel_batchbuffer_data(brw->intel.batch, data, sz, false); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index b79b33c2e3..e262887471 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -107,14 +107,14 @@ static void dump_wm_surface_state(struct brw_context *brw) char name[20]; if (surf_bo == NULL) { - fprintf(stderr, " WM SS%d: NULL\n", i); + fprintf(stderr, "WM SURF%d: NULL\n", i); continue; } drm_intel_bo_map(surf_bo, GL_FALSE); surfoff = surf_bo->offset + brw->wm.surf_offset[i]; surf = (struct brw_surface_state *)(surf_bo->virtual + brw->wm.surf_offset[i]); - sprintf(name, "WM SS%d", i); + sprintf(name, "WM SURF%d", i); state_out(name, surf, surfoff, 0, "%s %s\n", get_965_surfacetype(surf->ss0.surface_type), get_965_surface_format(surf->ss0.surface_format)); @@ -132,6 +132,53 @@ static void dump_wm_surface_state(struct brw_context *brw) } } + +static void dump_wm_sampler_state(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->intel.ctx; + int i; + + if (!brw->wm.sampler_bo) { + fprintf(stderr, "WM_SAMPLER: NULL\n"); + return; + } + + drm_intel_bo_map(brw->wm.sampler_bo, GL_FALSE); + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + unsigned int offset; + struct brw_sampler_state *samp; + struct brw_sampler_default_color *sdc; + char name[20]; + + if (!ctx->Texture.Unit[i]._ReallyEnabled) { + fprintf(stderr, "WM SAMP%d: disabled\n", i); + continue; + } + + offset = brw->wm.sampler_bo->offset + + i * sizeof(struct brw_sampler_state); + samp = (struct brw_sampler_state *)(brw->wm.sampler_bo->virtual + + i * sizeof(struct brw_sampler_state)); + + sprintf(name, "WM SAMP%d", i); + state_out(name, samp, offset, 0, "filtering\n"); + state_out(name, samp, offset, 1, "wrapping, lod\n"); + state_out(name, samp, offset, 2, "default color pointer\n"); + state_out(name, samp, offset, 3, "chroma key, aniso\n"); + + sprintf(name, " WM SDC%d", i); + + drm_intel_bo_map(brw->wm.sdc_bo[i], GL_FALSE); + sdc = (struct brw_sampler_default_color *)(brw->wm.sdc_bo[i]->virtual); + state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 0, "r\n"); + state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 1, "g\n"); + state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 2, "b\n"); + state_out(name, sdc, brw->wm.sdc_bo[i]->offset, 3, "a\n"); + drm_intel_bo_unmap(brw->wm.sdc_bo[i]); + } + drm_intel_bo_unmap(brw->wm.sampler_bo); +} + static void dump_sf_viewport_state(struct brw_context *brw) { const char *name = "SF VP"; @@ -324,6 +371,7 @@ void brw_debug_batch(struct intel_context *intel) state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); dump_wm_surface_state(brw); + dump_wm_sampler_state(brw); if (intel->gen < 6) state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state)); diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 338f3876b3..eba4411ca7 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -129,7 +129,7 @@ const struct brw_tracked_state *gen6_atoms[] = &brw_vs_constants, /* Before vs_surfaces and constant_buffer */ &brw_wm_constants, /* Before wm_surfaces and constant_buffer */ - &gen6_wm_constants, /* Before wm_surfaces and constant_buffer */ + &gen6_wm_constants, /* Before wm_state */ &brw_vs_surfaces, /* must do before unit */ &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 8ce9af9c4f..461f27048c 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1064,6 +1064,15 @@ struct brw_sampler_default_color { GLfloat color[4]; }; +struct gen5_sampler_default_color { + uint8_t ub[4]; + float f[4]; + uint16_t hf[4]; + uint16_t us[4]; + int16_t s[4]; + uint8_t b[4]; +}; + struct brw_sampler_state { @@ -1169,7 +1178,12 @@ struct brw_surface_state GLuint cube_neg_y:1; GLuint cube_pos_x:1; GLuint cube_neg_x:1; - GLuint pad:4; + GLuint pad:2; + /* Required on gen6 for surfaces accessed through render cache messages. + */ + GLuint render_cache_read_write:1; + /* Ironlake and newer: instead of replicating one of the texels */ + GLuint cube_corner_average:1; GLuint mipmap_layout_mode:1; GLuint vert_line_stride_ofs:1; GLuint vert_line_stride:1; @@ -1539,6 +1553,21 @@ struct brw_instruction GLuint pad0:12; } if_else; + struct + { + /* Signed jump distance to the ip to jump to if all channels + * are disabled after the break or continue. It should point + * to the end of the innermost control flow block, as that's + * where some channel could get re-enabled. + */ + int jip:16; + + /* Signed jump distance to the location to resume execution + * of this channel if it's enabled for the break or continue. + */ + int uip:16; + } break_cont; + struct { GLuint function:4; GLuint int_type:1; diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 4a41c7a517..6ae75d22c1 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -99,8 +99,8 @@ static void do_vs_prog( struct brw_context *brw, (void) ctx; aux_size = sizeof(c.prog_data); - if (c.vp->use_const_buffer) - aux_size += c.vp->program.Base.Parameters->NumParameters; + /* constant_map */ + aux_size += c.vp->program.Base.Parameters->NumParameters; drm_intel_bo_unreference(brw->vs.prog_bo); brw->vs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_VS_PROG, @@ -130,6 +130,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); + key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_POINT */ if (ctx->Point.PointSprite) { @@ -157,7 +158,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT, + .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 9338a6b7db..0b88cc1ec7 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -44,6 +44,7 @@ struct brw_vs_prog_key { GLuint nr_userclip:4; GLuint copy_edgeflag:1; GLuint point_coord_replace:8; + GLuint two_side_color: 1; }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 7e43324a1f..326bb1e562 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -140,9 +140,13 @@ clear_current_const(struct brw_vs_compile *c) static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { struct intel_context *intel = &c->func.brw->intel; - GLuint i, reg = 0, mrf; + GLuint i, reg = 0, mrf, j; int attributes_in_vue; int first_reladdr_output; + int max_constant; + int constant = 0; + int vert_result_reoder[VERT_RESULT_MAX]; + int bfc = 0; /* Determine whether to use a real constant buffer or use a block * of GRF registers for constants. The later is faster but only @@ -181,62 +185,81 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } - /* Vertex program parameters from curbe: + /* Assign some (probably all) of the vertex program constants to + * the push constant buffer/CURBE. + * + * There's an obvious limit to the numer of push constants equal to + * the number of register available, and that number is smaller + * than the minimum maximum number of vertex program parameters, so + * support for pull constants is required if we overflow. + * Additionally, on gen6 the number of push constants is even + * lower. + * + * When there's relative addressing, we don't know what range of + * Mesa IR registers can be accessed. And generally, when relative + * addressing is used we also have too many constants to load them + * all as push constants. So, we'll just support relative + * addressing out of the pull constant buffers, and try to load as + * many statically-accessed constants into the push constant buffer + * as we can. */ - if (c->vp->use_const_buffer) { - int max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; - int constant = 0; - - /* We've got more constants than we can load with the push - * mechanism. This is often correlated with reladdr loads where - * we should probably be using a pull mechanism anyway to avoid - * excessive reading. However, the pull mechanism is slow in - * general. So, we try to allocate as many non-reladdr-loaded - * constants through the push buffer as we can before giving up. - */ - memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); - for (i = 0; - i < c->vp->program.Base.NumInstructions && constant < max_constant; - i++) { - struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; - int arg; - - for (arg = 0; arg < 3 && constant < max_constant; arg++) { - if ((inst->SrcReg[arg].File != PROGRAM_STATE_VAR && - inst->SrcReg[arg].File != PROGRAM_CONSTANT && - inst->SrcReg[arg].File != PROGRAM_UNIFORM && - inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && - inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) || - inst->SrcReg[arg].RelAddr) - continue; - - if (c->constant_map[inst->SrcReg[arg].Index] == -1) { - c->constant_map[inst->SrcReg[arg].Index] = constant++; - } + if (intel->gen >= 6) { + /* We can only load 32 regs of push constants. */ + max_constant = 32 * 2 - c->key.nr_userclip; + } else { + max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries; + } + + /* constant_map maps from ParameterValues[] index to index in the + * push constant buffer, or -1 if it's only in the pull constant + * buffer. + */ + memset(c->constant_map, -1, c->vp->program.Base.Parameters->NumParameters); + for (i = 0; + i < c->vp->program.Base.NumInstructions && constant < max_constant; + i++) { + struct prog_instruction *inst = &c->vp->program.Base.Instructions[i]; + int arg; + + for (arg = 0; arg < 3 && constant < max_constant; arg++) { + if (inst->SrcReg[arg].File != PROGRAM_STATE_VAR && + inst->SrcReg[arg].File != PROGRAM_CONSTANT && + inst->SrcReg[arg].File != PROGRAM_UNIFORM && + inst->SrcReg[arg].File != PROGRAM_ENV_PARAM && + inst->SrcReg[arg].File != PROGRAM_LOCAL_PARAM) { + continue; } - } - for (i = 0; i < constant; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, - (i%2) * 4), - 0, 4, 1); + if (inst->SrcReg[arg].RelAddr) { + c->vp->use_const_buffer = GL_TRUE; + continue; + } + + if (c->constant_map[inst->SrcReg[arg].Index] == -1) { + c->constant_map[inst->SrcReg[arg].Index] = constant++; + } } - reg += (constant + 1) / 2; - c->prog_data.curb_read_length = reg - 1; - /* XXX 0 causes a bug elsewhere... */ - c->prog_data.nr_params = MAX2(constant * 4, 4); } - else { - /* use a section of the GRF for constants */ - GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; - for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params + 1) / 2; - c->prog_data.curb_read_length = reg - 1; - c->prog_data.nr_params = nr_params * 4; + /* If we ran out of push constant space, then we'll also upload all + * constants through the pull constant buffer so that they can be + * accessed no matter what. For relative addressing (the common + * case) we need them all in place anyway. + */ + if (constant == max_constant) + c->vp->use_const_buffer = GL_TRUE; + + for (i = 0; i < constant; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride(brw_vec4_grf(reg + i / 2, + (i % 2) * 4), + 0, 4, 1); } + reg += (constant + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + c->prog_data.nr_params = constant * 4; + /* XXX 0 causes a bug elsewhere... */ + if (intel->gen < 6 && c->prog_data.nr_params == 0) + c->prog_data.nr_params = 4; /* Allocate input regs: */ @@ -270,7 +293,36 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) mrf = 4; first_reladdr_output = get_first_reladdr_output(&c->vp->program); - for (i = 0; i < VERT_RESULT_MAX; i++) { + + for (i = 0; i < VERT_RESULT_MAX; i++) + vert_result_reoder[i] = i; + + /* adjust attribute order in VUE for BFC0/BFC1 on Gen6+ */ + if (intel->gen >= 6 && c->key.two_side_color) { + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) && + (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) { + assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)); + assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)); + bfc = 2; + } else if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) && + (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) + bfc = 1; + + if (bfc) { + for (i = 0; i < bfc; i++) { + vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 0] = VERT_RESULT_COL0 + i; + vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 1] = VERT_RESULT_BFC0 + i; + } + + for (i = VERT_RESULT_COL0 + bfc * 2; i < VERT_RESULT_BFC0 + bfc; i++) { + vert_result_reoder[i] = i - bfc; + } + } + } + + for (j = 0; j < VERT_RESULT_MAX; j++) { + i = vert_result_reoder[j]; + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { c->nr_outputs++; assert(i < Elements(c->regs[PROGRAM_OUTPUT])); @@ -281,7 +333,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) else if (i == VERT_RESULT_PSIZ) { c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; - mrf++; /* just a placeholder? XXX fix later stages & remove this */ } else { /* Two restrictions on our compute-to-MRF here. The @@ -574,9 +625,18 @@ static void emit_max( struct brw_compile *p, struct brw_reg arg0, struct brw_reg arg1 ) { - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); - brw_SEL(p, dst, arg0, arg1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); + brw_SEL(p, dst, arg0, arg1); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } else { + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } } static void emit_min( struct brw_compile *p, @@ -584,9 +644,34 @@ static void emit_min( struct brw_compile *p, struct brw_reg arg0, struct brw_reg arg1 ) { - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); - brw_SEL(p, dst, arg0, arg1); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_SEL(p, dst, arg0, arg1); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } else { + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } +} + +static void emit_arl(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) { + struct brw_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F); + + brw_RNDD(p, dst_f, src); + brw_MOV(p, dst, dst_f); + } else { + brw_RNDD(p, dst, src); + } } static void emit_math1_gen4(struct brw_vs_compile *c, @@ -680,7 +765,7 @@ emit_math1(struct brw_vs_compile *c, emit_math1_gen4(c, function, dst, arg0, precision); } -static void emit_math2( struct brw_vs_compile *c, +static void emit_math2_gen4( struct brw_vs_compile *c, GLuint function, struct brw_reg dst, struct brw_reg arg0, @@ -688,14 +773,11 @@ static void emit_math2( struct brw_vs_compile *c, GLuint precision) { struct brw_compile *p = &c->func; - struct intel_context *intel = &p->brw->intel; struct brw_reg tmp = dst; GLboolean need_tmp = GL_FALSE; - if (dst.file != BRW_GENERAL_REGISTER_FILE) - need_tmp = GL_TRUE; - - if (intel->gen < 6 && dst.dw1.bits.writemask != 0xf) + if (dst.file != BRW_GENERAL_REGISTER_FILE || + dst.dw1.bits.writemask != 0xf) need_tmp = GL_TRUE; if (need_tmp) @@ -718,6 +800,53 @@ static void emit_math2( struct brw_vs_compile *c, } } +static void emit_math2_gen6( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp_src0, tmp_src1, tmp_dst; + + tmp_src0 = get_tmp(c); + tmp_src1 = get_tmp(c); + tmp_dst = get_tmp(c); + + brw_MOV(p, tmp_src0, arg0); + brw_MOV(p, tmp_src1, arg1); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_math2(p, + tmp_dst, + function, + tmp_src0, + tmp_src1); + brw_set_access_mode(p, BRW_ALIGN_16); + + brw_MOV(p, dst, tmp_dst); + + release_tmp(c, tmp_src0); + release_tmp(c, tmp_src1); + release_tmp(c, tmp_dst); +} + +static void emit_math2( struct brw_vs_compile *c, + GLuint function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + GLuint precision) +{ + struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) + emit_math2_gen6(c, function, dst, arg0, arg1, precision); + else + emit_math2_gen4(c, function, dst, arg0, arg1, precision); +} static void emit_exp_noalias( struct brw_vs_compile *c, struct brw_reg dst, @@ -990,8 +1119,6 @@ get_constant(struct brw_vs_compile *c, assert(argIndex < 3); - assert(c->func.brw->intel.gen < 6); /* FINISHME */ - if (c->current_const[argIndex].index != src->Index) { /* Keep track of the last constant loaded in this slot, for reuse. */ c->current_const[argIndex].index = src->Index; @@ -1022,14 +1149,14 @@ get_reladdr_constant(struct brw_vs_compile *c, { const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; struct brw_reg const_reg = c->current_const[argIndex].reg; - struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; - struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + uint32_t offset; assert(argIndex < 3); - assert(c->func.brw->intel.gen < 6); /* FINISHME */ - /* Can't reuse a reladdr constant load. */ c->current_const[argIndex].index = -1; @@ -1038,15 +1165,21 @@ get_reladdr_constant(struct brw_vs_compile *c, src->Index, argIndex, c->current_const[argIndex].reg.nr); #endif - brw_MUL(p, byte_addr_reg, addrReg, brw_imm_ud(16)); + if (intel->gen >= 6) { + offset = src->Index; + } else { + struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D); + brw_MUL(p, byte_addr_reg, addr_reg, brw_imm_d(16)); + addr_reg = byte_addr_reg; + offset = 16 * src->Index; + } /* fetch the first vec4 */ brw_dp_READ_4_vs_relative(p, - const_reg, /* writeback dest */ - byte_addr_reg, /* address register */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ - ); + const_reg, + addr_reg, + offset, + SURF_INDEX_VERT_CONST_BUFFER); return const_reg; } @@ -1241,22 +1374,18 @@ get_src_reg( struct brw_vs_compile *c, case PROGRAM_UNIFORM: case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: - if (c->vp->use_const_buffer) { - if (!relAddr && c->constant_map[index] != -1) { - assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); - return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; - } else if (relAddr) + if (!relAddr && c->constant_map[index] != -1) { + /* Take from the push constant buffer if possible. */ + assert(c->regs[PROGRAM_STATE_VAR][c->constant_map[index]].nr != 0); + return c->regs[PROGRAM_STATE_VAR][c->constant_map[index]]; + } else { + /* Must be in the pull constant buffer then .*/ + assert(c->vp->use_const_buffer); + if (relAddr) return get_reladdr_constant(c, inst, argIndex); else return get_constant(c, inst, argIndex); } - else if (relAddr) { - return deref(c, c->regs[PROGRAM_STATE_VAR][0], index, 16); - } - else { - assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); - return c->regs[PROGRAM_STATE_VAR][index]; - } case PROGRAM_ADDRESS: assert(index == 0); return c->regs[file][index]; @@ -1585,6 +1714,8 @@ static void emit_vertex_write( struct brw_vs_compile *c) break; if (!(c->prog_data.outputs_written & BITFIELD64_BIT(i))) continue; + if (i == VERT_RESULT_PSIZ) + continue; if (i >= VERT_RESULT_TEX0 && c->regs[PROGRAM_OUTPUT][i].file == BRW_GENERAL_REGISTER_FILE) { @@ -1848,7 +1979,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); break; case OPCODE_ARL: - brw_RNDD(p, dst, args[0]); + emit_arl(p, dst, args[0]); break; case OPCODE_FLR: brw_RNDD(p, dst, args[0]); @@ -1895,7 +2026,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); break; case OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, brw_abs(args[0]), BRW_MATH_PRECISION_FULL); break; case OPCODE_SEQ: @@ -1969,35 +2100,42 @@ void brw_vs_emit(struct brw_vs_compile *c ) break; case OPCODE_CONT: brw_set_predicate_control(p, get_predicate(inst)); - brw_CONT(p, if_depth_in_loop[loop_depth]); + if (intel->gen >= 6) { + brw_CONT_gen6(p, loop_inst[loop_depth - 1]); + } else { + brw_CONT(p, if_depth_in_loop[loop_depth]); + } brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_ENDLOOP: - { - clear_current_const(c); - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - loop_depth--; - - if (intel->gen == 5) - br = 2; - - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BEGINLOOP */ - while (inst0 > loop_inst[loop_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && + + case OPCODE_ENDLOOP: { + clear_current_const(c); + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + loop_depth--; + + if (intel->gen == 5) + br = 2; + + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + + if (intel->gen < 6) { + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK && inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + } + } + } + } break; + case OPCODE_BRA: brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); @@ -2088,6 +2226,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) } brw_resolve_cals(p); + brw_set_uip_jip(p); brw_optimize(p); diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index a6d2a2377f..656501b4f7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -119,6 +119,62 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) brw_wm_emit(c); } +static void +brw_wm_payload_setup(struct brw_context *brw, + struct brw_wm_compile *c) +{ + struct intel_context *intel = &brw->intel; + bool uses_depth = (c->fp->program.Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; + + if (intel->gen >= 6) { + /* R0-1: masks, pixel X/Y coordinates. */ + c->nr_payload_regs = 2; + /* R2: only for 32-pixel dispatch.*/ + /* R3-4: perspective pixel location barycentric */ + c->nr_payload_regs += 2; + /* R5-6: perspective pixel location bary for dispatch width != 8 */ + if (c->dispatch_width == 16) { + c->nr_payload_regs += 2; + } + /* R7-10: perspective centroid barycentric */ + /* R11-14: perspective sample barycentric */ + /* R15-18: linear pixel location barycentric */ + /* R19-22: linear centroid barycentric */ + /* R23-26: linear sample barycentric */ + + /* R27: interpolated depth if uses source depth */ + if (uses_depth) { + c->source_depth_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R28: interpolated depth if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. + */ + if (uses_depth) { + c->source_w_reg = c->nr_payload_regs; + c->nr_payload_regs++; + if (c->dispatch_width == 16) { + /* R30: interpolated W if not 8-wide. */ + c->nr_payload_regs++; + } + } + /* R31: MSAA position offsets. */ + /* R32-: bary for 32-pixel. */ + /* R58-59: interp W for 32-pixel. */ + + if (c->fp->program.Base.OutputsWritten & + BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + c->source_depth_to_render_target = GL_TRUE; + c->computes_depth = GL_TRUE; + } + } else { + brw_wm_lookup_iz(intel, c); + } +} /** * All Mesa program -> GPU code generation goes through this function. @@ -167,23 +223,18 @@ static void do_wm_prog( struct brw_context *brw, brw_init_compile(brw, &c->func); - /* temporary sanity check assertion */ - ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + brw_wm_payload_setup(brw, c); if (!brw_wm_fs_emit(brw, c)) { /* * Shader which use GLSL features such as flow control are handled * differently from "simple" shaders. */ - if (fp->isGLSL) { - c->dispatch_width = 8; - brw_wm_glsl_emit(brw, c); - } - else { - c->dispatch_width = 16; - brw_wm_non_glsl_emit(brw, c); - } + c->dispatch_width = 16; + brw_wm_payload_setup(brw, c); + brw_wm_non_glsl_emit(brw, c); } + c->prog_data.dispatch_width = c->dispatch_width; /* Scratch space is used for register spilling */ if (c->last_scratch) { @@ -220,12 +271,10 @@ static void do_wm_prog( struct brw_context *brw, static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { - struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; GLuint i; @@ -285,57 +334,9 @@ static void brw_wm_populate_key( struct brw_context *brw, } } - if (intel->gen >= 6) { - /* R0-1: masks, pixel X/Y coordinates. */ - key->nr_payload_regs = 2; - /* R2: only for 32-pixel dispatch.*/ - /* R3-4: perspective pixel location barycentric */ - key->nr_payload_regs += 2; - /* R5-6: perspective pixel location bary for dispatch width != 8 */ - if (!fp->isGLSL) { /* dispatch_width != 8 */ - key->nr_payload_regs += 2; - } - /* R7-10: perspective centroid barycentric */ - /* R11-14: perspective sample barycentric */ - /* R15-18: linear pixel location barycentric */ - /* R19-22: linear centroid barycentric */ - /* R23-26: linear sample barycentric */ - - /* R27: interpolated depth if uses source depth */ - if (uses_depth) { - key->source_depth_reg = key->nr_payload_regs; - key->nr_payload_regs++; - if (!fp->isGLSL) { /* dispatch_width != 8 */ - /* R28: interpolated depth if not 8-wide. */ - key->nr_payload_regs++; - } - } - /* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. - */ - if (uses_depth) { - key->source_w_reg = key->nr_payload_regs; - key->nr_payload_regs++; - if (!fp->isGLSL) { /* dispatch_width != 8 */ - /* R30: interpolated W if not 8-wide. */ - key->nr_payload_regs++; - } - } - /* R31: MSAA position offsets. */ - /* R32-: bary for 32-pixel. */ - /* R58-59: interp W for 32-pixel. */ - - if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { - key->source_depth_to_render_target = GL_TRUE; - key->computes_depth = GL_TRUE; - } - - } else { - brw_wm_lookup_iz(intel, - line_aa, - lookup, - uses_depth, - key); - } + key->iz_lookup = lookup; + key->line_aa = line_aa; + key->stats_wm = brw->intel.stats_wm; /* BRW_NEW_WM_INPUT_DIMENSIONS */ key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; @@ -377,6 +378,10 @@ static void brw_wm_populate_key( struct brw_context *brw, swizzles[2] = SWIZZLE_ZERO; } else if (t->DepthMode == GL_LUMINANCE) { swizzles[3] = SWIZZLE_ONE; + } else if (t->DepthMode == GL_RED) { + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_ZERO; } } @@ -423,6 +428,7 @@ static void brw_wm_populate_key( struct brw_context *brw, */ if (fp->program.Base.InputsRead & FRAG_BIT_WPOS) { key->drawable_height = ctx->DrawBuffer->Height; + key->render_to_fbo = ctx->DrawBuffer->Name != 0; } key->nr_color_regions = brw->state.nr_color_regions; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 99bd15c187..e7f3cfbb75 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -59,18 +59,12 @@ #define AA_ALWAYS 2 struct brw_wm_prog_key { - GLuint source_depth_reg:3; - GLuint source_w_reg:3; - GLuint aa_dest_stencil_reg:3; - GLuint dest_depth_reg:3; - GLuint nr_payload_regs:4; - GLuint computes_depth:1; /* could be derived from program string */ - GLuint source_depth_to_render_target:1; + GLuint stats_wm:1; GLuint flat_shade:1; GLuint linear_color:1; /**< linear interpolation vs perspective interp */ - GLuint runtime_check_aads_emit:1; GLuint nr_color_regions:5; - + GLuint render_to_fbo:1; + GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint shadowtex_mask:16; GLuint yuvtex_mask:16; @@ -80,6 +74,8 @@ struct brw_wm_prog_key { GLushort drawable_height; GLbitfield64 vp_outputs_written; + GLuint iz_lookup; + GLuint line_aa; GLuint program_string_id:32; }; @@ -203,6 +199,15 @@ struct brw_wm_compile { PASS2_DONE } state; + GLuint source_depth_reg:3; + GLuint source_w_reg:3; + GLuint aa_dest_stencil_reg:3; + GLuint dest_depth_reg:3; + GLuint nr_payload_regs:4; + GLuint computes_depth:1; /* could be derived from program string */ + GLuint source_depth_to_render_target:1; + GLuint runtime_check_aads_emit:1; + /* Initial pass - translate fp instructions to fp instructions, * simplifying and adding instructions for interpolation and * framebuffer writes. @@ -305,14 +310,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c, void brw_wm_print_program( struct brw_wm_compile *c, const char *stage ); -void brw_wm_lookup_iz( struct intel_context *intel, - GLuint line_aa, - GLuint lookup, - GLboolean ps_uses_depth, - struct brw_wm_prog_key *key ); +void brw_wm_lookup_iz(struct intel_context *intel, + struct brw_wm_compile *c); -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c); /* brw_wm_emit.c */ @@ -380,7 +380,6 @@ void emit_fb_write(struct brw_wm_compile *c, void emit_frontfacing(struct brw_compile *p, const struct brw_reg *dst, GLuint mask); -void emit_kil_nv(struct brw_wm_compile *c); void emit_linterp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 96fecc97ee..be86e0e128 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -219,43 +219,45 @@ void emit_wpos_xy(struct brw_wm_compile *c, const struct brw_reg *arg0) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; + struct brw_reg delta_x = retype(arg0[0], BRW_REGISTER_TYPE_W); + struct brw_reg delta_y = retype(arg0[1], BRW_REGISTER_TYPE_W); if (mask & WRITEMASK_X) { + if (intel->gen >= 6) { + struct brw_reg delta_x_f = retype(delta_x, BRW_REGISTER_TYPE_F); + brw_MOV(p, delta_x_f, delta_x); + delta_x = delta_x_f; + } + if (c->fp->program.PixelCenterInteger) { /* X' = X */ - brw_MOV(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W)); + brw_MOV(p, dst[0], delta_x); } else { /* X' = X + 0.5 */ - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W), - brw_imm_f(0.5)); + brw_ADD(p, dst[0], delta_x, brw_imm_f(0.5)); } } if (mask & WRITEMASK_Y) { + if (intel->gen >= 6) { + struct brw_reg delta_y_f = retype(delta_y, BRW_REGISTER_TYPE_F); + brw_MOV(p, delta_y_f, delta_y); + delta_y = delta_y_f; + } + if (c->fp->program.OriginUpperLeft) { if (c->fp->program.PixelCenterInteger) { /* Y' = Y */ - brw_MOV(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_W)); + brw_MOV(p, dst[1], delta_y); } else { - /* Y' = Y + 0.5 */ - brw_ADD(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_W), - brw_imm_f(0.5)); + brw_ADD(p, dst[1], delta_y, brw_imm_f(0.5)); } } else { float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5; /* Y' = (height - 1) - Y + center */ - brw_ADD(p, - dst[1], - negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_ADD(p, dst[1], negate(delta_y), brw_imm_f(c->key.drawable_height - 1 + center_offset)); } } @@ -896,10 +898,14 @@ void emit_math1(struct brw_wm_compile *c, BRW_MATH_SATURATE_NONE); struct brw_reg src; - if (intel->gen >= 6 && (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || - arg0[0].file != BRW_GENERAL_REGISTER_FILE)) { + if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 || + arg0[0].file != BRW_GENERAL_REGISTER_FILE) || + arg0[0].negate || arg0[0].abs)) { /* Gen6 math requires that source and dst horizontal stride be 1, * and that the argument be in the GRF. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. */ src = dst[dst_chan]; brw_MOV(p, src, arg0[0]); @@ -967,34 +973,23 @@ void emit_math2(struct brw_wm_compile *c, struct brw_reg temp_dst = dst[dst_chan]; if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - /* Both scalar arguments. Do scalar calc. */ - src0.hstride = BRW_HORIZONTAL_STRIDE_1; - src1.hstride = BRW_HORIZONTAL_STRIDE_1; - temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1; - temp_dst.width = BRW_WIDTH_1; - - if (arg0[0].subnr != 0) { - brw_MOV(p, temp_dst, src0); - src0 = temp_dst; - - /* Ouch. We've used the temp as a dst, and we still - * need a temp to store arg1 in, because src and dst - * offsets have to be equal. Leaving this up to - * glsl2-965 to handle correctly. - */ - assert(arg1[0].subnr == 0); - } else if (arg1[0].subnr != 0) { - brw_MOV(p, temp_dst, src1); - src1 = temp_dst; - } - } else { - brw_MOV(p, temp_dst, src0); - src0 = temp_dst; - } - } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - brw_MOV(p, temp_dst, src1); - src1 = temp_dst; + brw_MOV(p, temp_dst, src0); + src0 = temp_dst; + } + + if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + /* This is a heinous hack to get a temporary register for use + * in case both arg0 and arg1 are constants. Why you're + * doing exponentiation on constant values in the shader, we + * don't know. + * + * max_wm_grf is almost surely less than the maximum GRF, and + * gen6 doesn't care about the number of GRFs used in a + * shader like pre-gen6 did. + */ + struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0); + brw_MOV(p, temp, src1); + src1 = temp; } brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); @@ -1012,14 +1007,6 @@ void emit_math2(struct brw_wm_compile *c, sechalf(src0), sechalf(src1)); } - - /* Splat a scalar result into all the channels. */ - if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 && - arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0; - temp_dst.vstride = BRW_VERTICAL_STRIDE_0; - brw_MOV(p, dst[dst_chan], temp_dst); - } } else { GLuint saturate = ((mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : @@ -1301,9 +1288,15 @@ static void emit_kil( struct brw_wm_compile *c, struct brw_reg *arg0) { struct brw_compile *p = &c->func; - struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + struct intel_context *intel = &p->brw->intel; + struct brw_reg pixelmask; GLuint i, j; + if (intel->gen >= 6) + pixelmask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); + else + pixelmask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + for (i = 0; i < 4; i++) { /* Check if we've already done the comparison for this reg * -- common when someone does KIL TEMP.wwww. @@ -1319,26 +1312,11 @@ static void emit_kil( struct brw_wm_compile *c, brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); brw_set_predicate_control_flag_value(p, 0xff); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_AND(p, r0uw, brw_flag_reg(), r0uw); + brw_AND(p, pixelmask, brw_flag_reg(), pixelmask); brw_pop_insn_state(p); } } -/* KIL_NV kills the pixels that are currently executing, not based on a test - * of the arguments. - */ -void emit_kil_nv( struct brw_wm_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); - - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ - brw_AND(p, r0uw, c->emit_mask_reg, r0uw); - brw_pop_insn_state(p); -} - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -1387,8 +1365,8 @@ static void emit_aa( struct brw_wm_compile *c, GLuint reg ) { struct brw_compile *p = &c->func; - GLuint comp = c->key.aa_dest_stencil_reg / 2; - GLuint off = c->key.aa_dest_stencil_reg % 2; + GLuint comp = c->aa_dest_stencil_reg / 2; + GLuint off = c->aa_dest_stencil_reg % 2; struct brw_reg aa = offset(arg1[comp], off); brw_push_insn_state(p); @@ -1416,11 +1394,10 @@ void emit_fb_write(struct brw_wm_compile *c, struct intel_context *intel = &brw->intel; GLuint nr = 2; GLuint channel; - int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */ /* Reserve a space for AA - may not be needed: */ - if (c->key.aa_dest_stencil_reg) + if (c->aa_dest_stencil_reg) nr += 1; /* I don't really understand how this achieves the color interleave @@ -1428,11 +1405,6 @@ void emit_fb_write(struct brw_wm_compile *c, */ brw_push_insn_state(p); - if (intel->gen >= 6) - base_reg = nr; - else - base_reg = 0; - for (channel = 0; channel < 4; channel++) { if (intel->gen >= 6) { /* gen6 SIMD16 single source DP write looks like: @@ -1493,9 +1465,9 @@ void emit_fb_write(struct brw_wm_compile *c, brw_pop_insn_state(p); - if (c->key.source_depth_to_render_target) + if (c->source_depth_to_render_target) { - if (c->key.computes_depth) + if (c->computes_depth) brw_MOV(p, brw_message_reg(nr), arg2[2]); else brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */ @@ -1503,10 +1475,10 @@ void emit_fb_write(struct brw_wm_compile *c, nr += 2; } - if (c->key.dest_depth_reg) + if (c->dest_depth_reg) { - GLuint comp = c->key.dest_depth_reg / 2; - GLuint off = c->key.dest_depth_reg % 2; + GLuint comp = c->dest_depth_reg / 2; + GLuint off = c->dest_depth_reg % 2; if (off != 0) { brw_push_insn_state(p); @@ -1524,15 +1496,27 @@ void emit_fb_write(struct brw_wm_compile *c, } if (intel->gen >= 6) { - /* Subtract off the message header, since we send headerless. */ - nr -= 2; + /* Load the message header. There's no implied move from src0 + * to the base mrf on gen6. + */ + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, brw_message_reg(0), brw_vec8_grf(0, 0)); + brw_pop_insn_state(p); + + if (target != 0) { + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, + 0, + 2), BRW_REGISTER_TYPE_UD), + brw_imm_ud(target)); + } } - if (!c->key.runtime_check_aads_emit) { - if (c->key.aa_dest_stencil_reg) + if (!c->runtime_check_aads_emit) { + if (c->aa_dest_stencil_reg) emit_aa(c, arg1, 2); - fire_fb_write(c, base_reg, nr, target, eot); + fire_fb_write(c, 0, nr, target, eot); } else { struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); @@ -1897,10 +1881,6 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_kil(c, args[0]); break; - case OPCODE_KIL_NV: - emit_kil_nv(c); - break; - default: printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 2cae698880..4759b289a0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -338,11 +338,13 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) { - /* This is only called for producing 1/w in pre-gen6 interp. for - * gen6, the interp opcodes don't use this argument. + /* This is called for producing 1/w in pre-gen6 interp. for gen6, + * the interp opcodes don't use this argument. But to keep the + * nr_args = 3 expectations of pinterp happy, just stuff delta_xy + * into the slot. */ if (c->func.brw->intel.gen >= 6) - return src_undef(); + return c->delta_xy; if (src_is_undef(c->pixel_w)) { struct prog_dst_register pixel_w = get_temp(c); @@ -373,11 +375,7 @@ static void emit_interp( struct brw_wm_compile *c, struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); struct prog_src_register deltas; - if (c->func.brw->intel.gen < 6) { - deltas = get_delta_xy(c); - } else { - deltas = src_undef(); - } + deltas = get_delta_xy(c); /* Need to use PINTERP on attributes which have been * multiplied by 1/W in the SF program, and LINTERP on those @@ -1133,6 +1131,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) precalc_lit(c, inst); break; + case OPCODE_RSQ: + out = emit_scalar_insn(c, inst); + out->SrcReg[0].Abs = GL_TRUE; + break; + case OPCODE_TEX: precalc_tex(c, inst); break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c deleted file mode 100644 index 7fe8ab1f33..0000000000 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ /dev/null @@ -1,1035 +0,0 @@ -#include "main/macros.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_optimize.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" - -static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint component); - -/** - * Determine if the given fragment program uses GLSL features such - * as flow conditionals, loops, subroutines. - * Some GLSL shaders may use these features, others might not. - */ -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) -{ - int i; - - if (unlikely(INTEL_DEBUG & DEBUG_GLSL_FORCE)) - return GL_TRUE; - - for (i = 0; i < fp->Base.NumInstructions; i++) { - const struct prog_instruction *inst = &fp->Base.Instructions[i]; - switch (inst->Opcode) { - case OPCODE_ARL: - case OPCODE_IF: - case OPCODE_ENDIF: - case OPCODE_CAL: - case OPCODE_BRK: - case OPCODE_RET: - case OPCODE_BGNLOOP: - return GL_TRUE; - default: - break; - } - } - return GL_FALSE; -} - - - -static void -reclaim_temps(struct brw_wm_compile *c); - - -/** Mark GRF register as used. */ -static void -prealloc_grf(struct brw_wm_compile *c, int r) -{ - c->used_grf[r] = GL_TRUE; -} - - -/** Mark given GRF register as not in use. */ -static void -release_grf(struct brw_wm_compile *c, int r) -{ - /*assert(c->used_grf[r]);*/ - c->used_grf[r] = GL_FALSE; - c->first_free_grf = MIN2(c->first_free_grf, r); -} - - -/** Return index of a free GRF, mark it as used. */ -static int -alloc_grf(struct brw_wm_compile *c) -{ - GLuint r; - for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { - if (!c->used_grf[r]) { - c->used_grf[r] = GL_TRUE; - c->first_free_grf = r + 1; /* a guess */ - return r; - } - } - - /* no free temps, try to reclaim some */ - reclaim_temps(c); - c->first_free_grf = 0; - - /* try alloc again */ - for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { - if (!c->used_grf[r]) { - c->used_grf[r] = GL_TRUE; - c->first_free_grf = r + 1; /* a guess */ - return r; - } - } - - for (r = 0; r < BRW_WM_MAX_GRF; r++) { - assert(c->used_grf[r]); - } - - /* really, no free GRF regs found */ - if (!c->out_of_regs) { - /* print warning once per compilation */ - _mesa_warning(NULL, "i965: ran out of registers for fragment program"); - c->out_of_regs = GL_TRUE; - } - - return -1; -} - - -/** Return number of GRF registers used */ -static int -num_grf_used(const struct brw_wm_compile *c) -{ - int r; - for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) - if (c->used_grf[r]) - return r + 1; - return 0; -} - - - -/** - * Record the mapping of a Mesa register to a hardware register. - */ -static void set_reg(struct brw_wm_compile *c, int file, int index, - int component, struct brw_reg reg) -{ - c->wm_regs[file][index][component].reg = reg; - c->wm_regs[file][index][component].inited = GL_TRUE; -} - -static struct brw_reg alloc_tmp(struct brw_wm_compile *c) -{ - struct brw_reg reg; - - /* if we need to allocate another temp, grow the tmp_regs[] array */ - if (c->tmp_index == c->tmp_max) { - int r = alloc_grf(c); - if (r < 0) { - /*printf("Out of temps in %s\n", __FUNCTION__);*/ - r = 50; /* XXX random register! */ - } - c->tmp_regs[ c->tmp_max++ ] = r; - } - - /* form the GRF register */ - reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); - /*printf("alloc_temp %d\n", reg.nr);*/ - assert(reg.nr < BRW_WM_MAX_GRF); - return reg; - -} - -/** - * Save current temp register info. - * There must be a matching call to release_tmps(). - */ -static int mark_tmps(struct brw_wm_compile *c) -{ - return c->tmp_index; -} - -static void release_tmps(struct brw_wm_compile *c, int mark) -{ - c->tmp_index = mark; -} - -/** - * Convert Mesa src register to brw register. - * - * Since we're running in SOA mode each Mesa register corresponds to four - * hardware registers. We allocate the hardware registers as needed here. - * - * \param file register file, one of PROGRAM_x - * \param index register number - * \param component src component (X=0, Y=1, Z=2, W=3) - * \param nr not used?!? - * \param neg negate value? - * \param abs take absolute value? - */ -static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, - int nr, GLuint neg, GLuint abs) -{ - struct brw_reg reg; - switch (file) { - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - file = PROGRAM_STATE_VAR; - break; - case PROGRAM_UNDEFINED: - return brw_null_reg(); - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - case PROGRAM_OUTPUT: - case PROGRAM_PAYLOAD: - break; - default: - _mesa_problem(NULL, "Unexpected file in get_reg()"); - return brw_null_reg(); - } - - assert(index < 256); - assert(component < 4); - - /* see if we've already allocated a HW register for this Mesa register */ - if (c->wm_regs[file][index][component].inited) { - /* yes, re-use */ - reg = c->wm_regs[file][index][component].reg; - } - else { - /* no, allocate new register */ - int grf = alloc_grf(c); - /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ - if (grf < 0) { - /* totally out of temps */ - grf = 51; /* XXX random register! */ - } - - reg = brw_vec8_grf(grf, 0); - /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ - - set_reg(c, file, index, component, reg); - } - - if (neg & (1 << component)) { - reg = negate(reg); - } - if (abs) - reg = brw_abs(reg); - return reg; -} - - - -/** - * This is called if we run out of GRF registers. Examine the live intervals - * of temp regs in the program and free those which won't be used again. - */ -static void -reclaim_temps(struct brw_wm_compile *c) -{ - GLint intBegin[MAX_PROGRAM_TEMPS]; - GLint intEnd[MAX_PROGRAM_TEMPS]; - int index; - - /*printf("Reclaim temps:\n");*/ - - _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, - intBegin, intEnd); - - for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { - if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { - /* program temp[i] can be freed */ - int component; - /*printf(" temp[%d] is dead\n", index);*/ - for (component = 0; component < 4; component++) { - if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { - int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; - release_grf(c, r); - /* - printf(" Reclaim temp %d, reg %d at inst %d\n", - index, r, c->cur_inst); - */ - c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; - } - } - } - } -} - - - - -/** - * Preallocate registers. This sets up the Mesa to hardware register - * mapping for certain registers, such as constants (uniforms/state vars) - * and shader inputs. - */ -static void prealloc_reg(struct brw_wm_compile *c) -{ - struct intel_context *intel = &c->func.brw->intel; - int i, j; - struct brw_reg reg; - int urb_read_length = 0; - GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; - GLuint reg_index = 0; - - memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); - c->first_free_grf = 0; - - for (i = 0; i < 4; i++) { - if (i < (c->key.nr_payload_regs + 1) / 2) - reg = brw_vec8_grf(i * 2, 0); - else - reg = brw_vec8_grf(0, 0); - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); - } - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_W, 0, - brw_vec8_grf(c->key.source_w_reg, 0)); - reg_index += c->key.nr_payload_regs; - - /* constants */ - { - const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; - const GLuint nr_temps = c->fp->program.Base.NumTemporaries; - - /* use a real constant buffer, or just use a section of the GRF? */ - /* XXX this heuristic may need adjustment... */ - if ((nr_params + nr_temps) * 4 + reg_index > 80) { - for (i = 0; i < nr_params; i++) { - float *pv = c->fp->program.Base.Parameters->ParameterValues[i]; - for (j = 0; j < 4; j++) { - c->prog_data.pull_param[c->prog_data.nr_pull_params] = &pv[j]; - c->prog_data.nr_pull_params++; - } - } - - c->prog_data.nr_params = 0; - } - /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ - - if (!c->prog_data.nr_pull_params) { - const struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - - /* number of float constants in CURBE */ - c->prog_data.nr_params = 4 * nr_params; - - /* loop over program constants (float[4]) */ - for (i = 0; i < nr_params; i++) { - /* loop over XYZW channels */ - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(reg_index + index / 8, index % 8); - /* Save pointer to parameter/constant value. - * Constants will be copied in prepare_constant_buffer() - */ - c->prog_data.param[index] = &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - /* number of constant regs used (each reg is float[8]) */ - c->nr_creg = ALIGN(nr_params, 2) / 2; - reg_index += c->nr_creg; - } - } - - /* fragment shader inputs: One 2-reg pair of interpolation - * coefficients for each vec4 to be set up. - */ - if (intel->gen >= 6) { - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - if (!(c->fp->program.Base.InputsRead & BITFIELD64_BIT(i))) - continue; - - reg = brw_vec8_grf(reg_index, 0); - for (j = 0; j < 4; j++) { - set_reg(c, PROGRAM_PAYLOAD, i, j, reg); - } - reg_index += 2; - } - urb_read_length = reg_index; - } else { - for (i = 0; i < VERT_RESULT_MAX; i++) { - int fp_input; - - if (i >= VERT_RESULT_VAR0) - fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; - else if (i <= VERT_RESULT_TEX7) - fp_input = i; - else - fp_input = -1; - - if (fp_input >= 0 && inputs & (1 << fp_input)) { - urb_read_length = reg_index; - reg = brw_vec8_grf(reg_index, 0); - for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); - } - if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { - reg_index += 2; - } - } - } - - c->prog_data.first_curbe_grf = c->key.nr_payload_regs; - c->prog_data.urb_read_length = urb_read_length; - c->prog_data.curb_read_length = c->nr_creg; - c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); - reg_index++; - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); - reg_index += 2; - - /* mark GRF regs [0..reg_index-1] as in-use */ - for (i = 0; i < reg_index; i++) - prealloc_grf(c, i); - - /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ - prealloc_grf(c, 126); - prealloc_grf(c, 127); - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - struct brw_reg dst[4]; - - switch (inst->Opcode) { - case OPCODE_TEX: - case OPCODE_TXB: - /* Allocate the channels of texture results contiguously, - * since they are written out that way by the sampler unit. - */ - for (j = 0; j < 4; j++) { - dst[j] = get_dst_reg(c, inst, j); - if (j != 0) - assert(dst[j].nr == dst[j - 1].nr + 1); - } - break; - default: - break; - } - } - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - - switch (inst->Opcode) { - case WM_DELTAXY: - /* Allocate WM_DELTAXY destination on G45/GM45 to an - * even-numbered GRF if possible so that we can use the PLN - * instruction. - */ - if (inst->DstReg.WriteMask == WRITEMASK_XY && - !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited && - !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited && - (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) { - int grf; - - for (grf = c->first_free_grf & ~1; - grf < BRW_WM_MAX_GRF; - grf += 2) - { - if (!c->used_grf[grf] && !c->used_grf[grf + 1]) { - c->used_grf[grf] = GL_TRUE; - c->used_grf[grf + 1] = GL_TRUE; - c->first_free_grf = grf + 2; /* a guess */ - - set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0, - brw_vec8_grf(grf, 0)); - set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1, - brw_vec8_grf(grf + 1, 0)); - break; - } - } - } - default: - break; - } - } - - /* An instruction may reference up to three constants. - * They'll be found in these registers. - * XXX alloc these on demand! - */ - if (c->prog_data.nr_pull_params) { - for (i = 0; i < 3; i++) { - c->current_const[i].index = -1; - c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); - } - } -#if 0 - printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); - printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); -#endif -} - - -/** - * Check if any of the instruction's src registers are constants, uniforms, - * or statevars. If so, fetch any constants that we don't already have in - * the three GRF slots. - */ -static void fetch_constants(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint i; - - /* loop over instruction src regs */ - for (i = 0; i < 3; i++) { - const struct prog_src_register *src = &inst->SrcReg[i]; - if (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM) { - c->current_const[i].index = src->Index; - -#if 0 - printf(" fetch const[%d] for arg %d into reg %d\n", - src->Index, i, c->current_const[i].reg.nr); -#endif - - /* need to fetch the constant now */ - brw_oword_block_read(p, - c->current_const[i].reg, - brw_message_reg(1), - 16 * src->Index, - SURF_INDEX_FRAG_CONST_BUFFER); - } - } -} - - -/** - * Convert Mesa dst register to brw register. - */ -static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint component) -{ - const int nr = 1; - return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, - 0, 0); -} - - -static struct brw_reg -get_src_reg_const(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint component) -{ - /* We should have already fetched the constant from the constant - * buffer in fetch_constants(). Now we just have to return a - * register description that extracts the needed component and - * smears it across all eight vector components. - */ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - struct brw_reg const_reg; - - assert(component < 4); - assert(srcRegIndex < 3); - assert(c->current_const[srcRegIndex].index != -1); - const_reg = c->current_const[srcRegIndex].reg; - - /* extract desired float from the const_reg, and smear */ - const_reg = stride(const_reg, 0, 1, 0); - const_reg.subnr = component * 4; - - if (src->Negate & (1 << component)) - const_reg = negate(const_reg); - if (src->Abs) - const_reg = brw_abs(const_reg); - -#if 0 - printf(" form const[%d].%d for arg %d, reg %d\n", - c->current_const[srcRegIndex].index, - component, - srcRegIndex, - const_reg.nr); -#endif - - return const_reg; -} - - -/** - * Convert Mesa src register to brw register. - */ -static struct brw_reg get_src_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint channel) -{ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - const GLuint nr = 1; - const GLuint component = GET_SWZ(src->Swizzle, channel); - - /* Only one immediate value can be used per native opcode, and it - * has be in the src1 slot, so not all Mesa instructions will get - * to take advantage of immediate constants. - */ - if (brw_wm_arg_can_be_immediate(inst->Opcode, srcRegIndex)) { - const struct gl_program_parameter_list *params; - - params = c->fp->program.Base.Parameters; - - /* Extended swizzle terms */ - if (component == SWIZZLE_ZERO) { - return brw_imm_f(0.0F); - } else if (component == SWIZZLE_ONE) { - if (src->Negate) - return brw_imm_f(-1.0F); - else - return brw_imm_f(1.0F); - } - - if (src->File == PROGRAM_CONSTANT) { - float f = params->ParameterValues[src->Index][component]; - - if (src->Abs) - f = fabs(f); - if (src->Negate) - f = -f; - - return brw_imm_f(f); - } - } - - if (c->prog_data.nr_pull_params && - (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM)) { - return get_src_reg_const(c, inst, srcRegIndex, component); - } - else { - /* other type of source register */ - return get_reg(c, src->File, src->Index, component, nr, - src->Negate, src->Abs); - } -} - -static void emit_arl(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, addr_reg; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_ADDRESS, 0); - src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ - brw_MOV(p, addr_reg, src0); - brw_set_saturate(p, 0); -} - -static INLINE struct brw_reg high_words( struct brw_reg reg ) -{ - return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), - 0, 8, 2 ); -} - -static INLINE struct brw_reg low_words( struct brw_reg reg ) -{ - return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); -} - -static INLINE struct brw_reg even_bytes( struct brw_reg reg ) -{ - return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); -} - -static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) -{ - return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), - 0, 16, 2 ); -} - -/** - * Resolve subroutine calls after code emit is done. - */ -static void post_wm_emit( struct brw_wm_compile *c ) -{ - brw_resolve_cals(&c->func); -} - -static void -get_argument_regs(struct brw_wm_compile *c, - const struct prog_instruction *inst, - int index, - struct brw_reg *dst, - struct brw_reg *regs, - int mask) -{ - struct brw_compile *p = &c->func; - int i, j; - - for (i = 0; i < 4; i++) { - if (mask & (1 << i)) { - regs[i] = get_src_reg(c, inst, index, i); - - /* Unalias destination registers from our sources. */ - if (regs[i].file == BRW_GENERAL_REGISTER_FILE) { - for (j = 0; j < 4; j++) { - if (memcmp(®s[i], &dst[j], sizeof(regs[0])) == 0) { - struct brw_reg tmp = alloc_tmp(c); - brw_MOV(p, tmp, regs[i]); - regs[i] = tmp; - break; - } - } - } - } - } -} - -static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) -{ - struct intel_context *intel = &brw->intel; -#define MAX_IF_DEPTH 32 -#define MAX_LOOP_DEPTH 32 - struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; - int if_depth_in_loop[MAX_LOOP_DEPTH]; - GLuint i, if_depth = 0, loop_depth = 0; - struct brw_compile *p = &c->func; - struct brw_indirect stack_index = brw_indirect(0, 0); - - c->out_of_regs = GL_FALSE; - - if_depth_in_loop[loop_depth] = 0; - - prealloc_reg(c); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); - - if (intel->gen >= 6) - brw_set_acc_write_control(p, 1); - - for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; - int dst_flags; - struct brw_reg args[3][4], dst[4]; - int j; - int mark = mark_tmps( c ); - - c->cur_inst = i; - -#if 0 - printf("Inst %d: ", i); - _mesa_print_instruction(inst); -#endif - - /* fetch any constants that this instruction needs */ - if (c->prog_data.nr_pull_params) - fetch_constants(c, inst); - - if (inst->Opcode != OPCODE_ARL) { - for (j = 0; j < 4; j++) { - if (inst->DstReg.WriteMask & (1 << j)) - dst[j] = get_dst_reg(c, inst, j); - else - dst[j] = brw_null_reg(); - } - } - for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) - get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW); - - dst_flags = inst->DstReg.WriteMask; - if (inst->SaturateMode == SATURATE_ZERO_ONE) - dst_flags |= SATURATE; - - if (inst->CondUpdate) - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - else - brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); - - switch (inst->Opcode) { - case WM_PIXELXY: - emit_pixel_xy(c, dst, dst_flags); - break; - case WM_DELTAXY: - emit_delta_xy(p, dst, dst_flags, args[0]); - break; - case WM_PIXELW: - emit_pixel_w(c, dst, dst_flags, args[0], args[1]); - break; - case WM_LINTERP: - emit_linterp(p, dst, dst_flags, args[0], args[1]); - break; - case WM_PINTERP: - emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case WM_CINTERP: - emit_cinterp(p, dst, dst_flags, args[0]); - break; - case WM_WPOSXY: - emit_wpos_xy(c, dst, dst_flags, args[0]); - break; - case WM_FB_WRITE: - emit_fb_write(c, args[0], args[1], args[2], - INST_AUX_GET_TARGET(inst->Aux), - inst->Aux & INST_AUX_EOT); - break; - case WM_FRONTFACING: - emit_frontfacing(p, dst, dst_flags); - break; - case OPCODE_ADD: - emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_ARL: - emit_arl(c, inst); - break; - case OPCODE_FRC: - emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); - break; - case OPCODE_FLR: - emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); - break; - case OPCODE_LRP: - emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_TRUNC: - emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); - break; - case OPCODE_MOV: - case OPCODE_SWZ: - emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); - break; - case OPCODE_DP2: - emit_dp2(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DP3: - emit_dp3(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DP4: - emit_dp4(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_XPD: - emit_xpd(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DPH: - emit_dph(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_RCP: - emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); - break; - case OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); - break; - case OPCODE_SIN: - emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); - break; - case OPCODE_COS: - emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); - break; - case OPCODE_EX2: - emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); - break; - case OPCODE_LG2: - emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); - break; - case OPCODE_CMP: - emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_MIN: - emit_min(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_MAX: - emit_max(p, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_DDX: - case OPCODE_DDY: - emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), - args[0]); - break; - case OPCODE_SLT: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_L, args[0], args[1]); - break; - case OPCODE_SLE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_LE, args[0], args[1]); - break; - case OPCODE_SGT: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_G, args[0], args[1]); - break; - case OPCODE_SGE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_GE, args[0], args[1]); - break; - case OPCODE_SEQ: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_EQ, args[0], args[1]); - break; - case OPCODE_SNE: - emit_sop(p, dst, dst_flags, - BRW_CONDITIONAL_NEQ, args[0], args[1]); - break; - case OPCODE_SSG: - emit_sign(p, dst, dst_flags, args[0]); - break; - case OPCODE_MUL: - emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); - break; - case OPCODE_POW: - emit_math2(c, BRW_MATH_FUNCTION_POW, - dst, dst_flags, args[0], args[1]); - break; - case OPCODE_MAD: - emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); - break; - case OPCODE_TEX: - emit_tex(c, dst, dst_flags, args[0], - get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, - 0, 1, 0, 0), - inst->TexSrcTarget, - inst->TexSrcUnit, - (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0); - break; - case OPCODE_TXB: - emit_txb(c, dst, dst_flags, args[0], - get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, - 0, 1, 0, 0), - inst->TexSrcTarget, - c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]); - break; - case OPCODE_KIL_NV: - emit_kil_nv(c); - break; - case OPCODE_IF: - assert(if_depth < MAX_IF_DEPTH); - if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); - if_depth_in_loop[loop_depth]++; - break; - case OPCODE_ELSE: - assert(if_depth > 0); - if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); - break; - case OPCODE_ENDIF: - assert(if_depth > 0); - brw_ENDIF(p, if_inst[--if_depth]); - if_depth_in_loop[loop_depth]--; - break; - case OPCODE_BGNSUB: - brw_save_label(p, inst->Comment, p->nr_insn); - break; - case OPCODE_ENDSUB: - /* no-op */ - break; - case OPCODE_CAL: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(4)); - brw_save_call(&c->func, inst->Comment, p->nr_insn); - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_pop_insn_state(p); - break; - - case OPCODE_RET: - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(-4)); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_pop_insn_state(p); - - break; - case OPCODE_BGNLOOP: - /* XXX may need to invalidate the current_constant regs */ - loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); - if_depth_in_loop[loop_depth] = 0; - break; - case OPCODE_BRK: - brw_BREAK(p, if_depth_in_loop[loop_depth]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_CONT: - brw_CONT(p, if_depth_in_loop[loop_depth]); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case OPCODE_ENDLOOP: - { - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - if (intel->gen == 5) - br = 2; - - assert(loop_depth > 0); - loop_depth--; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BGNLOOP */ - while (inst0 > loop_inst[loop_depth]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && - inst0->bits3.if_else.jump_count == 0) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - } - } - } - break; - default: - printf("unsupported opcode %d (%s) in fragment shader\n", - inst->Opcode, inst->Opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->Opcode) : "unknown"); - } - - /* Release temporaries containing any unaliased source regs. */ - release_tmps( c, mark ); - - if (inst->CondUpdate) - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - else - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - post_wm_emit(c); - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("wm-native:\n"); - for (i = 0; i < p->nr_insn; i++) - brw_disasm(stdout, &p->store[i], intel->gen); - printf("\n"); - } -} - -/** - * Do GPU code generation for shaders that use GLSL features such as - * flow control. Other shaders will be compiled with the - */ -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) -{ - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - printf("brw_wm_glsl_emit:\n"); - } - - /* initial instruction translation/simplification */ - brw_wm_pass_fp(c); - - /* actual code generation */ - brw_wm_emit_glsl(brw, c); - - if (unlikely(INTEL_DEBUG & DEBUG_WM)) { - brw_wm_print_program(c, "brw_wm_glsl_emit done"); - } - - c->prog_data.total_grf = num_grf_used(c); - c->prog_data.total_scratch = 0; -} diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c index 62e556698b..471ea1c18d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.c +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c @@ -120,14 +120,14 @@ const struct { * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES * \param lookup bitmask of IZ_* flags */ -void brw_wm_lookup_iz( struct intel_context *intel, - GLuint line_aa, - GLuint lookup, - GLboolean ps_uses_depth, - struct brw_wm_prog_key *key ) +void brw_wm_lookup_iz(struct intel_context *intel, + struct brw_wm_compile *c) { GLuint reg = 2; GLboolean kill_stats_promoted_workaround = GL_FALSE; + int lookup = c->key.iz_lookup; + bool uses_depth = (c->fp->program.Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; assert (lookup < IZ_BIT_MAX); @@ -136,36 +136,36 @@ void brw_wm_lookup_iz( struct intel_context *intel, * statistics are enabled..." paragraph of 11.5.3.2: Early Depth * Test Cases [Pre-DevGT] of the 3D Pipeline - Windower B-Spec. */ - if (intel->stats_wm && + if (c->key.stats_wm && (lookup & IZ_PS_KILL_ALPHATEST_BIT) && wm_iz_table[lookup].mode == P) { kill_stats_promoted_workaround = GL_TRUE; } if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) - key->computes_depth = 1; + c->computes_depth = 1; - if (wm_iz_table[lookup].sd_present || ps_uses_depth || + if (wm_iz_table[lookup].sd_present || uses_depth || kill_stats_promoted_workaround) { - key->source_depth_reg = reg; + c->source_depth_reg = reg; reg += 2; } if (wm_iz_table[lookup].sd_to_rt || kill_stats_promoted_workaround) - key->source_depth_to_render_target = 1; + c->source_depth_to_render_target = 1; - if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { - key->aa_dest_stencil_reg = reg; - key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && - line_aa == AA_SOMETIMES); + if (wm_iz_table[lookup].ds_present || c->key.line_aa != AA_NEVER) { + c->aa_dest_stencil_reg = reg; + c->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && + c->key.line_aa == AA_SOMETIMES); reg++; } if (wm_iz_table[lookup].dd_present) { - key->dest_depth_reg = reg; + c->dest_depth_reg = reg; reg+=2; } - key->nr_payload_regs = reg; + c->nr_payload_regs = reg; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 83152526b3..f78bdc3186 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -380,7 +380,7 @@ static void pass0_init_payload( struct brw_wm_compile *c ) GLuint i; for (i = 0; i < 4; i++) { - GLuint j = i >= (c->key.nr_payload_regs + 1) / 2 ? 0 : i; + GLuint j = i >= (c->nr_payload_regs + 1) / 2 ? 0 : i; pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, &c->payload.depth[j] ); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index 3a2874b6dd..7d6a3fa9f1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -128,8 +128,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) if (inst->opcode == WM_FB_WRITE) { track_arg(c, inst, 0, WRITEMASK_XYZW); track_arg(c, inst, 1, WRITEMASK_XYZW); - if (c->key.source_depth_to_render_target && - c->key.computes_depth) + if (c->source_depth_to_render_target && c->computes_depth) track_arg(c, inst, 2, WRITEMASK_Z); else track_arg(c, inst, 2, 0); @@ -281,7 +280,6 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case OPCODE_DST: case WM_FRONTFACING: - case OPCODE_KIL_NV: default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 44e3953814..8c2b9e7020 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -69,6 +69,8 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { + struct brw_context *brw = c->func.brw; + struct intel_context *intel = &brw->intel; GLuint nr_interp_regs = 0; GLuint i = 0; GLuint j; @@ -76,32 +78,41 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->grf_limit; j++) c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; - for (j = 0; j < (c->key.nr_payload_regs + 1) / 2; j++) + for (j = 0; j < (c->nr_payload_regs + 1) / 2; j++) prealloc_reg(c, &c->payload.depth[j], i++); for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < VERT_RESULT_MAX; j++) { - if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) { - int fp_index; - - if (j >= VERT_RESULT_VAR0) - fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); - else if (j <= VERT_RESULT_TEX7) - fp_index = j; - else - fp_index = -1; - - nr_interp_regs++; - if (fp_index >= 0) - prealloc_reg(c, &c->payload.input_interp[fp_index], i++); + if (intel->gen >= 6) { + for (unsigned int j = 0; j < FRAG_ATTRIB_MAX; j++) { + if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(j)) { + nr_interp_regs++; + prealloc_reg(c, &c->payload.input_interp[j], i++); + } + } + } else { + for (j = 0; j < VERT_RESULT_MAX; j++) { + if (c->key.vp_outputs_written & BITFIELD64_BIT(j)) { + int fp_index; + + if (j >= VERT_RESULT_VAR0) + fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else if (j <= VERT_RESULT_TEX7) + fp_index = j; + else + fp_index = -1; + + nr_interp_regs++; + if (fp_index >= 0) + prealloc_reg(c, &c->payload.input_interp[fp_index], i++); + } } + assert(nr_interp_regs >= 1); } - assert(nr_interp_regs >= 1); - c->prog_data.first_curbe_grf = ALIGN(c->key.nr_payload_regs, 2); + c->prog_data.first_curbe_grf = ALIGN(c->nr_payload_regs, 2); c->prog_data.urb_read_length = nr_interp_regs * 2; c->prog_data.curb_read_length = c->nr_creg * 2; diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index fea96d3538..e7c97a1cb0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -69,12 +69,43 @@ static GLuint translate_wrap_mode( GLenum wrap ) static drm_intel_bo *upload_default_color( struct brw_context *brw, const GLfloat *color ) { - struct brw_sampler_default_color sdc; + struct intel_context *intel = &brw->intel; - COPY_4V(sdc.color, color); - - return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, - &sdc, sizeof(sdc)); + if (intel->gen >= 5) { + struct gen5_sampler_default_color sdc; + + memset(&sdc, 0, sizeof(sdc)); + + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]); + + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]); + + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]); + + /* XXX: Fill in half floats */ + /* XXX: Fill in signed bytes */ + + COPY_4V(sdc.f, color); + + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc)); + } else { + struct brw_sampler_default_color sdc; + + COPY_4V(sdc.color, color); + + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc)); + } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 76de7b7b6f..e9ef635bca 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -87,7 +87,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { struct gl_context *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; - const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); @@ -132,7 +131,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->is_glsl = bfp->isGLSL; /* If using the fragment shader backend, the program is always * 8-wide. diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 76fc94df1f..1cd736a111 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -68,104 +68,54 @@ static GLuint translate_tex_target( GLenum target ) } } +static uint32_t brw_format_for_mesa_format[MESA_FORMAT_COUNT] = +{ + [MESA_FORMAT_L8] = BRW_SURFACEFORMAT_L8_UNORM, + [MESA_FORMAT_I8] = BRW_SURFACEFORMAT_I8_UNORM, + [MESA_FORMAT_A8] = BRW_SURFACEFORMAT_A8_UNORM, + [MESA_FORMAT_AL88] = BRW_SURFACEFORMAT_L8A8_UNORM, + [MESA_FORMAT_AL1616] = BRW_SURFACEFORMAT_L16A16_UNORM, + [MESA_FORMAT_R8] = BRW_SURFACEFORMAT_R8_UNORM, + [MESA_FORMAT_R16] = BRW_SURFACEFORMAT_R16_UNORM, + [MESA_FORMAT_RG88] = BRW_SURFACEFORMAT_R8G8_UNORM, + [MESA_FORMAT_RG1616] = BRW_SURFACEFORMAT_R16G16_UNORM, + [MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM, + [MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM, + [MESA_FORMAT_RGB565] = BRW_SURFACEFORMAT_B5G6R5_UNORM, + [MESA_FORMAT_ARGB1555] = BRW_SURFACEFORMAT_B5G5R5A1_UNORM, + [MESA_FORMAT_ARGB4444] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM, + [MESA_FORMAT_YCBCR_REV] = BRW_SURFACEFORMAT_YCRCB_NORMAL, + [MESA_FORMAT_YCBCR] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY, + [MESA_FORMAT_RGB_FXT1] = BRW_SURFACEFORMAT_FXT1, + [MESA_FORMAT_RGBA_FXT1] = BRW_SURFACEFORMAT_FXT1, + [MESA_FORMAT_RGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB, + [MESA_FORMAT_RGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM, + [MESA_FORMAT_RGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM, + [MESA_FORMAT_RGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM, + [MESA_FORMAT_SRGB_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB, + [MESA_FORMAT_SARGB8] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB, + [MESA_FORMAT_SLA8] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB, + [MESA_FORMAT_SL8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB, + [MESA_FORMAT_DUDV8] = BRW_SURFACEFORMAT_R8G8_SNORM, + [MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM, +}; static GLuint translate_tex_format( gl_format mesa_format, GLenum internal_format, GLenum depth_mode ) { switch( mesa_format ) { - case MESA_FORMAT_L8: - return BRW_SURFACEFORMAT_L8_UNORM; - - case MESA_FORMAT_I8: - return BRW_SURFACEFORMAT_I8_UNORM; - - case MESA_FORMAT_A8: - return BRW_SURFACEFORMAT_A8_UNORM; - - case MESA_FORMAT_AL88: - return BRW_SURFACEFORMAT_L8A8_UNORM; - - case MESA_FORMAT_AL1616: - return BRW_SURFACEFORMAT_L16A16_UNORM; - - case MESA_FORMAT_R8: - return BRW_SURFACEFORMAT_R8_UNORM; - - case MESA_FORMAT_R16: - return BRW_SURFACEFORMAT_R16_UNORM; - - case MESA_FORMAT_RG88: - return BRW_SURFACEFORMAT_R8G8_UNORM; - - case MESA_FORMAT_RG1616: - return BRW_SURFACEFORMAT_R16G16_UNORM; - - case MESA_FORMAT_RGB888: - assert(0); /* not supported for sampling */ - return BRW_SURFACEFORMAT_R8G8B8_UNORM; - - case MESA_FORMAT_ARGB8888: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - - case MESA_FORMAT_XRGB8888: - return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - - case MESA_FORMAT_RGBA8888_REV: - _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()"); - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; - - case MESA_FORMAT_RGB565: - return BRW_SURFACEFORMAT_B5G6R5_UNORM; - - case MESA_FORMAT_ARGB1555: - return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - - case MESA_FORMAT_ARGB4444: - return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - - case MESA_FORMAT_YCBCR_REV: - return BRW_SURFACEFORMAT_YCRCB_NORMAL; - - case MESA_FORMAT_YCBCR: - return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; - - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: - return BRW_SURFACEFORMAT_FXT1; case MESA_FORMAT_Z16: if (depth_mode == GL_INTENSITY) return BRW_SURFACEFORMAT_I16_UNORM; else if (depth_mode == GL_ALPHA) return BRW_SURFACEFORMAT_A16_UNORM; + else if (depth_mode == GL_RED) + return BRW_SURFACEFORMAT_R16_UNORM; else return BRW_SURFACEFORMAT_L16_UNORM; - case MESA_FORMAT_RGB_DXT1: - return BRW_SURFACEFORMAT_DXT1_RGB; - - case MESA_FORMAT_RGBA_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM; - - case MESA_FORMAT_RGBA_DXT3: - return BRW_SURFACEFORMAT_BC2_UNORM; - - case MESA_FORMAT_RGBA_DXT5: - return BRW_SURFACEFORMAT_BC3_UNORM; - - case MESA_FORMAT_SARGB8: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; - - case MESA_FORMAT_SLA8: - return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; - - case MESA_FORMAT_SL8: - return BRW_SURFACEFORMAT_L8_UNORM_SRGB; - - case MESA_FORMAT_SRGB_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; - case MESA_FORMAT_S8_Z24: /* XXX: these different surface formats don't seem to * make any difference for shadow sampler/compares. @@ -174,18 +124,14 @@ static GLuint translate_tex_format( gl_format mesa_format, return BRW_SURFACEFORMAT_I24X8_UNORM; else if (depth_mode == GL_ALPHA) return BRW_SURFACEFORMAT_A24X8_UNORM; + else if (depth_mode == GL_RED) + return BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS; else return BRW_SURFACEFORMAT_L24X8_UNORM; - case MESA_FORMAT_DUDV8: - return BRW_SURFACEFORMAT_R8G8_SNORM; - - case MESA_FORMAT_SIGNED_RGBA8888_REV: - return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; - default: - assert(0); - return 0; + assert(brw_format_for_mesa_format[mesa_format] != 0); + return brw_format_for_mesa_format[mesa_format]; } } @@ -274,6 +220,7 @@ brw_create_constant_surface(struct brw_context *brw, drm_intel_bo **out_bo, uint32_t *out_offset) { + struct intel_context *intel = &brw->intel; const GLint w = width - 1; struct brw_surface_state surf; void *map; @@ -284,6 +231,9 @@ brw_create_constant_surface(struct brw_context *brw, surf.ss0.surface_type = BRW_SURFACE_BUFFER; surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + if (intel->gen >= 6) + surf.ss0.render_cache_read_write = 1; + assert(bo); surf.ss1.base_addr = bo->offset; /* reloc */ @@ -440,45 +390,19 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.surface_type = BRW_SURFACE_2D; switch (irb->Base.Format) { - /* XRGB and ARGB are treated the same here because the chips in this - * family cannot render to XRGB targets. This means that we have to - * mask writes to alpha (ala glColorMask) and reconfigure the alpha - * blending hardware to use GL_ONE (or GL_ZERO) for cases where - * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used. - */ - case MESA_FORMAT_ARGB8888: case MESA_FORMAT_XRGB8888: + /* XRGB is handled as ARGB because the chips in this family + * cannot render to XRGB targets. This means that we have to + * mask writes to alpha (ala glColorMask) and reconfigure the + * alpha blending hardware to use GL_ONE (or GL_ZERO) for + * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is + * used. + */ key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; - case MESA_FORMAT_SARGB8: - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; - break; - case MESA_FORMAT_RGB565: - key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - break; - case MESA_FORMAT_ARGB1555: - key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - break; - case MESA_FORMAT_ARGB4444: - key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - break; - case MESA_FORMAT_A8: - key.surface_format = BRW_SURFACEFORMAT_A8_UNORM; - break; - case MESA_FORMAT_R8: - key.surface_format = BRW_SURFACEFORMAT_R8_UNORM; - break; - case MESA_FORMAT_R16: - key.surface_format = BRW_SURFACEFORMAT_R16_UNORM; - break; - case MESA_FORMAT_RG88: - key.surface_format = BRW_SURFACEFORMAT_R8G8_UNORM; - break; - case MESA_FORMAT_RG1616: - key.surface_format = BRW_SURFACEFORMAT_R16G16_UNORM; - break; default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format); + key.surface_format = brw_format_for_mesa_format[irb->Base.Format]; + assert(key.surface_format != 0); } key.tiling = region->tiling; key.width = rb->Width; diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index 800a255521..c2631a7b4d 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -35,6 +35,7 @@ struct gen6_blend_state_key { GLboolean color_blend, alpha_enabled; GLboolean dither; + GLboolean color_mask[BRW_MAX_DRAW_BUFFERS][4]; GLenum logic_op; @@ -54,6 +55,9 @@ blend_state_populate_key(struct brw_context *brw, memset(key, 0, sizeof(*key)); /* _NEW_COLOR */ + memcpy(key->color_mask, ctx->Color.ColorMask, sizeof(key->color_mask)); + + /* _NEW_COLOR */ if (ctx->Color._LogicOpEnabled) key->logic_op = ctx->Color.LogicOp; else @@ -87,54 +91,62 @@ static drm_intel_bo * blend_state_create_from_key(struct brw_context *brw, struct gen6_blend_state_key *key) { - struct gen6_blend_state blend; + struct gen6_blend_state blend[BRW_MAX_DRAW_BUFFERS]; drm_intel_bo *bo; + int b; memset(&blend, 0, sizeof(blend)); - if (key->logic_op != GL_COPY) { - blend.blend1.logic_op_enable = 1; - blend.blend1.logic_op_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) { - srcRGB = dstRGB = GL_ONE; - } - - if (eqA == GL_MIN || eqA == GL_MAX) { - srcA = dstA = GL_ONE; + for (b = 0; b < BRW_MAX_DRAW_BUFFERS; b++) { + if (key->logic_op != GL_COPY) { + blend[b].blend1.logic_op_enable = 1; + blend[b].blend1.logic_op_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend & (1 << b)) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + blend[b].blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); + blend[b].blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); + blend[b].blend0.blend_func = brw_translate_blend_equation(eqRGB); + + blend[b].blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + blend[b].blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); + blend[b].blend0.ia_blend_func = brw_translate_blend_equation(eqA); + + blend[b].blend0.blend_enable = 1; + blend[b].blend0.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); } - blend.blend0.dest_blend_factor = brw_translate_blend_factor(dstRGB); - blend.blend0.source_blend_factor = brw_translate_blend_factor(srcRGB); - blend.blend0.blend_func = brw_translate_blend_equation(eqRGB); - - blend.blend0.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - blend.blend0.ia_source_blend_factor = brw_translate_blend_factor(srcA); - blend.blend0.ia_blend_func = brw_translate_blend_equation(eqA); + if (key->alpha_enabled) { + blend[b].blend1.alpha_test_enable = 1; + blend[b].blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); - blend.blend0.blend_enable = 1; - blend.blend0.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB); - } - - if (key->alpha_enabled) { - blend.blend1.alpha_test_enable = 1; - blend.blend1.alpha_test_func = intel_translate_compare_func(key->alpha_func); + } - } + if (key->dither) { + blend[b].blend1.dither_enable = 1; + blend[b].blend1.y_dither_offset = 0; + blend[b].blend1.x_dither_offset = 0; + } - if (key->dither) { - blend.blend1.dither_enable = 1; - blend.blend1.y_dither_offset = 0; - blend.blend1.x_dither_offset = 0; + blend[b].blend1.write_disable_r = !key->color_mask[b][0]; + blend[b].blend1.write_disable_g = !key->color_mask[b][1]; + blend[b].blend1.write_disable_b = !key->color_mask[b][2]; + blend[b].blend1.write_disable_a = !key->color_mask[b][3]; } bo = brw_upload_cache(&brw->cache, BRW_BLEND_STATE, @@ -172,7 +184,7 @@ const struct brw_tracked_state gen6_blend_state = { }; struct gen6_color_calc_state_key { - GLubyte blend_constant_color[4]; + float blend_constant_color[4]; GLclampf alpha_ref; GLubyte stencil_ref[2]; }; diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index c65b41e2b6..c7c4eb1f27 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -64,7 +64,9 @@ upload_clip_state(struct brw_context *brw) userclip << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT | depth_clamp | provoking); - OUT_BATCH(GEN6_CLIP_FORCE_ZERO_RTAINDEX); + OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | + U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | + GEN6_CLIP_FORCE_ZERO_RTAINDEX); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 471067e8f0..45c148baed 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -33,9 +33,10 @@ #include "intel_batchbuffer.h" static uint32_t -get_attr_override(struct brw_context *brw, int fs_attr) +get_attr_override(struct brw_context *brw, int fs_attr, int two_side_color) { int attr_index = 0, i, vs_attr; + int bfc = 0; if (fs_attr <= FRAG_ATTRIB_TEX7) vs_attr = fs_attr; @@ -57,6 +58,30 @@ get_attr_override(struct brw_context *brw, int fs_attr) attr_index++; } + assert(attr_index < 32); + + if (two_side_color) { + if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) && + (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) { + assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)); + assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)); + bfc = 2; + } else if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) && + (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) + bfc = 1; + } + + if (bfc && (fs_attr <= FRAG_ATTRIB_TEX7 && fs_attr > FRAG_ATTRIB_WPOS)) { + if (fs_attr == FRAG_ATTRIB_COL0) + attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT); + else if (fs_attr == FRAG_ATTRIB_COL1 && bfc == 2) { + attr_index++; + attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT); + } else { + attr_index += bfc; + } + } + return attr_index; } @@ -67,13 +92,15 @@ upload_sf_state(struct brw_context *brw) struct gl_context *ctx = &intel->ctx; /* CACHE_NEW_VS_PROG */ uint32_t num_inputs = brw_count_bits(brw->vs.prog_data->outputs_written); + /* BRW_NEW_FRAGMENT_PROGRAM */ uint32_t num_outputs = brw_count_bits(brw->fragment_program->Base.InputsRead); - uint32_t dw1, dw2, dw3, dw4, dw16; + uint32_t dw1, dw2, dw3, dw4, dw16, dw17; int i; /* _NEW_BUFFER */ GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; int attr = 0; int urb_start; + int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) @@ -91,6 +118,7 @@ upload_sf_state(struct brw_context *brw) dw3 = 0; dw4 = 0; dw16 = 0; + dw17 = 0; /* _NEW_POLYGON */ if ((ctx->Polygon.FrontFace == GL_CCW) ^ render_to_fbo) @@ -99,6 +127,48 @@ upload_sf_state(struct brw_context *brw) if (ctx->Polygon.OffsetFill) dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID; + if (ctx->Polygon.OffsetLine) + dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME; + + if (ctx->Polygon.OffsetPoint) + dw2 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT; + + switch (ctx->Polygon.FrontMode) { + case GL_FILL: + dw2 |= GEN6_SF_FRONT_SOLID; + break; + + case GL_LINE: + dw2 |= GEN6_SF_FRONT_WIREFRAME; + break; + + case GL_POINT: + dw2 |= GEN6_SF_FRONT_POINT; + break; + + default: + assert(0); + break; + } + + switch (ctx->Polygon.BackMode) { + case GL_FILL: + dw2 |= GEN6_SF_BACK_SOLID; + break; + + case GL_LINE: + dw2 |= GEN6_SF_BACK_WIREFRAME; + break; + + case GL_POINT: + dw2 |= GEN6_SF_BACK_POINT; + break; + + default: + assert(0); + break; + } + /* _NEW_SCISSOR */ if (ctx->Scissor.Enabled) dw3 |= GEN6_SF_SCISSOR_ENABLE; @@ -160,6 +230,12 @@ upload_sf_state(struct brw_context *brw) } } + /* flat shading */ + if (ctx->Light.ShadeModel == GL_FLAT) { + dw17 |= ((brw->fragment_program->Base.InputsRead & (FRAG_BIT_COL0 | FRAG_BIT_COL1)) >> + ((brw->fragment_program->Base.InputsRead & FRAG_BIT_WPOS) ? 0 : 1)); + } + BEGIN_BATCH(20); OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2)); OUT_BATCH(dw1); @@ -174,7 +250,7 @@ upload_sf_state(struct brw_context *brw) for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr); + attr_overrides |= get_attr_override(brw, attr, two_side_color); attr++; break; } @@ -182,7 +258,7 @@ upload_sf_state(struct brw_context *brw) for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr) << 16; + attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16; attr++; break; } @@ -190,7 +266,7 @@ upload_sf_state(struct brw_context *brw) OUT_BATCH(attr_overrides); } OUT_BATCH(dw16); /* point sprite texcoord bitmask */ - OUT_BATCH(0); /* constant interp bitmask */ + OUT_BATCH(dw17); /* constant interp bitmask */ OUT_BATCH(0); /* wrapshortest enables 0-7 */ OUT_BATCH(0); /* wrapshortest enables 8-15 */ ADVANCE_BATCH(); @@ -205,7 +281,8 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_BUFFERS | _NEW_POINT | _NEW_TRANSFORM), - .brw = BRW_NEW_CONTEXT, + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_FRAGMENT_PROGRAM), .cache = CACHE_NEW_VS_PROG }, .emit = upload_sf_state, diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index a34123478f..de97fd3783 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -72,7 +72,7 @@ const struct brw_tracked_state gen6_urb = { .dirty = { .mesa = 0, .brw = BRW_NEW_CONTEXT, - .cache = CACHE_NEW_VS_PROG, + .cache = (CACHE_NEW_VS_PROG | CACHE_NEW_GS_PROG), }, .prepare = prepare_urb, .emit = upload_urb, diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index e94d0c0ddb..ed132bdbd9 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -54,7 +54,7 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(0); ADVANCE_BATCH(); } else { - int params_uploaded = 0; + int params_uploaded = 0, param_regs; float *param; if (brw->vertex_program->IsNVProgram) @@ -88,20 +88,11 @@ upload_vs_state(struct brw_context *brw) params_uploaded++; } - if (vp->use_const_buffer) { - for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { - if (brw->vs.constant_map[i] != -1) { - memcpy(param + brw->vs.constant_map[i] * 4, - vp->program.Base.Parameters->ParameterValues[i], - 4 * sizeof(float)); - params_uploaded++; - } - } - } else { - for (i = 0; i < nr_params; i++) { - memcpy(param, vp->program.Base.Parameters->ParameterValues[i], + for (i = 0; i < vp->program.Base.Parameters->NumParameters; i++) { + if (brw->vs.constant_map[i] != -1) { + memcpy(param + brw->vs.constant_map[i] * 4, + vp->program.Base.Parameters->ParameterValues[i], 4 * sizeof(float)); - param += 4; params_uploaded++; } } @@ -117,13 +108,16 @@ upload_vs_state(struct brw_context *brw) drm_intel_gem_bo_unmap_gtt(constant_bo); + param_regs = (params_uploaded + 1) / 2; + assert(param_regs <= 32); + BEGIN_BATCH(5); OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); OUT_RELOC(constant_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(params_uploaded, 2) / 2 - 1); + param_regs - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -136,6 +130,7 @@ upload_vs_state(struct brw_context *brw) OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2)); OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | + GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index ea5418bacf..2ae0c093eb 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -66,6 +66,21 @@ prepare_wm_constants(struct brw_context *brw) constants[i] = convert_param(brw->wm.prog_data->param_convert[i], *brw->wm.prog_data->param[i]); } + + if (0) { + printf("WM constants:\n"); + for (i = 0; i < brw->wm.prog_data->nr_params; i++) { + if ((i & 7) == 0) + printf("g%d: ", brw->wm.prog_data->first_curbe_grf + i / 8); + printf("%8f ", constants[i]); + if ((i & 7) == 7) + printf("\n"); + } + if ((i & 7) != 0) + printf("\n"); + printf("\n"); + } + drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo); } } @@ -88,6 +103,7 @@ upload_wm_state(struct brw_context *brw) brw_fragment_program_const(brw->fragment_program); uint32_t dw2, dw4, dw5, dw6; + /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->nr_params == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); @@ -104,7 +120,8 @@ upload_wm_state(struct brw_context *brw) (5 - 2)); OUT_RELOC(brw->wm.push_const_bo, I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */ - ALIGN(brw->wm.prog_data->nr_params, 8) / 8 - 1); + ALIGN(brw->wm.prog_data->nr_params, + brw->wm.prog_data->dispatch_width) / 8 - 1); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -116,6 +133,9 @@ upload_wm_state(struct brw_context *brw) dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; + /* OpenGL non-ieee floating point mode */ + dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT; + /* BRW_NEW_NR_WM_SURFACES */ dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT; @@ -126,8 +146,8 @@ upload_wm_state(struct brw_context *brw) dw5 |= (40 - 1) << GEN6_WM_MAX_THREADS_SHIFT; - /* BRW_NEW_FRAGMENT_PROGRAM */ - if (fp->isGLSL) + /* CACHE_NEW_WM_PROG */ + if (brw->wm.prog_data->dispatch_width == 8) dw5 |= GEN6_WM_8_DISPATCH_ENABLE; else dw5 |= GEN6_WM_16_DISPATCH_ENABLE; @@ -176,13 +196,14 @@ upload_wm_state(struct brw_context *brw) const struct brw_tracked_state gen6_wm_state = { .dirty = { .mesa = (_NEW_LINE | _NEW_POLYGONSTIPPLE | _NEW_COLOR | _NEW_BUFFERS | - _NEW_PROGRAM_CONSTANTS), + _NEW_PROGRAM_CONSTANTS | _NEW_POLYGON), .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_NR_WM_SURFACES | BRW_NEW_URB_FENCE | BRW_NEW_BATCH), - .cache = CACHE_NEW_SAMPLER + .cache = (CACHE_NEW_SAMPLER | + CACHE_NEW_WM_PROG) }, .emit = upload_wm_state, }; |