diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
33 files changed, 1040 insertions, 491 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 82370162f5..c724218cf5 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -88,7 +88,7 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) memset(key, 0, sizeof(*key)); - key->stencil = ctx->Stencil.Enabled; + key->stencil = ctx->Stencil._Enabled; key->stencil_two_side = ctx->Stencil._TestTwoSide; if (key->stencil) { diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index c45d48dff8..d830e49e50 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -181,34 +181,54 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); is_negative = brw_IF(p, BRW_EXECUTE_1); { - brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); - brw_MOV(p, c->reg.t1, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /* + * Both can be negative on GM965/G965 due to RHW workaround + * if so, this object should be rejected. + */ + if (!BRW_IS_G4X(p->brw)) { + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_neg2); + } + + brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); + brw_MOV(p, c->reg.t1, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); } is_negative = brw_ELSE(p, is_negative); { - /* Coming back in. We know that both cannot be negative - * because the line would have been culled in that case. - */ + /* Coming back in. We know that both cannot be negative + * because the line would have been culled in that case. + */ + + /* If both are positive, do nothing */ + /* Only on GM965/G965 */ + if (!BRW_IS_G4X(p->brw)) { + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + } - /* If both are positive, do nothing */ - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); - is_neg2 = brw_IF(p, BRW_EXECUTE_1); { - brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); - brw_math_invert(p, c->reg.t, c->reg.t); - brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); - - brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); - brw_MOV(p, c->reg.t0, c->reg.t); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } - brw_ENDIF(p, is_neg2); - } + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); + brw_MOV(p, c->reg.t0, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + if (!BRW_IS_G4X(p->brw)) { + brw_ENDIF(p, is_neg2); + } + } brw_ENDIF(p, is_negative); } brw_ENDIF(p, plane_active); diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index 1dbba37fe7..7fd37bd05f 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -455,6 +455,8 @@ static void brw_clip_test( struct brw_clip_compile *c ) struct brw_indirect vt2 = brw_indirect(2, 0); struct brw_compile *p = &c->func; + struct brw_instruction *is_outside; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); @@ -462,53 +464,87 @@ static void brw_clip_test( struct brw_clip_compile *c ) brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* test nearz, xmin, ymin plane */ - brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); + /* clip.xyz < -clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + is_outside = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_outside); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 0), brw_imm_ud(0)); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 1), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 2), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); /* test farz, xmax, ymax plane */ - brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); + /* clip.xyz > clip.w */ + brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); + brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); + brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /* All vertices are outside of a plane, rejected */ + brw_AND(p, t, t1, t2); + brw_AND(p, t, t, t3); + brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1)); + brw_OR(p, tmp0, tmp0, get_element(t, 2)); + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1)); + is_outside = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, is_outside); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* some vertices are inside a plane, some are outside,need to clip */ brw_XOR(p, t, t1, t2); brw_XOR(p, t1, t2, t3); brw_OR(p, t, t, t1); - - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 0), brw_imm_ud(0)); + brw_AND(p, t, t, brw_imm_ud(0x1)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 1), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, - get_element(t, 2), brw_imm_ud(0)); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); brw_set_predicate_control(p, BRW_PREDICATE_NONE); diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index eaac6224f6..d96ff29310 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -121,6 +121,9 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /* if conformance mode is set, swrast can handle any size AA point */ ctx->Const.MaxPointSizeAA = 255.0; + /* We want the GLSL compiler to emit code that uses condition codes */ + ctx->Shader.EmitCondCodes = GL_TRUE; + /* ctx->Const.MaxNativeVertexProgramTemps = 32; */ brw_init_state( brw ); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index df90c2027f..48ed4325be 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -46,7 +46,7 @@ * * CURBE - constant URB entry. An urb region (entry) used to hold * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. + * preload into the GRF when spawning a thread. * * VUE - vertex URB entry. An urb entry holding a vertex and usually * a vertex header. The header contains control information and @@ -63,7 +63,7 @@ * special and may be overwritten. * * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous + * by sending messages. Message parameters are placed in contiguous * MRF registers. All program output is via these messages. URB * entries are populated by sending a message to the shared URB * function containing the new data, together with a control word, @@ -154,21 +154,22 @@ struct brw_state_flags { GLuint cache; }; + +/** Subclass of Mesa vertex program */ struct brw_vertex_program { struct gl_vertex_program program; GLuint id; }; - +/** Subclass of Mesa fragment program */ struct brw_fragment_program { struct gl_fragment_program program; - GLuint id; + GLuint id; /**< serial no. to identify frag progs, never re-used */ + GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ }; - - /* Data about a particular attempt to compile a program. Note that * there can be many of these, each in a different GL state * corresponding to a different brw_wm_prog_key struct, with different @@ -418,8 +419,8 @@ struct brw_context struct brw_tracked_state **atoms; GLuint nr_atoms; - GLuint nr_draw_regions; - struct intel_region *draw_regions[MAX_DRAW_BUFFERS]; + GLuint nr_color_regions; + struct intel_region *color_regions[MAX_DRAW_BUFFERS]; struct intel_region *depth_region; /** @@ -627,8 +628,6 @@ struct brw_context * brw_vtbl.c */ void brwInitVtbl( struct brw_context *brw ); -void brw_do_flush( struct brw_context *brw, - GLuint flags ); /*====================================================================== * brw_context.c @@ -670,7 +669,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ); */ void brw_upload_urb_fence(struct brw_context *brw); -void brw_upload_constant_buffer_state(struct brw_context *brw); +/* brw_curbe.c + */ +void brw_upload_cs_urb_state(struct brw_context *brw); /*====================================================================== @@ -683,6 +684,32 @@ brw_context( GLcontext *ctx ) return (struct brw_context *)ctx; } +static INLINE struct brw_vertex_program * +brw_vertex_program(struct gl_vertex_program *p) +{ + return (struct brw_vertex_program *) p; +} + +static INLINE const struct brw_vertex_program * +brw_vertex_program_const(const struct gl_vertex_program *p) +{ + return (const struct brw_vertex_program *) p; +} + +static INLINE struct brw_fragment_program * +brw_fragment_program(struct gl_fragment_program *p) +{ + return (struct brw_fragment_program *) p; +} + +static INLINE const struct brw_fragment_program * +brw_fragment_program_const(const struct gl_fragment_program *p) +{ + return (const struct brw_fragment_program *) p; +} + + + #define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) #endif diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 4eaaa5f871..545dedd34b 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -53,7 +53,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; + const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); GLuint nr_vp_regs = (vp->program.Base.Parameters->NumParameters * 4 + 15) / 16; GLuint nr_clip_regs = 0; GLuint total_regs; @@ -138,24 +138,24 @@ const struct brw_tracked_state brw_curbe_offsets = { * fixed-function hardware in a double-buffering scheme to avoid a * pipeline stall each time the contents of the curbe is changed. */ -void brw_upload_constant_buffer_state(struct brw_context *brw) +void brw_upload_cs_urb_state(struct brw_context *brw) { - struct brw_constant_buffer_state cbs; - memset(&cbs, 0, sizeof(cbs)); + struct brw_cs_urb_state cs_urb; + memset(&cs_urb, 0, sizeof(cs_urb)); /* It appears that this is the state packet for the CS unit, ie. the * urb entries detailed here are housed in the CS range from the * URB_FENCE command. */ - cbs.header.opcode = CMD_CONST_BUFFER_STATE; - cbs.header.length = sizeof(cbs)/4 - 2; + cs_urb.header.opcode = CMD_CS_URB_STATE; + cs_urb.header.length = sizeof(cs_urb)/4 - 2; /* BRW_NEW_URB_FENCE */ - cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; - cbs.bits0.urb_entry_size = brw->urb.csize - 1; + cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cs_urb.bits0.urb_entry_size = brw->urb.csize - 1; assert(brw->urb.nr_cs_entries); - BRW_CACHED_BATCH_STRUCT(brw, &cbs); + BRW_CACHED_BATCH_STRUCT(brw, &cs_urb); } static GLfloat fixed_plane[6][4] = { @@ -174,10 +174,12 @@ static GLfloat fixed_plane[6][4] = { static void prepare_constant_buffer(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - GLuint sz = brw->curbe.total_size; - GLuint bufsz = sz * 16 * sizeof(GLfloat); + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + const GLuint sz = brw->curbe.total_size; + const GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; GLuint i; @@ -189,27 +191,25 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.tracked_state.dirty.mesa |= fp->program.Base.Parameters->StateFlags; if (sz == 0) { - if (brw->curbe.last_buf) { free(brw->curbe.last_buf); brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } - return; } - buf = (GLfloat *)malloc(bufsz); - - memset(buf, 0, bufsz); + buf = (GLfloat *) _mesa_calloc(bufsz); + /* fragment shader constants */ if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) - buf[offset + i] = brw->wm.prog_data->param[i][0]; + buf[offset + i] = *brw->wm.prog_data->param[i]; } @@ -244,7 +244,7 @@ static void prepare_constant_buffer(struct brw_context *brw) } } - + /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; GLuint nr = vp->program.Base.Parameters->NumParameters; @@ -252,10 +252,11 @@ static void prepare_constant_buffer(struct brw_context *brw) _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); for (i = 0; i < nr; i++) { - buf[offset + i * 4 + 0] = vp->program.Base.Parameters->ParameterValues[i][0]; - buf[offset + i * 4 + 1] = vp->program.Base.Parameters->ParameterValues[i][1]; - buf[offset + i * 4 + 2] = vp->program.Base.Parameters->ParameterValues[i][2]; - buf[offset + i * 4 + 3] = vp->program.Base.Parameters->ParameterValues[i][3]; + const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; + buf[offset + i * 4 + 0] = value[0]; + buf[offset + i * 4 + 1] = value[1]; + buf[offset + i * 4 + 2] = value[2]; + buf[offset + i * 4 + 3] = value[3]; } } @@ -274,11 +275,14 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.last_buf && bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { - free(buf); + /* constants have not changed */ + _mesa_free(buf); } else { + /* constants have changed */ if (brw->curbe.last_buf) - free(brw->curbe.last_buf); + _mesa_free(brw->curbe.last_buf); + brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 39c32255f8..590b064c7e 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -734,7 +734,7 @@ #define CMD_URB_FENCE 0x6000 -#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CS_URB_STATE 0x6001 #define CMD_CONST_BUFFER 0x6002 #define CMD_STATE_BASE_ADDRESS 0x6101 diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 99fd587e9f..5342622a73 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -127,6 +127,7 @@ static void brw_emit_prim(struct brw_context *brw, uint32_t hw_prim) { struct brw_3d_primitive prim_packet; + struct intel_context *intel = &brw->intel; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), @@ -146,10 +147,27 @@ static void brw_emit_prim(struct brw_context *brw, /* Can't wrap here, since we rely on the validated state. */ brw->no_batch_wrap = GL_TRUE; + + /* If we're set to always flush, do it before and after the primitive emit. + * We want to catch both missed flushes that hurt instruction/state cache + * and missed flushes of the render cache as it heads to other parts of + * the besides the draw code. + */ + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } if (prim_packet.verts_per_instance) { intel_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } + brw->no_batch_wrap = GL_FALSE; } @@ -194,9 +212,16 @@ static GLboolean check_fallbacks( struct brw_context *brw, GLcontext *ctx = &brw->intel.ctx; GLuint i; - if (!brw->intel.strict_conformance) + /* If we don't require strict OpenGL conformance, never + * use fallbacks. If we're forcing fallbacks, always + * use fallfacks. + */ + if (brw->intel.conformance_mode == 0) return GL_FALSE; + if (brw->intel.conformance_mode == 2) + return GL_TRUE; + if (ctx->Polygon.SmoothFlag) { for (i = 0; i < nr_prims; i++) if (reduced_prim[prim[i].mode] == GL_TRIANGLES) @@ -220,7 +245,7 @@ static GLboolean check_fallbacks( struct brw_context *brw, /* GS doesn't get enough information to know when to reset * the stipple counter?!? */ - if (prim[i].mode == GL_LINE_LOOP) + if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) return GL_TRUE; if (prim[i].mode == GL_POLYGON && @@ -230,13 +255,46 @@ static GLboolean check_fallbacks( struct brw_context *brw, } } - if (ctx->Point.SmoothFlag) { for (i = 0; i < nr_prims; i++) if (prim[i].mode == GL_POINTS) return GL_TRUE; } + + /* BRW hardware doesn't handle GL_CLAMP texturing correctly; + * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP + * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and + * we want strict conformance, force the fallback. + * Right now, we only do this for 2D textures. + */ + { + int u; + for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; + if (texUnit->Enabled) { + if (texUnit->Enabled & TEXTURE_1D_BIT) { + if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_2D_BIT) { + if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_3D_BIT) { + if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { + return GL_TRUE; + } + } + } + } + } + /* Nothing stopping us from the fast path now */ return GL_FALSE; } @@ -261,11 +319,18 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, if (ctx->NewState) _mesa_update_state( ctx ); + /* We have to validate the textures *before* checking for fallbacks; + * otherwise, the software fallback won't be able to rely on the + * texture state, the firstLevel and lastLevel fields won't be + * set in the intel texture object (they'll both be 0), and the + * software fallback will segfault if it attempts to access any + * texture level other than level 0. + */ + brw_validate_textures( brw ); + if (check_fallbacks(brw, prim, nr_prims)) return GL_FALSE; - brw_validate_textures( brw ); - /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); @@ -346,6 +411,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, retval = GL_TRUE; } + if (intel->always_flush_batch) + intel_batchbuffer_flush(intel->batch); out: UNLOCK_HARDWARE(intel); diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index b3ae4eef33..c53efba599 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -129,3 +129,126 @@ const GLuint *brw_get_program( struct brw_compile *p, return (const GLuint *)p->store; } + + +/** + * Subroutine calls require special attention. + * Mesa instructions may be expanded into multiple hardware instructions + * so the prog_instruction::BranchTarget field can't be used as an index + * into the hardware instructions. + * + * The BranchTarget field isn't needed, however. Mesa's GLSL compiler + * emits CAL and BGNSUB instructions with labels that can be used to map + * subroutine calls to actual subroutine code blocks. + * + * The structures and function here implement patching of CAL instructions + * so they jump to the right subroutine code... + */ + + +/** + * For each OPCODE_BGNSUB we create one of these. + */ +struct brw_glsl_label +{ + const char *name; /**< the label string */ + GLuint position; /**< the position of the brw instruction for this label */ + struct brw_glsl_label *next; /**< next in linked list */ +}; + + +/** + * For each OPCODE_CAL we create one of these. + */ +struct brw_glsl_call +{ + GLuint call_inst_pos; /**< location of the CAL instruction */ + const char *sub_name; /**< name of subroutine to call */ + struct brw_glsl_call *next; /**< next in linked list */ +}; + + +/** + * Called for each OPCODE_BGNSUB. + */ +void +brw_save_label(struct brw_compile *c, const char *name, GLuint position) +{ + struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label); + label->name = name; + label->position = position; + label->next = c->first_label; + c->first_label = label; +} + + +/** + * Called for each OPCODE_CAL. + */ +void +brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos) +{ + struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call); + call->call_inst_pos = call_pos; + call->sub_name = name; + call->next = c->first_call; + c->first_call = call; +} + + +/** + * Lookup a label, return label's position/offset. + */ +static GLuint +brw_lookup_label(struct brw_compile *c, const char *name) +{ + const struct brw_glsl_label *label; + for (label = c->first_label; label; label = label->next) { + if (strcmp(name, label->name) == 0) { + return label->position; + } + } + abort(); /* should never happen */ + return ~0; +} + + +/** + * When we're done generating code, this function is called to resolve + * subroutine calls. + */ +void +brw_resolve_cals(struct brw_compile *c) +{ + const struct brw_glsl_call *call; + + for (call = c->first_call; call; call = call->next) { + const GLuint sub_loc = brw_lookup_label(c, call->sub_name); + struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos]; + struct brw_instruction *brw_sub_inst = &c->store[sub_loc]; + GLint offset = brw_sub_inst - brw_call_inst; + + /* patch brw_inst1 to point to brw_inst2 */ + brw_set_src1(brw_call_inst, brw_imm_d(offset * 16)); + } + + /* free linked list of calls */ + { + struct brw_glsl_call *call, *next; + for (call = c->first_call; call; call = next) { + next = call->next; + _mesa_free(call); + } + c->first_call = NULL; + } + + /* free linked list of labels */ + { + struct brw_glsl_label *label, *next; + for (label = c->first_label; label; label = next) { + next = label->next; + _mesa_free(label); + } + c->first_label = NULL; + } +} diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 9e2b39af9b..eb99c21711 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -91,8 +91,13 @@ struct brw_indirect { }; +struct brw_glsl_label; +struct brw_glsl_call; + + + #define BRW_EU_MAX_INSN_STACK 5 -#define BRW_EU_MAX_INSN 1200 +#define BRW_EU_MAX_INSN 4000 struct brw_compile { struct brw_instruction store[BRW_EU_MAX_INSN]; @@ -106,9 +111,22 @@ struct brw_compile { GLuint flag_value; GLboolean single_program_flow; struct brw_context *brw; + + struct brw_glsl_label *first_label; /**< linked list of labels */ + struct brw_glsl_call *first_call; /**< linked list of CALs */ }; +void +brw_save_label(struct brw_compile *c, const char *name, GLuint position); + +void +brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos); + +void +brw_resolve_cals(struct brw_compile *c); + + static INLINE int type_sz( GLuint type ) { @@ -152,6 +170,13 @@ static INLINE struct brw_reg brw_reg( GLuint file, GLuint writemask ) { struct brw_reg reg; + if (type == BRW_GENERAL_REGISTER_FILE) + assert(nr < 128); + else if (type == BRW_MESSAGE_REGISTER_FILE) + assert(nr < 9); + else if (type == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_IP); + reg.type = type; reg.file = file; reg.nr = nr; diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 4e099b5945..6dce1ca48e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -55,6 +55,9 @@ static void guess_execution_size( struct brw_instruction *insn, static void brw_set_dest( struct brw_instruction *insn, struct brw_reg dest ) { + if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(dest.nr < 128); + insn->bits1.da1.dest_reg_file = dest.file; insn->bits1.da1.dest_reg_type = dest.type; insn->bits1.da1.dest_address_mode = dest.address_mode; @@ -96,10 +99,13 @@ static void brw_set_dest( struct brw_instruction *insn, } static void brw_set_src0( struct brw_instruction *insn, - struct brw_reg reg ) + struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + insn->bits1.da1.src0_reg_file = reg.file; insn->bits1.da1.src0_reg_type = reg.type; insn->bits2.da1.src0_abs = reg.abs; @@ -169,10 +175,12 @@ static void brw_set_src0( struct brw_instruction *insn, void brw_set_src1( struct brw_instruction *insn, - struct brw_reg reg ) + struct brw_reg reg ) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + assert(reg.nr < 128); + insn->bits1.da1.src1_reg_file = reg.file; insn->bits1.da1.src1_reg_type = reg.type; insn->bits3.da1.src1_abs = reg.abs; @@ -323,13 +331,13 @@ static void brw_set_dp_read_message( struct brw_instruction *insn, } static void brw_set_sampler_message(struct brw_context *brw, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint sampler, - GLuint msg_type, - GLuint response_length, - GLuint msg_length, - GLboolean eot) + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint sampler, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot) { brw_set_src1(insn, brw_imm_d(0)); @@ -407,7 +415,7 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p, * Convenience routines. */ #define ALU1(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ struct brw_reg dest, \ struct brw_reg src0) \ { \ @@ -415,7 +423,7 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \ } #define ALU2(OP) \ -struct brw_instruction *brw_##OP(struct brw_compile *p, \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ struct brw_reg dest, \ struct brw_reg src0, \ struct brw_reg src1) \ @@ -469,9 +477,9 @@ void brw_NOP(struct brw_compile *p) */ struct brw_instruction *brw_JMPI(struct brw_compile *p, - struct brw_reg dest, - struct brw_reg src0, - struct brw_reg src1) + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) { struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); @@ -674,7 +682,7 @@ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) struct brw_instruction *brw_WHILE(struct brw_compile *p, - struct brw_instruction *do_insn) + struct brw_instruction *do_insn) { struct brw_instruction *insn; @@ -931,13 +939,13 @@ void brw_dp_READ_16( struct brw_compile *p, void brw_fb_WRITE(struct brw_compile *p, - struct brw_reg dest, - GLuint msg_reg_nr, - struct brw_reg src0, - GLuint binding_table_index, - GLuint msg_length, - GLuint response_length, - GLboolean eot) + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); @@ -973,8 +981,8 @@ void brw_SAMPLE(struct brw_compile *p, { GLboolean need_stall = 0; - if(writemask == 0) { -/* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ + if (writemask == 0) { + /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } @@ -1006,7 +1014,7 @@ void brw_SAMPLE(struct brw_compile *p, if (newmask != writemask) { need_stall = 1; -/* _mesa_printf("need stall %x %x\n", newmask , writemask); */ + /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); @@ -1047,8 +1055,7 @@ void brw_SAMPLE(struct brw_compile *p, eot); } - if (need_stall) - { + if (need_stall) { struct brw_reg reg = vec8(offset(dest, response_length-1)); /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index e63098fdd4..299357409c 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -75,8 +75,8 @@ static GLboolean do_check_fallback(struct brw_context *brw) /* _NEW_STENCIL */ - if (ctx->Stencil.Enabled && - !brw->intel.hw_stencil) { + if (ctx->Stencil._Enabled && + (ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) { DBG("FALLBACK: stencil\n"); return GL_TRUE; } diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 9dcdad7b4e..5c94a49f60 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -178,7 +178,7 @@ static void upload_psp_urb_cbs(struct brw_context *brw ) { upload_pipelined_state_pointers(brw); brw_upload_urb_fence(brw); - brw_upload_constant_buffer_state(brw); + brw_upload_cs_urb_state(brw); } const struct brw_tracked_state brw_psp_urb_cbs = { @@ -290,8 +290,21 @@ static void upload_polygon_stipple(struct brw_context *brw) bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; bps.header.length = sizeof(bps)/4-2; - for (i = 0; i < 32; i++) - bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */ + /* Polygon stipple is provided in OpenGL order, i.e. bottom + * row first. If we're rendering to a window (i.e. the + * default frame buffer object, 0), then we need to invert + * it to match our pixel layout. But if we're rendering + * to a FBO (i.e. any named frame buffer object), we *don't* + * need to invert - we already match the layout. + */ + if (ctx->DrawBuffer->Name == 0) { + for (i = 0; i < 32; i++) + bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */ + } + else { + for (i = 0; i < 32; i++) + bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */ + } BRW_CACHED_BATCH_STRUCT(brw, &bps); } @@ -319,8 +332,22 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; - bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; - bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; + /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0), + * we have to invert the Y axis in order to match the OpenGL + * pixel coordinate system, and our offset must be matched + * to the window position. If we're drawing to a FBO + * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate + * system works just fine, and there's no window system to + * worry about. + */ + if (brw->intel.ctx.DrawBuffer->Name == 0) { + bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; + bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; + } + else { + bpso.bits0.y_offset = 0; + bpso.bits0.x_offset = 0; + } BRW_CACHED_BATCH_STRUCT(brw, &bpso); } diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 0c86911044..d90bd82038 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -38,6 +38,7 @@ #include "brw_context.h" #include "brw_util.h" +#include "brw_wm.h" static void brwBindProgram( GLcontext *ctx, GLenum target, @@ -94,7 +95,6 @@ static struct gl_program *brwNewProgram( GLcontext *ctx, static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { - _mesa_delete_program( ctx, prog ); } @@ -110,30 +110,35 @@ static void brwProgramStringNotify( GLcontext *ctx, GLenum target, struct gl_program *prog ) { + struct brw_context *brw = brw_context(ctx); if (target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_context *brw = brw_context(ctx); - struct brw_fragment_program *p = (struct brw_fragment_program *)prog; - struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; + struct brw_fragment_program *newFP = brw_fragment_program(fprog); + const struct brw_fragment_program *curFP = + brw_fragment_program_const(brw->fragment_program); + if (fprog->FogOption) { _mesa_append_fog_code(ctx, fprog); fprog->FogOption = GL_NONE; } - if (p == fp) + if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - p->id = brw->program_id++; + newFP->id = brw->program_id++; + newFP->isGLSL = brw_wm_is_glsl(fprog); } else if (target == GL_VERTEX_PROGRAM_ARB) { - struct brw_context *brw = brw_context(ctx); - struct brw_vertex_program *p = (struct brw_vertex_program *)prog; - struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; - if (p == vp) + struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; + struct brw_vertex_program *newVP = brw_vertex_program(vprog); + const struct brw_vertex_program *curVP = + brw_vertex_program_const(brw->vertex_program); + + if (newVP == curVP) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - if (p->program.IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, &p->program); + if (newVP->program.IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, &newVP->program); } - p->id = brw->program_id++; + newVP->id = brw->program_id++; /* Also tell tnl about it: */ diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 8c1711538a..c3c85978f4 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -167,8 +167,14 @@ static void upload_sf_prog(struct brw_context *brw) key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_POLYGON */ - if (key.do_twoside_color) - key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW); + if (key.do_twoside_color) { + /* If we're rendering to a FBO, we have to invert the polygon + * face orientation, just as we invert the viewport in + * sf_unit_create_from_key(). ctx->DrawBuffer->Name will be + * nonzero if we're rendering to such an FBO. + */ + key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0); + } dri_bo_unreference(brw->sf.prog_bo); brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index e96d5354b3..93a9686f71 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -44,6 +44,7 @@ static void upload_sf_vp(struct brw_context *brw) struct brw_sf_viewport sfv; GLfloat y_scale, y_bias; const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); + const GLfloat *v = ctx->Viewport._WindowMap.m; memset(&sfv, 0, sizeof(sfv)); @@ -58,8 +59,6 @@ static void upload_sf_vp(struct brw_context *brw) /* _NEW_VIEWPORT */ - const GLfloat *v = ctx->Viewport._WindowMap.m; - sfv.viewport.m00 = v[MAT_SX]; sfv.viewport.m11 = v[MAT_SY] * y_scale; sfv.viewport.m22 = v[MAT_SZ] * depth_scale; diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index df839c5b30..81b0a45998 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -52,7 +52,6 @@ const struct brw_tracked_state brw_cc_vp; const struct brw_tracked_state brw_check_fallback; const struct brw_tracked_state brw_clip_prog; const struct brw_tracked_state brw_clip_unit; -const struct brw_tracked_state brw_constant_buffer_state; const struct brw_tracked_state brw_constant_buffer; const struct brw_tracked_state brw_curbe_offsets; const struct brw_tracked_state brw_invarient_state; diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index dc87859f3f..811940edc0 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -97,8 +97,6 @@ void brw_clear_batch_cache_flush( struct brw_context *brw ) { clear_batch_cache(brw); -/* brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */ - brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index b28c57c2bc..5d332d010c 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -162,6 +162,14 @@ static void brw_debug_prog(const char *name, dri_bo *prog) fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", name, (unsigned int)prog->offset + i * 4 * 4, data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); + /* Stop at the end of the program. It'd be nice to keep track of the actual + * intended program size instead of guessing like this. + */ + if (data[i * 4 + 0] == 0 && + data[i * 4 + 1] == 0 && + data[i * 4 + 2] == 0 && + data[i * 4 + 3] == 0) + break; } dri_bo_unmap(prog); diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index d97ff27f0a..89e2981203 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -439,7 +439,7 @@ struct brw_urb_fence } bits1; }; -struct brw_constant_buffer_state /* previously brw_command_streamer */ +struct brw_cs_urb_state { struct header header; diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 9977677fd7..d29eb17f8c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -170,8 +170,8 @@ static void calc_wm_input_sizes( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; /* BRW_NEW_VERTEX_PROGRAM */ - struct brw_vertex_program *vp = - (struct brw_vertex_program *)brw->vertex_program; + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); /* BRW_NEW_INPUT_DIMENSIONS */ struct tracker t; GLuint insn; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 24b7dc30fe..3807dff991 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -156,6 +156,12 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; c->prog_data.total_grf = reg; + + if (INTEL_DEBUG & DEBUG_VS) { + _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); + _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); + _mesa_printf("%s reg = %d\n", __FUNCTION__, reg); + } } @@ -658,7 +664,7 @@ static void emit_nrm( struct brw_vs_compile *c, /* TODO: relative addressing! */ static struct brw_reg get_reg( struct brw_vs_compile *c, - GLuint file, + gl_register_file file, GLuint index ) { @@ -954,36 +960,27 @@ static void emit_vertex_write( struct brw_vs_compile *c) } +/** + * Called after code generation to resolve subroutine calls and the + * END instruction. + * \param end_inst points to brw code for END instruction + * \param last_inst points to last instruction emitted before vertex write + */ static void -post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) +post_vs_emit( struct brw_vs_compile *c, + struct brw_instruction *end_inst, + struct brw_instruction *last_inst ) { - GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn, target_insn; - struct prog_instruction *inst1, *inst2; - struct brw_instruction *brw_inst1, *brw_inst2; - int offset; - for (insn = 0; insn < nr_insns; insn++) { - inst1 = &c->vp->program.Base.Instructions[insn]; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case OPCODE_CAL: - case OPCODE_BRA: - target_insn = inst1->BranchTarget; - inst2 = &c->vp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - case OPCODE_END: - offset = end_inst - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } - } + GLint offset; + + brw_resolve_cals(&c->func); + + /* patch up the END code to jump past subroutines, etc */ + offset = last_inst - end_inst; + brw_set_src1(end_inst, brw_imm_d(offset * 16)); } + /* Emit the fragment program instructions here. */ void brw_vs_emit(struct brw_vs_compile *c ) @@ -992,7 +989,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) struct brw_compile *p = &c->func; GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn, if_insn = 0; - struct brw_instruction *end_inst; + GLuint end_offset = 0; + struct brw_instruction *end_inst, *last_inst; struct brw_instruction *if_inst[MAX_IFSN]; struct brw_indirect stack_index = brw_indirect(0, 0); @@ -1035,7 +1033,6 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* Get argument regs. SWZ is special and does this itself. */ - inst->Data = &p->store[p->nr_insn]; if (inst->Opcode != OPCODE_SWZ) for (i = 0; i < 3; i++) { struct prog_src_register *src = &inst->SrcReg[i]; @@ -1203,7 +1200,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4)); - inst->Data = &p->store[p->nr_insn]; + brw_save_call(p, inst->Comment, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; case OPCODE_RET: @@ -1212,14 +1209,23 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); brw_set_access_mode(p, BRW_ALIGN_16); + break; case OPCODE_END: + end_offset = p->nr_insn; + /* this instruction will get patched later to jump past subroutine + * code, etc. + */ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; case OPCODE_PRINT: + /* no-op */ + break; case OPCODE_BGNSUB: + brw_save_label(p, inst->Comment, p->nr_insn); + break; case OPCODE_ENDSUB: - /* no-op instructions */ - break; + /* no-op */ + break; default: _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader", inst->Opcode, inst->Opcode < MAX_OPCODE ? @@ -1257,9 +1263,11 @@ void brw_vs_emit(struct brw_vs_compile *c ) release_tmps(c); } - end_inst = &p->store[p->nr_insn]; + end_inst = &p->store[end_offset]; + last_inst = &p->store[p->nr_insn]; + + /* The END instruction will be patched to jump to this code */ emit_vertex_write(c); - post_vs_emit(c, end_inst); - for (insn = 0; insn < nr_insns; insn++) - c->vp->program.Base.Instructions[insn].Data = NULL; + + post_vs_emit(c, end_inst, last_inst); } diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index b501a59ccd..960bbb311e 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -67,11 +67,13 @@ static void brw_destroy_context( struct intel_context *intel ) brw_destroy_state(brw); brw_draw_destroy( brw ); + _mesa_free(brw->wm.compile_data); + brw_FrameBufferTexDestroy( brw ); - for (i = 0; i < brw->state.nr_draw_regions; i++) - intel_region_release(&brw->state.draw_regions[i]); - brw->state.nr_draw_regions = 0; + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); + brw->state.nr_color_regions = 0; intel_region_release(&brw->state.depth_region); dri_bo_release(&brw->curbe.curbe_bo); @@ -90,6 +92,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->wm.bind_bo); for (i = 0; i < BRW_WM_MAX_SURF; i++) dri_bo_release(&brw->wm.surf_bo[i]); + dri_bo_release(&brw->wm.sampler_bo); dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.state_bo); dri_bo_release(&brw->cc.prog_bo); @@ -102,25 +105,25 @@ static void brw_destroy_context( struct intel_context *intel ) * called from intelDrawBuffer() */ static void brw_set_draw_region( struct intel_context *intel, - struct intel_region *draw_regions[], + struct intel_region *color_regions[], struct intel_region *depth_region, - GLuint num_regions) + GLuint num_color_regions) { struct brw_context *brw = brw_context(&intel->ctx); - int i; + GLuint i; /* release old color/depth regions */ if (brw->state.depth_region != depth_region) brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; - for (i = 0; i < brw->state.nr_draw_regions; i++) - intel_region_release(&brw->state.draw_regions[i]); + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); intel_region_release(&brw->state.depth_region); /* reference new color/depth regions */ - for (i = 0; i < num_regions; i++) - intel_region_reference(&brw->state.draw_regions[i], draw_regions[i]); + for (i = 0; i < num_color_regions; i++) + intel_region_reference(&brw->state.color_regions[i], color_regions[i]); intel_region_reference(&brw->state.depth_region, depth_region); - brw->state.nr_draw_regions = num_regions; + brw->state.nr_color_regions = num_color_regions; } @@ -181,23 +184,6 @@ static void brw_note_unlock( struct intel_context *intel ) } -void brw_do_flush( struct brw_context *brw, GLuint flags ) -{ - struct brw_mi_flush flush; - memset(&flush, 0, sizeof(flush)); - flush.opcode = CMD_MI_FLUSH; - flush.flags = flags; - BRW_BATCH_STRUCT(brw, &flush); -} - - -static void brw_emit_flush( struct intel_context *intel, GLuint unused ) -{ - brw_do_flush(brw_context(&intel->ctx), - BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); -} - - /* called from intelWaitForIdle() and intelFlush() * * For now, just flush everything. Could be smarter later. @@ -234,6 +220,5 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; brw->intel.vtbl.flush_cmd = brw_flush_cmd; - brw->intel.vtbl.emit_flush = brw_emit_flush; brw->intel.vtbl.debug_batch = brw_debug_batch; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index ea708a0681..1645ca0b70 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -80,6 +80,53 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) } +/** + * Do GPU code generation for non-GLSL shader. non-GLSL shaders have + * no flow control instructions so we can more readily do SSA-style + * optimizations. + */ +static void +brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(c); + + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(c); + + /* Dead code removal. + */ + brw_wm_pass1(c); + + /* Register allocation. + */ + c->grf_limit = BRW_WM_MAX_GRF / 2; + + brw_wm_pass2(c); + + c->prog_data.total_grf = c->max_wm_grf; + if (c->last_scratch) { + c->prog_data.total_scratch = c->last_scratch + 0x40; + } + else { + c->prog_data.total_scratch = 0; + } + + /* Emit GEN4 code. + */ + brw_wm_emit(c); +} + + +/** + * All Mesa program -> GPU code generation goes through this function. + * Depending on the instructions used (i.e. flow control instructions) + * we'll use one of two code generators. + */ static void do_wm_prog( struct brw_context *brw, struct brw_fragment_program *fp, struct brw_wm_prog_key *key) @@ -90,52 +137,32 @@ static void do_wm_prog( struct brw_context *brw, c = brw->wm.compile_data; if (c == NULL) { - brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); - c = brw->wm.compile_data; + brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); + c = brw->wm.compile_data; } else { - memset(c, 0, sizeof(*brw->wm.compile_data)); + memset(c, 0, sizeof(*brw->wm.compile_data)); } memcpy(&c->key, key, sizeof(*key)); c->fp = fp; c->env_param = brw->intel.ctx.FragmentProgram.Parameters; - brw_init_compile(brw, &c->func); - if (brw_wm_is_glsl(&c->fp->program)) { - brw_wm_glsl_emit(brw, c); - } else { - /* Augment fragment program. Add instructions for pre- and - * post-fragment-program tasks such as interpolation and fogging. - */ - brw_wm_pass_fp(c); - - /* Translate to intermediate representation. Build register usage - * chains. - */ - brw_wm_pass0(c); - - /* Dead code removal. - */ - brw_wm_pass1(c); - - /* Register allocation. - */ - c->grf_limit = BRW_WM_MAX_GRF/2; - - brw_wm_pass2(c); - - c->prog_data.total_grf = c->max_wm_grf; - if (c->last_scratch) { - c->prog_data.total_scratch = - c->last_scratch + 0x40; - } else { - c->prog_data.total_scratch = 0; - } - - /* Emit GEN4 code. - */ - brw_wm_emit(c); + brw_init_compile(brw, &c->func); + + /* temporary sanity check assertion */ + ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + + /* + * Shader which use GLSL features such as flow control are handled + * differently from "simple" shaders. + */ + if (fp->isGLSL) { + brw_wm_glsl_emit(brw, c); } + else { + brw_wm_non_glsl_emit(brw, c); + } + if (INTEL_DEBUG & DEBUG_WM) fprintf(stderr, "\n"); @@ -159,7 +186,7 @@ static void brw_wm_populate_key( struct brw_context *brw, { GLcontext *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ - struct brw_fragment_program *fp = + const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; GLuint lookup = 0; GLuint line_aa; @@ -174,7 +201,7 @@ static void brw_wm_populate_key( struct brw_context *brw, ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPR)) + if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH)) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; /* _NEW_DEPTH */ @@ -186,7 +213,7 @@ static void brw_wm_populate_key( struct brw_context *brw, lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; /* _NEW_STENCIL */ - if (ctx->Stencil.Enabled) { + if (ctx->Stencil._Enabled) { lookup |= IZ_STENCIL_TEST_ENABLE_BIT; if (ctx->Stencil.WriteMask[0] || @@ -278,10 +305,8 @@ static void brw_wm_populate_key( struct brw_context *brw, key->drawable_height = brw->intel.driDrawable->h; } - /* Extra info: - */ + /* The unique fragment program ID */ key->program_string_id = fp->id; - } @@ -305,8 +330,6 @@ static void brw_prepare_wm_prog(struct brw_context *brw) } -/* See brw_wm.c: - */ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 0f46a25b1a..7f0e5702f2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -143,13 +143,12 @@ struct brw_wm_instruction { GLuint writemask:4; GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ + GLuint tex_shadow:1; /* do shadow comparison? */ GLuint eot:1; /* End of thread indicator for FB_WRITE*/ GLuint target:10; /* target binding table index for FB_WRITE*/ }; -#define PROGRAM_INTERNAL_PARAM - #define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) @@ -240,13 +239,15 @@ struct brw_wm_compile { GLuint max_wm_grf; GLuint last_scratch; + /** Mapping from Mesa registers to hardware registers */ struct { GLboolean inited; struct brw_reg reg; } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + struct brw_reg stack; struct brw_reg emit_mask_reg; - GLuint reg_index; + GLuint reg_index; /**< Index of next free GRF register */ GLuint tmp_regs[BRW_WM_MAX_GRF]; GLuint tmp_index; GLuint tmp_max; @@ -281,4 +282,6 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); + + #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index b5050a3e40..f2dca9caa6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -671,7 +671,6 @@ static void emit_tex( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; GLuint msgLength, responseLength; - GLboolean shadow = (c->key.shadowtex_mask & (1<<inst->tex_unit)) ? 1 : 0; GLuint i, nr; GLuint emit; @@ -693,7 +692,7 @@ static void emit_tex( struct brw_wm_compile *c, break; } - if (shadow) { + if (inst->tex_shadow) { nr = 4; emit |= WRITEMASK_W; } @@ -718,7 +717,7 @@ static void emit_tex( struct brw_wm_compile *c, inst->tex_unit + MAX_DRAW_BUFFERS, /* surface */ inst->tex_unit, /* sampler */ inst->writemask, - (shadow ? + (inst->tex_shadow ? BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE : BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE), responseLength, @@ -886,6 +885,9 @@ static void emit_aa( struct brw_wm_compile *c, /* Post-fragment-program processing. Send the results to the * framebuffer. + * \param arg0 the fragment color + * \param arg1 the pass-through depth value + * \param arg2 the shader-computed depth value */ static void emit_fb_write( struct brw_wm_compile *c, struct brw_reg *arg0, diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index ea3f3fc678..533be3858e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -129,7 +129,7 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx) reg.Index = idx; reg.WriteMask = WRITEMASK_XYZW; reg.RelAddr = 0; - reg.CondMask = 0; + reg.CondMask = COND_TR; reg.CondSwizzle = 0; reg.CondSrc = 0; reg.pad = 0; @@ -183,16 +183,16 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c, { struct prog_instruction *inst = get_fp_inst(c); *inst = *inst0; - inst->Data = (void *)inst0; return inst; } -static struct prog_instruction * emit_op(struct brw_wm_compile *c, +static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, GLuint op, struct prog_dst_register dest, GLuint saturate, GLuint tex_src_unit, GLuint tex_src_target, + GLuint tex_shadow, struct prog_src_register src0, struct prog_src_register src1, struct prog_src_register src2 ) @@ -206,6 +206,7 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, inst->SaturateMode = saturate; inst->TexSrcUnit = tex_src_unit; inst->TexSrcTarget = tex_src_target; + inst->TexShadow = tex_shadow; inst->SrcReg[0] = src0; inst->SrcReg[1] = src1; inst->SrcReg[2] = src2; @@ -213,6 +214,20 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, } +static struct prog_instruction * emit_op(struct brw_wm_compile *c, + GLuint op, + struct prog_dst_register dest, + GLuint saturate, + struct prog_src_register src0, + struct prog_src_register src1, + struct prog_src_register src2 ) +{ + return emit_tex_op(c, op, dest, saturate, + 0, 0, 0, /* tex unit, target, shadow */ + src0, src1, src2); +} + + /*********************************************************************** @@ -234,7 +249,7 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) emit_op(c, WM_PIXELXY, dst_mask(pixel_xy, WRITEMASK_XY), - 0, 0, 0, + 0, payload_r0_depth, src_undef(), src_undef()); @@ -257,7 +272,7 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) emit_op(c, WM_DELTAXY, dst_mask(delta_xy, WRITEMASK_XY), - 0, 0, 0, + 0, pixel_xy, payload_r0_depth, src_undef()); @@ -274,14 +289,13 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) struct prog_dst_register pixel_w = get_temp(c); struct prog_src_register deltas = get_delta_xy(c); struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); - - + /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ emit_op(c, WM_PIXELW, dst_mask(pixel_w, WRITEMASK_W), - 0, 0, 0, + 0, interp_wpos, deltas, src_undef()); @@ -316,7 +330,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_WPOSXY, dst_mask(dst, WRITEMASK_XY), - 0, 0, 0, + 0, get_pixel_xy(c), src_undef(), src_undef()); @@ -328,7 +342,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_LINTERP, dst, - 0, 0, 0, + 0, interp, deltas, arg2); @@ -339,7 +353,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_CINTERP, dst, - 0, 0, 0, + 0, interp, src_undef(), src_undef()); @@ -348,7 +362,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_LINTERP, dst, - 0, 0, 0, + 0, interp, deltas, src_undef()); @@ -358,7 +372,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, WM_PINTERP, dst, - 0, 0, 0, + 0, interp, deltas, get_pixel_w(c)); @@ -378,7 +392,7 @@ static void emit_ddx( struct brw_wm_compile *c, emit_op(c, OPCODE_DDX, inst->DstReg, - 0, 0, 0, + 0, interp, get_pixel_w(c), src_undef()); @@ -394,7 +408,7 @@ static void emit_ddy( struct brw_wm_compile *c, emit_op(c, OPCODE_DDY, inst->DstReg, - 0, 0, 0, + 0, interp, get_pixel_w(c), src_undef()); @@ -489,13 +503,12 @@ static void precalc_dst( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, dst_mask(dst, WRITEMASK_Y), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src0, src1, src_undef()); } - if (dst.WriteMask & WRITEMASK_XZ) { struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); @@ -505,7 +518,7 @@ static void precalc_dst( struct brw_wm_compile *c, swz = emit_op(c, OPCODE_SWZ, dst_mask(dst, WRITEMASK_XZ), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), src_undef(), src_undef()); @@ -518,7 +531,7 @@ static void precalc_dst( struct brw_wm_compile *c, emit_op(c, OPCODE_MOV, dst_mask(dst, WRITEMASK_W), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src1, src_undef(), src_undef()); @@ -540,7 +553,7 @@ static void precalc_lit( struct brw_wm_compile *c, swz = emit_op(c, OPCODE_SWZ, dst_mask(dst, WRITEMASK_XW), - 0, 0, 0, + 0, src_swizzle1(src0, SWIZZLE_ONE), src_undef(), src_undef()); @@ -548,12 +561,11 @@ static void precalc_lit( struct brw_wm_compile *c, swz->SrcReg[0].NegateBase = 0; } - if (dst.WriteMask & WRITEMASK_YZ) { emit_op(c, OPCODE_LIT, dst_mask(dst, WRITEMASK_YZ), - inst->SaturateMode, 0, 0, + inst->SaturateMode, src0, src_undef(), src_undef()); @@ -589,7 +601,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmpcoord = src0 (i.e.: coord = src0) */ out = emit_op(c, OPCODE_MOV, tmpcoord, - 0, 0, 0, + 0, src0, src_undef(), src_undef()); @@ -599,7 +611,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp0 = MAX(coord.X, coord.Y) */ emit_op(c, OPCODE_MAX, tmp0, - 0, 0, 0, + 0, src_swizzle1(coord, X), src_swizzle1(coord, Y), src_undef()); @@ -607,7 +619,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp1 = MAX(tmp0, coord.Z) */ emit_op(c, OPCODE_MAX, tmp1, - 0, 0, 0, + 0, tmp0src, src_swizzle1(coord, Z), src_undef()); @@ -615,7 +627,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp0 = 1 / tmp1 */ emit_op(c, OPCODE_RCP, tmp0, - 0, 0, 0, + 0, tmp1src, src_undef(), src_undef()); @@ -623,7 +635,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmpCoord = src0 * tmp0 */ emit_op(c, OPCODE_MUL, tmpcoord, - 0, 0, 0, + 0, src0, tmp0src, src_undef()); @@ -646,7 +658,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, tmpcoord, - 0, 0, 0, + 0, inst->SrcReg[0], scale, src_undef()); @@ -686,22 +698,23 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp = TEX ... */ - emit_op(c, - OPCODE_TEX, - tmp, - inst->SaturateMode, - unit, - inst->TexSrcTarget, - coord, - src_undef(), - src_undef()); + emit_tex_op(c, + OPCODE_TEX, + tmp, + inst->SaturateMode, + unit, + inst->TexSrcTarget, + inst->TexShadow, + coord, + src_undef(), + src_undef()); /* tmp.xyz = ADD TMP, C0 */ emit_op(c, OPCODE_ADD, dst_mask(tmp, WRITEMASK_XYZ), - 0, 0, 0, + 0, tmpsrc, C0, src_undef()); @@ -712,7 +725,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_Y), - 0, 0, 0, + 0, tmpsrc, src_swizzle1(C0, W), src_undef()); @@ -727,7 +740,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_XYZ), - 0, 0, 0, + 0, swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), C1, src_swizzle1(tmpsrc, Y)); @@ -737,7 +750,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_MAD, dst_mask(dst, WRITEMASK_Y), - 0, 0, 0, + 0, src_swizzle1(tmpsrc, Z), src_swizzle1(C1, W), src_swizzle1(src_reg_from_dst(dst), Y)); @@ -746,15 +759,16 @@ static void precalc_tex( struct brw_wm_compile *c, } else { /* ordinary RGBA tex instruction */ - emit_op(c, - OPCODE_TEX, - inst->DstReg, - inst->SaturateMode, - unit, - inst->TexSrcTarget, - coord, - src_undef(), - src_undef()); + emit_tex_op(c, + OPCODE_TEX, + inst->DstReg, + inst->SaturateMode, + unit, + inst->TexSrcTarget, + inst->TexShadow, + coord, + src_undef(), + src_undef()); } /* For GL_EXT_texture_swizzle: */ @@ -764,7 +778,6 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, OPCODE_SWZ, inst->DstReg, SATURATE_OFF, /* saturate already done above */ - 0, 0, /* tex unit, target N/A */ src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), src_undef(), src_undef()); @@ -813,7 +826,7 @@ static void precalc_txp( struct brw_wm_compile *c, emit_op(c, OPCODE_RCP, dst_mask(tmp, WRITEMASK_W), - 0, 0, 0, + 0, src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), src_undef(), src_undef()); @@ -823,7 +836,7 @@ static void precalc_txp( struct brw_wm_compile *c, emit_op(c, OPCODE_MUL, dst_mask(tmp, WRITEMASK_XYZ), - 0, 0, 0, + 0, src0, src_swizzle1(src_reg_from_dst(tmp), W), src_undef()); @@ -849,42 +862,41 @@ static void precalc_txp( struct brw_wm_compile *c, static void emit_fb_write( struct brw_wm_compile *c ) { struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPR); + struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); struct prog_src_register outcolor; GLuint i; struct prog_instruction *inst, *last_inst; struct brw_context *brw = c->func.brw; - /* inst->Sampler is not used by backend, - use it for fb write target and eot */ - - if (brw->state.nr_draw_regions > 1) { - for (i = 0 ; i < brw->state.nr_draw_regions; i++) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - last_inst = inst = emit_op(c, - WM_FB_WRITE, dst_mask(dst_undef(),0), 0, 0, 0, - outcolor, payload_r0_depth, outdepth); - inst->Sampler = (i<<1); - if (c->fp_fragcolor_emitted) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, 0, 0, outcolor, payload_r0_depth, outdepth); - inst->Sampler = (i<<1); - } - } - last_inst->Sampler |= 1; //eot + /* The inst->Aux field is used for FB write target and the EOT marker */ + + if (brw->state.nr_color_regions > 1) { + for (i = 0 ; i < brw->state.nr_color_regions; i++) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); + last_inst = inst = emit_op(c, + WM_FB_WRITE, dst_mask(dst_undef(),0), 0, + outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + if (c->fp_fragcolor_emitted) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + } + } + last_inst->Aux |= 1; //eot } else { /* if gl_FragData[0] is written, use it, else use gl_FragColor */ if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); else - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLR); + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, 0, 0, outcolor, payload_r0_depth, outdepth); - inst->Sampler = 1|(0<<1); + inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = 1|(0<<1); } } @@ -915,9 +927,9 @@ static void validate_dst_regs( struct brw_wm_compile *c, const struct prog_instruction *inst ) { if (inst->DstReg.File == PROGRAM_OUTPUT) { - GLuint idx = inst->DstReg.Index; - if (idx == FRAG_RESULT_COLR) - c->fp_fragcolor_emitted = 1; + GLuint idx = inst->DstReg.Index; + if (idx == FRAG_RESULT_COLOR) + c->fp_fragcolor_emitted = 1; } } @@ -937,11 +949,15 @@ static void print_insns( const struct prog_instruction *insn, 3); } else - _mesa_printf("UNKNOWN\n"); - + _mesa_printf("965 Opcode %d\n", insn->Opcode); } } + +/** + * Initial pass for fragment program code generation. + * This function is used by both the GLSL and non-GLSL paths. + */ void brw_wm_pass_fp( struct brw_wm_compile *c ) { struct brw_fragment_program *fp = c->fp; @@ -958,15 +974,19 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) c->pixel_w = src_undef(); c->nr_fp_insns = 0; - /* Emit preamble instructions: + /* Emit preamble instructions. This is where special instructions such as + * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to + * compute shader inputs from varying vars. */ - - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; validate_src_regs(c, inst); validate_dst_regs(c, inst); } + + /* Loop over all instructions doing assorted simplifications and + * transformations. + */ for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; struct prog_instruction *out; @@ -975,7 +995,6 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) * necessary: */ - switch (inst->Opcode) { case OPCODE_SWZ: out = emit_insn(c, inst); @@ -1055,9 +1074,9 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) } if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + _mesa_printf("pass_fp:\n"); + print_insns( c->prog_instructions, c->nr_fp_insns ); + _mesa_printf("\n"); } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 8fd776ac39..4cf092226c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -8,12 +8,17 @@ enum _subroutine { SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 }; -/* Only guess, need a flag in gl_fragment_program later */ + +/** + * Determine if the given fragment program uses GLSL features such + * as flow conditionals, loops, subroutines. + * Some GLSL shaders may use these features, others might not. + */ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) { int i; for (i = 0; i < fp->Base.NumInstructions; i++) { - struct prog_instruction *inst = &fp->Base.Instructions[i]; + const struct prog_instruction *inst = &fp->Base.Instructions[i]; switch (inst->Opcode) { case OPCODE_IF: case OPCODE_TRUNC: @@ -36,6 +41,10 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) return GL_FALSE; } + +/** + * Record the mapping of a Mesa register to a hardware register. + */ static void set_reg(struct brw_wm_compile *c, int file, int index, int component, struct brw_reg reg) { @@ -43,6 +52,10 @@ static void set_reg(struct brw_wm_compile *c, int file, int index, c->wm_regs[file][index][component].inited = GL_TRUE; } +/** + * Examine instruction's write mask to find index of first component + * enabled for writing. + */ static int get_scalar_dst_index(struct prog_instruction *inst) { int i; @@ -62,6 +75,10 @@ static struct brw_reg alloc_tmp(struct brw_wm_compile *c) return reg; } +/** + * Save current temp register info. + * There must be a matching call to release_tmps(). + */ static int mark_tmps(struct brw_wm_compile *c) { return c->tmp_index; @@ -77,8 +94,22 @@ static void release_tmps(struct brw_wm_compile *c, int mark) c->tmp_index = mark; } +/** + * Convert Mesa src register to brw register. + * + * Since we're running in SOA mode each Mesa register corresponds to four + * hardware registers. We allocate the hardware registers as needed here. + * + * \param file register file, one of PROGRAM_x + * \param index register number + * \param component src component (X=0, Y=1, Z=2, W=3) + * \param nr not used?!? + * \param neg negate value? + * \param abs take absolute value? + */ static struct brw_reg -get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GLuint neg, GLuint abs) +get_reg(struct brw_wm_compile *c, int file, int index, int component, + int nr, GLuint neg, GLuint abs) { struct brw_reg reg; switch (file) { @@ -89,21 +120,46 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL break; case PROGRAM_UNDEFINED: return brw_null_reg(); - default: + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + case PROGRAM_PAYLOAD: break; + default: + _mesa_problem(NULL, "Unexpected file in get_reg()"); + return brw_null_reg(); } - if(c->wm_regs[file][index][component].inited) + /* see if we've already allocated a HW register for this Mesa register */ + if (c->wm_regs[file][index][component].inited) { + /* yes, re-use */ reg = c->wm_regs[file][index][component].reg; - else + } + else { + /* no, allocate new register */ reg = brw_vec8_grf(c->reg_index, 0); + } - if(!c->wm_regs[file][index][component].inited) { + /* if this is a new register allocation, record it in the table */ + if (!c->wm_regs[file][index][component].inited) { set_reg(c, file, index, component, reg); c->reg_index++; } - if (neg & (1<< component)) { + if (c->reg_index >= BRW_WM_MAX_GRF - 12) { + /* ran out of temporary registers! */ +#if 1 + /* This is a big hack for now. + * Return bad register index, just don't hang the GPU. + */ + _mesa_fprintf(stderr, "out of regs %d\n", c->reg_index); + c->reg_index = BRW_WM_MAX_GRF - 13; +#else + return brw_null_reg(); +#endif + } + + if (neg & (1 << component)) { reg = negate(reg); } if (abs) @@ -111,6 +167,12 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, int nr, GL return reg; } + +/** + * Preallocate registers. This sets up the Mesa to hardware register + * mapping for certain registers, such as constants (uniforms/state vars) + * and shader inputs. + */ static void prealloc_reg(struct brw_wm_compile *c) { int i, j; @@ -119,29 +181,42 @@ static void prealloc_reg(struct brw_wm_compile *c) GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted | c->fp_deriv_emitted; for (i = 0; i < 4; i++) { - reg = (i < c->key.nr_depth_regs) - ? brw_vec8_grf(i*2, 0) : brw_vec8_grf(0, 0); + if (i < c->key.nr_depth_regs) + reg = brw_vec8_grf(i * 2, 0); + else + reg = brw_vec8_grf(0, 0); set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); } - c->reg_index += 2*c->key.nr_depth_regs; + c->reg_index += 2 * c->key.nr_depth_regs; + + /* constants */ { - int nr_params = c->fp->program.Base.Parameters->NumParameters; - struct gl_program_parameter_list *plist = + const int nr_params = c->fp->program.Base.Parameters->NumParameters; + const struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; int index = 0; - c->prog_data.nr_params = 4*nr_params; + + /* number of float constants */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ for (i = 0; i < nr_params; i++) { - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index/8, - index%8); - c->prog_data.param[index] = - &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); } } - c->nr_creg = 2*((4*nr_params+15)/16); + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); c->reg_index += c->nr_creg; } + + /* fragment shader inputs */ for (i = 0; i < FRAG_ATTRIB_MAX; i++) { if (inputs & (1<<i)) { nr_interp_regs++; @@ -149,9 +224,9 @@ static void prealloc_reg(struct brw_wm_compile *c) for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, i, j, reg); c->reg_index += 2; - } } + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = nr_interp_regs * 2; c->prog_data.curb_read_length = c->nr_creg; @@ -161,6 +236,10 @@ static void prealloc_reg(struct brw_wm_compile *c) c->reg_index += 2; } + +/** + * Convert Mesa dst register to brw register. + */ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, struct prog_instruction *inst, int component, int nr) { @@ -168,6 +247,10 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, 0, 0); } + +/** + * Convert Mesa src register to brw register. + */ static struct brw_reg get_src_reg(struct brw_wm_compile *c, struct prog_src_register *src, int index, int nr) { @@ -176,13 +259,15 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, src->NegateBase, src->Abs); } -/* Subroutines are minimal support for resusable instruction sequences. - They are implemented as simply as possible to minimise overhead: there - is no explicit support for communication between the caller and callee - other than saving the return address in a temporary register, nor is - there any automatic local storage. This implies that great care is - required before attempting reentrancy or any kind of nested - subroutine invocations. */ +/** + * Subroutines are minimal support for resusable instruction sequences. + * They are implemented as simply as possible to minimise overhead: there + * is no explicit support for communication between the caller and callee + * other than saving the return address in a temporary register, nor is + * there any automatic local storage. This implies that great care is + * required before attempting reentrancy or any kind of nested + * subroutine invocations. + */ static void invoke_subroutine( struct brw_wm_compile *c, enum _subroutine subroutine, void (*emit)( struct brw_wm_compile * ) ) @@ -319,11 +404,10 @@ static void emit_pixel_xy(struct brw_wm_compile *c, stride(suboffset(r1_uw, 5), 2, 4, 0), brw_imm_v(0x11001100)); } - } static void emit_delta_xy(struct brw_wm_compile *c, - struct prog_instruction *inst) + struct prog_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; @@ -351,10 +435,8 @@ static void emit_delta_xy(struct brw_wm_compile *c, negate(suboffset(r1,1))); } - } - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -397,33 +479,59 @@ static void emit_fb_write(struct brw_wm_compile *c, */ if (c->key.aa_dest_stencil_reg) nr += 1; - { - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); + + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + src0 = get_src_reg(c, &inst->SrcReg[0], channel, 1); + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); - if (c->key.source_depth_to_render_target) - { - if (c->key.computes_depth) { - src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } else { - src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } - - nr += 2; + if (c->key.source_depth_to_render_target) { + if (c->key.computes_depth) { + src0 = get_src_reg(c, &inst->SrcReg[2], 2, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + else { + src0 = get_src_reg(c, &inst->SrcReg[1], 1, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + + nr += 2; + } + + if (c->key.dest_depth_reg) { + GLuint comp = c->key.dest_depth_reg / 2; + GLuint off = c->key.dest_depth_reg % 2; + + assert(comp == 1); + assert(off == 0); +#if 0 + /* XXX do we need this code? comp always 1, off always 0, it seems */ + if (off != 0) { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + /* 2nd half? */ + brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_pop_insn_state(p); + } + else +#endif + { + struct brw_reg src = get_src_reg(c, &inst->SrcReg[1], 1, 1); + brw_MOV(p, brw_message_reg(nr), src); + } + nr += 2; } - target = inst->Sampler >> 1; - eot = inst->Sampler & 1; + + target = inst->Aux >> 1; + eot = inst->Aux & 1; fire_fb_write(c, 0, nr, target, eot); } @@ -465,12 +573,12 @@ static void emit_linterp(struct brw_wm_compile *c, struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0; + GLuint nr, i; src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); - GLuint nr = src0.nr; - int i; + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -494,10 +602,10 @@ static void emit_cinterp(struct brw_wm_compile *c, struct brw_reg interp[4]; struct brw_reg dst, src0; + GLuint nr, i; src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - GLuint nr = src0.nr; - int i; + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -521,13 +629,13 @@ static void emit_pinterp(struct brw_wm_compile *c, struct brw_reg interp[4]; struct brw_reg dst, delta0, delta1; struct brw_reg src0, w; + GLuint nr, i; src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); delta0 = get_src_reg(c, &inst->SrcReg[1], 0, 1); delta1 = get_src_reg(c, &inst->SrcReg[1], 1, 1); w = get_src_reg(c, &inst->SrcReg[2], 3, 1); - GLuint nr = src0.nr; - int i; + nr = src0.nr; interp[0] = brw_vec1_grf(nr, 0); interp[1] = brw_vec1_grf(nr, 4); @@ -627,23 +735,46 @@ static void emit_dph(struct brw_wm_compile *c, brw_set_saturate(p, 0); } +/** + * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. + * Note that the result of the function is smeared across the dest + * register's X, Y, Z and W channels (subject to writemasking of course). + */ static void emit_math1(struct brw_wm_compile *c, struct prog_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; - struct brw_reg src0, dst; + struct brw_reg src0, dst, tmp; + const int mark = mark_tmps( c ); + int i; + + tmp = alloc_tmp(c); + /* Get first component of source register */ src0 = get_src_reg(c, &inst->SrcReg[0], 0, 1); - dst = get_dst_reg(c, inst, get_scalar_dst_index(inst), 1); + + /* tmp = func(src0) */ brw_MOV(p, brw_message_reg(2), src0); brw_math(p, - dst, - func, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); + tmp, + func, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + /*tmp.dw1.bits.swizzle = SWIZZLE_XXXX;*/ + + /* replicate tmp value across enabled dest channels */ + for (i = 0; i < 4; i++) { + if (inst->DstReg.WriteMask & (1 << i)) { + dst = get_dst_reg(c, inst, i, 1); + brw_MOV(p, dst, tmp); + } + } + + release_tmps(c, mark); } static void emit_rcp(struct brw_wm_compile *c, @@ -1045,23 +1176,23 @@ static void emit_ddy(struct brw_wm_compile *c, brw_set_saturate(p, 0); } -static __inline struct brw_reg high_words( struct brw_reg reg ) +static INLINE struct brw_reg high_words( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_W ), 1 ), 0, 8, 2 ); } -static __inline struct brw_reg low_words( struct brw_reg reg ) +static INLINE struct brw_reg low_words( struct brw_reg reg ) { return stride( retype( reg, BRW_REGISTER_TYPE_W ), 0, 8, 2 ); } -static __inline struct brw_reg even_bytes( struct brw_reg reg ) +static INLINE struct brw_reg even_bytes( struct brw_reg reg ) { return stride( retype( reg, BRW_REGISTER_TYPE_B ), 0, 16, 2 ); } -static __inline struct brw_reg odd_bytes( struct brw_reg reg ) +static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) { return stride( suboffset( retype( reg, BRW_REGISTER_TYPE_B ), 1 ), 0, 16, 2 ); @@ -1366,9 +1497,11 @@ static void emit_noise2( struct brw_wm_compile *c, release_tmps( c, mark ); } -/* The three-dimensional case is much like the one- and two- versions above, - but since the number of corners is rapidly growing we now pack 16 16-bit - hashes into each register to extract more parallelism from the EUs. */ +/** + * The three-dimensional case is much like the one- and two- versions above, + * but since the number of corners is rapidly growing we now pack 16 16-bit + * hashes into each register to extract more parallelism from the EUs. + */ static void noise3_sub( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; @@ -1670,13 +1803,15 @@ static void emit_noise3( struct brw_wm_compile *c, release_tmps( c, mark ); } -/* For the four-dimensional case, the little micro-optimisation benefits - we obtain by unrolling all the loops aren't worth the massive bloat it - now causes. Instead, we loop twice around performing a similar operation - to noise3, once for the w=0 cube and once for the w=1, with a bit more - code to glue it all together. */ -static void noise4_sub( struct brw_wm_compile *c ) { - +/** + * For the four-dimensional case, the little micro-optimisation benefits + * we obtain by unrolling all the loops aren't worth the massive bloat it + * now causes. Instead, we loop twice around performing a similar operation + * to noise3, once for the w=0 cube and once for the w=1, with a bit more + * code to glue it all together. + */ +static void noise4_sub( struct brw_wm_compile *c ) +{ struct brw_compile *p = &c->func; struct brw_reg param[ 4 ], x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ @@ -2244,28 +2379,12 @@ static void emit_tex(struct brw_wm_compile *c, brw_MOV(p, dst[3], brw_imm_f(1.0)); } +/** + * Resolve subroutine calls after code emit is done. + */ static void post_wm_emit( struct brw_wm_compile *c ) { - GLuint nr_insns = c->fp->program.Base.NumInstructions; - GLuint insn, target_insn; - struct prog_instruction *inst1, *inst2; - struct brw_instruction *brw_inst1, *brw_inst2; - int offset; - for (insn = 0; insn < nr_insns; insn++) { - inst1 = &c->fp->program.Base.Instructions[insn]; - brw_inst1 = inst1->Data; - switch (inst1->Opcode) { - case OPCODE_CAL: - target_insn = inst1->BranchTarget; - inst2 = &c->fp->program.Base.Instructions[target_insn]; - brw_inst2 = inst2->Data; - offset = brw_inst2 - brw_inst1; - brw_set_src1(brw_inst1, brw_imm_d(offset*16)); - break; - default: - break; - } - } + brw_resolve_cals(&c->func); } static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) @@ -2285,10 +2404,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) for (i = 0; i < c->nr_fp_insns; i++) { struct prog_instruction *inst = &c->prog_instructions[i]; - struct prog_instruction *orig_inst; - - if ((orig_inst = inst->Data) != 0) - orig_inst->Data = current_insn(p); if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); @@ -2446,7 +2561,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_ENDIF(p, if_inst[--if_insn]); break; case OPCODE_BGNSUB: + brw_save_label(p, inst->Comment, p->nr_insn); + break; case OPCODE_ENDSUB: + /* no-op */ break; case OPCODE_CAL: brw_push_insn_state(p); @@ -2456,8 +2574,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4)); - orig_inst = inst->Data; - orig_inst->Data = &p->store[p->nr_insn]; + brw_save_call(&c->func, inst->Comment, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_pop_insn_state(p); break; @@ -2510,14 +2627,34 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } post_wm_emit(c); - for (i = 0; i < c->fp->program.Base.NumInstructions; i++) - c->fp->program.Base.Instructions[i].Data = NULL; + + if (c->reg_index >= BRW_WM_MAX_GRF) { + _mesa_problem(NULL, "Ran out of registers in brw_wm_emit_glsl()"); + /* XXX we need to do some proper error recovery here */ + } } + +/** + * Do GPU code generation for shaders that use GLSL features such as + * flow control. Other shaders will be compiled with the + */ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("brw_wm_glsl_emit:\n"); + } + + /* initial instruction translation/simplification */ brw_wm_pass_fp(c); + + /* actual code generation */ brw_wm_emit_glsl(brw, c); + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "brw_wm_glsl_emit done"); + } + c->prog_data.total_grf = c->reg_index; c->prog_data.total_scratch = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 590cd946ec..2debd0678a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -334,8 +334,9 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, } -static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, - const struct prog_instruction *inst ) +static void +translate_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst) { struct brw_wm_instruction *out = get_instruction(c); GLuint writemask = inst->DstReg.WriteMask; @@ -348,8 +349,9 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, out->saturate = (inst->SaturateMode != SATURATE_OFF); out->tex_unit = inst->TexSrcUnit; out->tex_idx = inst->TexSrcTarget; - out->eot = inst->Sampler & 1; - out->target = inst->Sampler>>1; + out->tex_shadow = inst->TexShadow; + out->eot = inst->Aux & 1; + out->target = inst->Aux >> 1; /* Args: */ @@ -365,8 +367,6 @@ static struct brw_wm_instruction *translate_insn( struct brw_wm_compile *c, pass0_set_dst_scalar(c, out, inst, writemask); else pass0_set_dst(c, out, inst, writemask); - - return out; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c index 6eaed8a665..cf031899dd 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c @@ -210,9 +210,10 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case OPCODE_TEX: + case OPCODE_TXP: read0 = get_texcoord_mask(inst->tex_idx); - if (c->key.shadowtex_mask & (1<<inst->tex_unit)) + if (inst->tex_shadow) read0 |= WRITEMASK_Z; break; @@ -267,7 +268,6 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case OPCODE_DST: - case OPCODE_TXP: default: break; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index b6dac0d698..68a9296a71 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -95,6 +95,7 @@ struct wm_sampler_key { int sampler_count; struct wm_sampler_entry { + GLenum tex_target; GLenum wrap_r, wrap_s, wrap_t; float maxlod, minlod; float lod_bias; @@ -168,19 +169,20 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, } } - sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); - sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); - sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); - - /* Fulsim complains if I don't do this. Hardware doesn't mind: - */ -#if 0 - if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) { + if (key->tex_target == GL_TEXTURE_CUBE_MAP && + (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) { + /* If we're using anything but nearest sampling for a cube map, we + * need to set this wrap mode to avoid GPU lock-ups. + */ sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; } -#endif + else { + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); + } /* Set shadow function: */ @@ -234,6 +236,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw, struct gl_texture_image *firstImage = texObj->Image[0][intelObj->firstLevel]; + entry->tex_target = texObj->Target; + entry->wrap_r = texObj->WrapR; entry->wrap_s = texObj->WrapS; entry->wrap_t = texObj->WrapT; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 3c3b3473d6..63fc8a004f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -62,6 +62,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { GLcontext *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; + const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); @@ -103,11 +104,14 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* as far as we can tell */ key->computes_depth = - (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) != 0; + (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->is_glsl = brw_wm_is_glsl(fp); + key->is_glsl = bfp->isGLSL; + + /* temporary sanity check assertion */ + ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); /* XXX: This needs a flag to indicate when it changes. */ key->stats_wm = intel->stats_wm; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index d70f9c646c..9b320480b6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -139,7 +139,18 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; case MESA_FORMAT_S8_Z24: - return BRW_SURFACEFORMAT_I24X8_UNORM; + /* XXX: these different surface formats don't seem to + * make any difference for shadow sampler/compares. + */ + if (depth_mode == GL_INTENSITY) + return BRW_SURFACEFORMAT_I24X8_UNORM; + else if (depth_mode == GL_ALPHA) + return BRW_SURFACEFORMAT_A24X8_UNORM; + else + return BRW_SURFACEFORMAT_L24X8_UNORM; + + case MESA_FORMAT_DUDV8: + return BRW_SURFACEFORMAT_R8G8_SNORM; default: assert(0); @@ -381,8 +392,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, * a more restrictive relocation to emit. */ dri_bo_emit_reloc(brw->wm.surf_bo[unit], - I915_GEM_DOMAIN_RENDER | - I915_GEM_DOMAIN_SAMPLER, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0, offsetof(struct brw_surface_state, ss1), @@ -447,13 +457,13 @@ static void prepare_wm_surfaces(struct brw_context *brw ) GLuint i; int old_nr_surfaces; - if (brw->state.nr_draw_regions > 1) { - for (i = 0; i < brw->state.nr_draw_regions; i++) { - brw_update_region_surface(brw, brw->state.draw_regions[i], i, + if (brw->state.nr_color_regions > 1) { + for (i = 0; i < brw->state.nr_color_regions; i++) { + brw_update_region_surface(brw, brw->state.color_regions[i], i, GL_FALSE); } - }else { - brw_update_region_surface(brw, brw->state.draw_regions[0], 0, GL_TRUE); + } else { + brw_update_region_surface(brw, brw->state.color_regions[0], 0, GL_TRUE); } old_nr_surfaces = brw->wm.nr_surfaces; |