diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.c | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_context.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_disasm.c | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_eu_emit.c | 20 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 14 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_sf.c | 21 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_sf.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_sf_emit.c | 135 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs_emit.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_emit.c | 56 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_glsl.c | 190 | ||||
l--------- | src/mesa/drivers/dri/i965/server/intel_dri.c | 1 |
16 files changed, 258 insertions, 209 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index f98a1a27db..a242580273 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -99,7 +99,6 @@ DRIVER_SOURCES = \ C_SOURCES = \ $(COMMON_SOURCES) \ - $(MINIGLX_SOURCES) \ $(DRIVER_SOURCES) ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index a512896f31..241193c357 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -156,6 +156,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, brw->has_surface_tile_offset = GL_TRUE; brw->has_compr4 = GL_TRUE; brw->has_aa_line_parameters = GL_TRUE; + brw->has_pln = GL_TRUE; } else { brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index d6fc37e4d8..2855c93ea6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -446,6 +446,7 @@ struct brw_context GLboolean has_compr4; GLboolean has_negative_rhw_bug; GLboolean has_aa_line_parameters; + GLboolean has_pln; ; struct { struct brw_state_flags dirty; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index bb1b5f5ef0..984e56d00c 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -550,6 +550,7 @@ #define BRW_OPCODE_DP2 87 #define BRW_OPCODE_DPA2 88 #define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_PLN 90 #define BRW_OPCODE_NOP 126 #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index a8f6b993ac..ad61770212 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -50,6 +50,7 @@ struct { [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 }, [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 }, @@ -73,10 +74,10 @@ struct { [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 }, [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, - [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 }, + [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 }, - [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 }, + [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 }, [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 }, [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 }, diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 39eb88d7c2..8e9117b644 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -795,6 +795,7 @@ ALU2(DPH) ALU2(DP3) ALU2(DP2) ALU2(LINE) +ALU2(PLN) #undef ALU1 #undef ALU2 diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index f69d529613..d2395dec28 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -573,7 +573,7 @@ ALU2(DPH) ALU2(DP3) ALU2(DP2) ALU2(LINE) - +ALU2(PLN) @@ -1290,7 +1290,7 @@ void brw_SAMPLE(struct brw_compile *p, GLuint simd_mode) { GLboolean need_stall = 0; - + if (writemask == 0) { /*printf("%s: zero writemask??\n", __FUNCTION__); */ return; @@ -1327,8 +1327,14 @@ void brw_SAMPLE(struct brw_compile *p, /* printf("need stall %x %x\n", newmask , writemask); */ } else { + GLboolean dispatch_16 = GL_FALSE; + struct brw_reg m1 = brw_message_reg(msg_reg_nr); - + + guess_execution_size(p->current, dest); + if (p->current->header.execution_size == BRW_EXECUTE_16) + dispatch_16 = GL_TRUE; + newmask = ~newmask & WRITEMASK_XYZW; brw_push_insn_state(p); @@ -1343,7 +1349,13 @@ void brw_SAMPLE(struct brw_compile *p, src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); dest = offset(dest, dst_offset); - response_length = len * 2; + + /* For 16-wide dispatch, masked channels are skipped in the + * response. For 8-wide, masked channels still take up slots, + * and are just not written to. + */ + if (dispatch_16) + response_length = len * 2; } } diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index c78f7b38ae..1fd957b3ad 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -95,9 +95,17 @@ static void brwDeleteProgram( GLcontext *ctx, struct gl_program *prog ) { if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); - dri_bo_unreference(brw_fprog->const_buffer); + struct gl_fragment_program *fp = (struct gl_fragment_program *) prog; + struct brw_fragment_program *brw_fp = brw_fragment_program(fp); + + dri_bo_unreference(brw_fp->const_buffer); + } + + if (prog->Target == GL_VERTEX_PROGRAM_ARB) { + struct gl_vertex_program *vp = (struct gl_vertex_program *) prog; + struct brw_vertex_program *brw_vp = brw_vertex_program(vp); + + dri_bo_unreference(brw_vp->const_buffer); } _mesa_delete_program( ctx, prog ); diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 8e6839b812..57d1c29ade 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -46,7 +46,6 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { - GLcontext *ctx = &brw->intel.ctx; struct brw_sf_compile c; const GLuint *program; GLuint program_size; @@ -69,20 +68,14 @@ static void compile_sf_prog( struct brw_context *brw, /* Construct map from attribute number to position in the vertex. */ - for (i = idx = 0; i < VERT_RESULT_MAX; i++) + for (i = idx = 0; i < VERT_RESULT_MAX; i++) { if (c.key.attrs & BITFIELD64_BIT(i)) { c.attr_to_idx[i] = idx; c.idx_to_attr[idx] = i; - if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { - c.point_attrs[i].CoordReplace = - ctx->Point.CoordReplace[i - VERT_RESULT_TEX0]; - } - else { - c.point_attrs[i].CoordReplace = GL_FALSE; - } idx++; } - + } + /* Which primitive? Or all three? */ switch (key->primitive) { @@ -162,6 +155,14 @@ static void upload_sf_prog(struct brw_context *brw) } key.do_point_sprite = ctx->Point.PointSprite; + if (key.do_point_sprite) { + int i; + + for (i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key.point_sprite_coord_replace |= (1 << i); + } + } key.sprite_origin_lower_left = (ctx->Point.SpriteOrigin == GL_LOWER_LEFT); /* _NEW_LIGHT */ key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index 0ba731fac9..a0680a56f2 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -46,6 +46,7 @@ struct brw_sf_prog_key { GLbitfield64 attrs; + uint8_t point_sprite_coord_replace; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; @@ -56,10 +57,6 @@ struct brw_sf_prog_key { GLuint pad:24; }; -struct brw_sf_point_tex { - GLboolean CoordReplace; -}; - struct brw_sf_compile { struct brw_compile func; struct brw_sf_prog_key key; @@ -100,7 +97,6 @@ struct brw_sf_compile { GLubyte attr_to_idx[VERT_RESULT_MAX]; GLubyte idx_to_attr[VERT_RESULT_MAX]; - struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; }; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index bb08055e3b..56f7c986e7 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -354,6 +354,33 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, return is_last_attr; } +/* Calculates the predicate control for which channels of a reg + * (containing 2 attrs) to do point sprite coordinate replacement on. + */ +static uint16_t +calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg) +{ + int attr1, attr2; + uint16_t pc = 0; + + attr1 = c->idx_to_attr[reg * 2]; + if (attr1 >= VERT_RESULT_TEX0 && attr1 <= VERT_RESULT_TEX7) { + if (c->key.point_sprite_coord_replace & (1 << (attr1 - VERT_RESULT_TEX0))) + pc |= 0x0f; + } + + if (reg * 2 + 1 < c->nr_setup_attrs) { + attr2 = c->idx_to_attr[reg * 2 + 1]; + if (attr2 >= VERT_RESULT_TEX0 && attr2 <= VERT_RESULT_TEX7) { + if (c->key.point_sprite_coord_replace & (1 << (attr2 - + VERT_RESULT_TEX0))) + pc |= 0xf0; + } + } + + return pc; +} + void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) @@ -529,22 +556,27 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) copy_z_inv_w(c); for (i = 0; i < c->nr_setup_regs; i++) { - struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; struct brw_reg a0 = offset(c->vert[0], i); - GLushort pc, pc_persp, pc_linear; + GLushort pc, pc_persp, pc_linear, pc_coord_replace; GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - - if (pc_persp) - { - if (!tex->CoordReplace) { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } + + pc_coord_replace = calculate_point_sprite_mask(c, i); + pc_persp &= ~pc_coord_replace; + + if (pc_persp) { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); } - if (tex->CoordReplace) { - /* Caculate 1.0/PointWidth */ - brw_math(&c->func, + /* Point sprite coordinate replacement: A texcoord with this + * enabled gets replaced with the value (x, y, 0, 1) where x and + * y vary from 0 to 1 across the horizontal and vertical of the + * point. + */ + if (pc_coord_replace) { + brw_set_predicate_control_flag_value(p, pc_coord_replace); + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, c->tmp, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, @@ -553,50 +585,51 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); - if (c->key.sprite_origin_lower_left) { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } else { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } - } else { - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); - } + brw_set_access_mode(p, BRW_ALIGN_16); - { - brw_set_predicate_control_flag_value(p, pc); - if (tex->CoordReplace) { - if (c->key.sprite_origin_lower_left) { - brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); - brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); - } - else - brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + /* dA/dx, dA/dy */ + brw_MOV(p, c->m1Cx, brw_imm_f(0.0)); + brw_MOV(p, c->m2Cy, brw_imm_f(0.0)); + brw_MOV(p, brw_writemask(c->m1Cx, WRITEMASK_X), c->tmp); + if (c->key.sprite_origin_lower_left) { + brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), negate(c->tmp)); } else { - brw_MOV(p, c->m3C0, a0); /* constant value */ + brw_MOV(p, brw_writemask(c->m2Cy, WRITEMASK_Y), c->tmp); } - /* Copy m0..m3 to URB. - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); + /* attribute constant offset */ + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + if (c->key.sprite_origin_lower_left) { + brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_YW), brw_imm_f(1.0)); + } else { + brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0)); + } + + brw_set_access_mode(p, BRW_ALIGN_1); } + + if (pc & ~pc_coord_replace) { + brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace); + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + brw_MOV(p, c->m3C0, a0); /* constant value */ + } + + + brw_set_predicate_control_flag_value(p, pc); + /* Copy m0..m3 to URB. */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); } } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index a7c4b58972..a48804a660 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -1717,11 +1717,13 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* patch all the BREAK/CONT instructions from last BEGINLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 88d84ee82f..47b764d24d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -328,6 +328,12 @@ void emit_cinterp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, const struct brw_reg *arg0); +void emit_cmp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 9315bca315..412e09b76a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -34,6 +34,23 @@ #include "brw_context.h" #include "brw_wm.h" +static GLboolean can_do_pln(struct intel_context *intel, + const struct brw_reg *deltas) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + if (!brw->has_pln) + return GL_FALSE; + + if (deltas[1].nr != deltas[0].nr + 1) + return GL_FALSE; + + if (intel->gen < 6 && ((deltas[0].nr & 1) != 0)) + return GL_FALSE; + + return GL_TRUE; +} + /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. */ @@ -186,6 +203,7 @@ void emit_pixel_w(struct brw_wm_compile *c, const struct brw_reg *deltas) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; /* Don't need this if all you are doing is interpolating color, for * instance. @@ -196,8 +214,12 @@ void emit_pixel_w(struct brw_wm_compile *c, /* Calc 1/w - just linterp wpos[3] optimized by putting the * result straight into a message reg. */ - brw_LINE(p, brw_null_reg(), interp3, deltas[0]); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, brw_message_reg(2), interp3, deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp3, deltas[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + } /* Calc w */ if (c->dispatch_width == 16) { @@ -224,6 +246,7 @@ void emit_linterp(struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *deltas) { + struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; @@ -235,8 +258,12 @@ void emit_linterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); - brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, dst[i], interp[i], deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } } } } @@ -249,6 +276,7 @@ void emit_pinterp(struct brw_compile *p, const struct brw_reg *deltas, const struct brw_reg *w) { + struct intel_context *intel = &p->brw->intel; struct brw_reg interp[4]; GLuint nr = arg0[0].nr; GLuint i; @@ -260,8 +288,12 @@ void emit_pinterp(struct brw_compile *p, for (i = 0; i < 4; i++) { if (mask & (1<<i)) { - brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); - brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + if (can_do_pln(intel, deltas)) { + brw_PLN(p, dst[i], interp[i], deltas[0]); + } else { + brw_LINE(p, brw_null_reg(), interp[i], deltas[0]); + brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]); + } } } for (i = 0; i < 4; i++) { @@ -566,12 +598,12 @@ static void emit_sne( struct brw_compile *p, emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1); } -static void emit_cmp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_cmp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 562608e2ec..0b66cc6c9f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -289,6 +289,7 @@ reclaim_temps(struct brw_wm_compile *c) */ static void prealloc_reg(struct brw_wm_compile *c) { + struct intel_context *intel = &c->func.brw->intel; int i, j; struct brw_reg reg; int urb_read_length = 0; @@ -413,6 +414,43 @@ static void prealloc_reg(struct brw_wm_compile *c) } } + for (i = 0; i < c->nr_fp_insns; i++) { + const struct prog_instruction *inst = &c->prog_instructions[i]; + + switch (inst->Opcode) { + case WM_DELTAXY: + /* Allocate WM_DELTAXY destination on G45/GM45 to an + * even-numbered GRF if possible so that we can use the PLN + * instruction. + */ + if (inst->DstReg.WriteMask == WRITEMASK_XY && + !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][0].inited && + !c->wm_regs[inst->DstReg.File][inst->DstReg.Index][1].inited && + (IS_G4X(intel->intelScreen->deviceID) || intel->gen == 5)) { + int grf; + + for (grf = c->first_free_grf & ~1; + grf < BRW_WM_MAX_GRF; + grf += 2) + { + if (!c->used_grf[grf] && !c->used_grf[grf + 1]) { + c->used_grf[grf] = GL_TRUE; + c->used_grf[grf + 1] = GL_TRUE; + c->first_free_grf = grf + 2; /* a guess */ + + set_reg(c, inst->DstReg.File, inst->DstReg.Index, 0, + brw_vec8_grf(grf, 0)); + set_reg(c, inst->DstReg.File, inst->DstReg.Index, 1, + brw_vec8_grf(grf + 1, 0)); + break; + } + } + } + default: + break; + } + } + /* An instruction may reference up to three constants. * They'll be found in these registers. * XXX alloc these on demand! @@ -614,112 +652,6 @@ static void invoke_subroutine( struct brw_wm_compile *c, } } -/* Workaround for using brw_wm_emit.c's emit functions, which expect - * destination regs to be uniquely written. Moves arguments out to - * temporaries as necessary for instructions which use their destination as - * a temporary. - */ -static void -unalias3(struct brw_wm_compile *c, - void (*func)(struct brw_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4]; - int i, j; - int mark = mark_tmps(c); - - for (j = 0; j < 4; j++) { - tmp_arg0[j] = arg0[j]; - tmp_arg1[j] = arg1[j]; - tmp_arg2[j] = arg2[j]; - } - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - for (j = 0; j < 4; j++) { - if (arg0[j].file == dst[i].file && - dst[i].nr == arg0[j].nr) { - tmp_arg0[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg0[j], arg0[j]); - } - if (arg1[j].file == dst[i].file && - dst[i].nr == arg1[j].nr) { - tmp_arg1[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg1[j], arg1[j]); - } - if (arg2[j].file == dst[i].file && - dst[i].nr == arg2[j].nr) { - tmp_arg2[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg2[j], arg2[j]); - } - } - } - } - - func(p, dst, mask, tmp_arg0, tmp_arg1, tmp_arg2); - - release_tmps(c, mark); -} - -/* Workaround for using brw_wm_emit.c's emit functions, which expect - * destination regs to be uniquely written. Moves arguments out to - * temporaries as necessary for instructions which use their destination as - * a temporary. - */ -static void -unalias2(struct brw_wm_compile *c, - void (*func)(struct brw_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) -{ - struct brw_compile *p = &c->func; - struct brw_reg tmp_arg0[4], tmp_arg1[4]; - int i, j; - int mark = mark_tmps(c); - - for (j = 0; j < 4; j++) { - tmp_arg0[j] = arg0[j]; - tmp_arg1[j] = arg1[j]; - } - - for (i = 0; i < 4; i++) { - if (mask & (1<<i)) { - for (j = 0; j < 4; j++) { - if (arg0[j].file == dst[i].file && - dst[i].nr == arg0[j].nr) { - tmp_arg0[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg0[j], arg0[j]); - } - if (arg1[j].file == dst[i].file && - dst[i].nr == arg1[j].nr) { - tmp_arg1[j] = alloc_tmp(c); - brw_MOV(p, tmp_arg1[j], arg1[j]); - } - } - } - } - - func(p, dst, mask, tmp_arg0, tmp_arg1); - - release_tmps(c, mark); -} - static void emit_arl(struct brw_wm_compile *c, const struct prog_instruction *inst) { @@ -1813,14 +1745,29 @@ static void get_argument_regs(struct brw_wm_compile *c, const struct prog_instruction *inst, int index, + struct brw_reg *dst, struct brw_reg *regs, int mask) { - int i; + struct brw_compile *p = &c->func; + int i, j; for (i = 0; i < 4; i++) { - if (mask & (1 << i)) + if (mask & (1 << i)) { regs[i] = get_src_reg(c, inst, index, i); + + /* Unalias destination registers from our sources. */ + if (regs[i].file == BRW_GENERAL_REGISTER_FILE) { + for (j = 0; j < 4; j++) { + if (memcmp(®s[i], &dst[j], sizeof(regs[0])) == 0) { + struct brw_reg tmp = alloc_tmp(c); + brw_MOV(p, tmp, regs[i]); + regs[i] = tmp; + break; + } + } + } + } } } @@ -1845,6 +1792,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) int dst_flags; struct brw_reg args[3][4], dst[4]; int j; + int mark = mark_tmps( c ); c->cur_inst = i; @@ -1866,7 +1814,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } } for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) - get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW); + get_argument_regs(c, inst, j, dst, args[j], WRITEMASK_XYZW); dst_flags = inst->DstReg.WriteMask; if (inst->SaturateMode == SATURATE_ZERO_ONE) @@ -1920,8 +1868,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; case OPCODE_LRP: - unalias3(c, emit_lrp, - dst, dst_flags, args[0], args[1], args[2]); + emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_TRUNC: emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); @@ -1960,11 +1907,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_LG2: emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; + case OPCODE_CMP: + emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); + break; case OPCODE_MIN: - unalias2(c, emit_min, dst, dst_flags, args[0], args[1]); + emit_min(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_MAX: - unalias2(c, emit_max, dst, dst_flags, args[0], args[1]); + emit_max(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DDX: case OPCODE_DDY: @@ -2103,11 +2053,13 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) /* patch all the BREAK/CONT instructions from last BGNLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { + if (inst0->header.opcode == BRW_OPCODE_BREAK && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE && + inst0->bits3.if_else.jump_count == 0) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } @@ -2115,10 +2067,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } break; default: - printf("unsupported IR in fragment shader %d\n", - inst->Opcode); + printf("unsupported opcode %d (%s) in fragment shader\n", + inst->Opcode, inst->Opcode < MAX_OPCODE ? + _mesa_opcode_string(inst->Opcode) : "unknown"); } + /* Release temporaries containing any unaliased source regs. */ + release_tmps( c, mark ); + if (inst->CondUpdate) brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); else diff --git a/src/mesa/drivers/dri/i965/server/intel_dri.c b/src/mesa/drivers/dri/i965/server/intel_dri.c deleted file mode 120000 index effdd26448..0000000000 --- a/src/mesa/drivers/dri/i965/server/intel_dri.c +++ /dev/null @@ -1 +0,0 @@ -../../intel/server/intel_dri.c
\ No newline at end of file |