diff options
author | Alex Deucher <alexdeucher@gmail.com> | 2009-07-15 14:17:07 -0400 |
---|---|---|
committer | Alex Deucher <alexdeucher@gmail.com> | 2009-07-15 14:17:07 -0400 |
commit | c5c19919ce627b98d8aab4284da1694573bcccd4 (patch) | |
tree | 2e29b313b79b6a392e020fd5723e3cc00c800fd2 /src/mesa/drivers/dri/i965 | |
parent | a0d4a12614fce072fa1eb5516e626909171c95e1 (diff) | |
parent | 3a3b83e5112b725e22f05b32a273a2351b820944 (diff) |
Merge branch 'master' of git+ssh://agd5f@git.freedesktop.org/git/mesa/mesa into r6xx-rewrite
This builds, but I hit an assertion failure in radeonGetLock() because
the drawable is NULL.
Diffstat (limited to 'src/mesa/drivers/dri/i965')
43 files changed, 1116 insertions, 261 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 9712c38725..00a42111da 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -23,6 +23,7 @@ DRIVER_SOURCES = \ intel_pixel_bitmap.c \ intel_pixel_copy.c \ intel_pixel_draw.c \ + intel_pixel_read.c \ intel_state.c \ intel_swapbuffers.c \ intel_tex.c \ diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 5cffcebde4..54d30a3f42 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -65,21 +65,31 @@ static void compile_clip_prog( struct brw_context *brw, c.func.single_program_flow = 1; c.key = *key; - + c.need_ff_sync = BRW_IS_IGDNG(brw); /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; - for (i = 0, delta = REG_SIZE; i < VERT_RESULT_MAX; i++) + if (BRW_IS_IGDNG(brw)) + delta = 3 * REG_SIZE; + else + delta = REG_SIZE; + + for (i = 0; i < VERT_RESULT_MAX; i++) if (c.key.attrs & (1<<i)) { c.offset[i] = delta; delta += ATTR_SIZE; } c.nr_attrs = brw_count_bits(c.key.attrs); - c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + + if (BRW_IS_IGDNG(brw)) + c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ + else + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? 
*/ + c.nr_bytes = c.nr_regs * REG_SIZE; c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ @@ -148,7 +158,11 @@ static void upload_clip_prog(struct brw_context *brw) key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); /* _NEW_TRANSFORM */ key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); - key.clip_mode = BRW_CLIPMODE_NORMAL; + + if (BRW_IS_IGDNG(brw)) + key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; + else + key.clip_mode = BRW_CLIPMODE_NORMAL; /* _NEW_POLYGON */ if (key.primitive == GL_TRIANGLES) { diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h index e06747864b..12e8548df1 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.h +++ b/src/mesa/drivers/dri/i965/brw_clip.h @@ -117,6 +117,7 @@ struct brw_clip_compile { GLuint header_position_offset; GLuint offset[VERT_ATTRIB_MAX]; + GLboolean need_ff_sync; }; #define ATTR_SIZE (4*4) @@ -171,5 +172,5 @@ struct brw_reg get_tmp( struct brw_clip_compile *c ); void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ); - +void brw_clip_ff_sync(struct brw_clip_compile *c); #endif diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index d830e49e50..9abd0642aa 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -130,7 +130,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_instruction *plane_loop; struct brw_instruction *plane_active; struct brw_instruction *is_negative; - struct brw_instruction *is_neg2; + struct brw_instruction *is_neg2 = NULL; struct brw_instruction *not_culled; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); @@ -148,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_clipmask(c); /* -ve rhw workaround */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), 
get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -185,7 +185,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) * Both can be negative on GM965/G965 due to RHW workaround * if so, this object should be rejected. */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); { @@ -210,7 +210,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* If both are positive, do nothing */ /* Only on GM965/G965 */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); } @@ -225,7 +225,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_ENDIF(p, is_neg2); } } @@ -246,6 +246,8 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); + if (c->need_ff_sync) + brw_clip_ff_sync(c); not_culled = brw_IF(p, BRW_EXECUTE_1); { brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE); diff --git a/src/mesa/drivers/dri/i965/brw_clip_point.c b/src/mesa/drivers/dri/i965/brw_clip_point.c index d17b199b89..9738299168 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_point.c +++ b/src/mesa/drivers/dri/i965/brw_clip_point.c @@ -50,5 +50,7 @@ void brw_emit_point_clip( struct brw_clip_compile *c ) /* Send an empty message to kill the thread: */ brw_clip_tri_alloc_regs(c, 0); + if (c->need_ff_sync) + brw_clip_ff_sync(c); brw_clip_kill_thread(c); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 9b0d7eab7b..5762c9577c 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ 
b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -95,7 +95,14 @@ clip_unit_create_from_key(struct brw_context *brw, * even number. */ assert(key->nr_urb_entries % 2 == 0); - clip.thread4.max_threads = 2 - 1; + + /* Although up to 16 concurrent Clip threads are allowed on IGDNG, + * only 2 threads can output VUEs at a time. + */ + if (BRW_IS_IGDNG(brw)) + clip.thread4.max_threads = 16 - 1; + else + clip.thread4.max_threads = 2 - 1; } else { assert(key->nr_urb_entries >= 5); clip.thread4.max_threads = 1 - 1; diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index 7fd37bd05f..4c2d655fb1 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -77,6 +77,10 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, if (c->nr_attrs & 1) { for (j = 0; j < 3; j++) { GLuint delta = c->nr_attrs*16 + 32; + + if (BRW_IS_IGDNG(c->func.brw)) + delta = c->nr_attrs * 16 + 32 * 3; + brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); } } @@ -562,7 +566,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) /* if -ve rhw workaround bit is set, do cliptest */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -579,11 +583,14 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) if (c->key.do_flat_shading) brw_clip_tri_flat_shade(c); - if (c->key.clip_mode == BRW_CLIPMODE_NORMAL) + if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) || + (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP)) do_clip_tri(c); else maybe_do_clip_tri(c); - + + if (c->need_ff_sync) + brw_clip_ff_sync(c); brw_clip_tri_emit_polygon(c); /* Send an empty message to kill the thread: diff --git a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c index d7ca517927..26950383c1 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_unfilled.c 
+++ b/src/mesa/drivers/dri/i965/brw_clip_unfilled.c @@ -496,6 +496,8 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c ) } brw_ENDIF(p, do_clip); + if (c->need_ff_sync) + brw_clip_ff_sync(c); emit_unfilled_primitives(c); brw_clip_kill_thread(c); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index 9d3b0be694..e09efc07ed 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -140,6 +140,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, /* Just copy the vertex header: */ + /* + * After CLIP stage, only first 256 bits of the VUE are read + * back on IGDNG, so needn't change it + */ brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); /* Iterate over each attribute (could be done in pairs?) @@ -147,6 +151,9 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, for (i = 0; i < c->nr_attrs; i++) { GLuint delta = i*16 + 32; + if (BRW_IS_IGDNG(p->brw)) + delta = i * 16 + 32 * 3; + if (delta == c->offset[VERT_RESULT_EDGE]) { if (force_edgeflag) brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); @@ -177,6 +184,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, if (i & 1) { GLuint delta = i*16 + 32; + + if (BRW_IS_IGDNG(p->brw)) + delta = i * 16 + 32 * 3; + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); } @@ -343,3 +354,19 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) } } +void brw_clip_ff_sync(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* write compelete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 873fc8ffff..57ddf75413 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -129,7 +129,6 @@ 
struct brw_context; #define BRW_NEW_PRIMITIVE 0x40 #define BRW_NEW_CONTEXT 0x80 #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 -#define BRW_NEW_INPUT_VARYING 0x200 #define BRW_NEW_PSP 0x800 #define BRW_NEW_WM_SURFACES 0x1000 #define BRW_NEW_FENCE 0x2000 @@ -246,9 +245,6 @@ struct brw_vs_ouput_sizes { }; -/** Number of general purpose registers (VS, WM, etc) */ -#define BRW_MAX_GRF 128 - /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 @@ -405,7 +401,6 @@ struct brw_vertex_element { struct brw_vertex_info { - GLuint varying; /* varying:1[VERT_ATTRIB_MAX] */ GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */ }; @@ -614,9 +609,10 @@ struct brw_context struct brw_wm_prog_data *prog_data; struct brw_wm_compile *compile_data; - /* Input sizes, calculated from active vertex program: + /** Input sizes, calculated from active vertex program. + * One bit per fragment program input attribute. */ - GLuint input_size_masks[4]; + GLbitfield input_size_masks[4]; /** Array of surface default colors (texture border color) */ dri_bo *sdc_bo[BRW_MAX_TEX_UNIT]; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 98fc909c2a..d166250b4f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -139,6 +139,7 @@ #define BRW_CLIPMODE_CLIP_NON_REJECTED 2 #define BRW_CLIPMODE_REJECT_ALL 3 #define BRW_CLIPMODE_ACCEPT_ALL 4 +#define BRW_CLIPMODE_KERNEL_CLIP 5 #define BRW_CLIP_NDCSPACE 0 #define BRW_CLIP_SCREENSPACE 1 @@ -670,6 +671,25 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 #define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG 1 +#define 
BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG 3 + +/* for IGDNG only */ +#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 +#define BRW_SAMPLER_SIMD_MODE_SIMD8 1 +#define BRW_SAMPLER_SIMD_MODE_SIMD16 2 +#define BRW_SAMPLER_SIMD_MODE_SIMD32_64 3 + #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 #define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 @@ -819,8 +839,11 @@ #include "intel_chipset.h" #define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID)) -#define CMD_PIPELINE_SELECT(brw) (BRW_IS_G4X(brw) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) -#define CMD_VF_STATISTICS(brw) (BRW_IS_G4X(brw) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) -#define URB_SIZES(brw) (BRW_IS_G4X(brw) ? 384 : 256) /* 512 bit units */ +#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID)) +#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) +#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) +#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) +#define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ + (BRW_IS_G4X(brw) ? 
384 : 256)) /* 512 bit units */ #endif diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 5342622a73..5152c3f3a5 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -187,19 +187,13 @@ static void brw_merge_inputs( struct brw_context *brw, brw->vb.inputs[i].glarray = arrays[i]; if (arrays[i]->StrideB != 0) - brw->vb.info.varying |= 1 << i; - brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << ((i%16) * 2); } - /* Raise statechanges if input sizes and varying have changed: - */ + /* Raise statechanges if input sizes have changed. */ if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; - - if (brw->vb.info.varying != old.varying) - brw->state.dirty.brw |= BRW_NEW_INPUT_VARYING; } /* XXX: could split the primitive list to fallback only on the @@ -416,6 +410,8 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, out: UNLOCK_HARDWARE(intel); + brw_state_cache_check_size(brw); + if (warn) fprintf(stderr, "i965: Single primitive emit potentially exceeded " "available aperture space\n"); diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 1b8bcc14ec..c29f1dd5c0 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -512,7 +512,19 @@ static void brw_emit_vertices(struct brw_context *brw) OUT_RELOC(input->bo, I915_GEM_DOMAIN_VERTEX, 0, input->offset); - OUT_BATCH(brw->vb.max_index); + if (BRW_IS_IGDNG(brw)) { + if (input->stride) { + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->offset + input->stride * input->count); + } else { + assert(input->count == 1); + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->offset + input->element_size); + } + } else + OUT_BATCH(brw->vb.max_index); OUT_BATCH(0); /* Instance data step rate */ } ADVANCE_BATCH(); @@ -542,11 +554,18 @@ static 
void brw_emit_vertices(struct brw_context *brw) BRW_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | (0 << BRW_VE0_SRC_OFFSET_SHIFT)); - OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | - (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | - (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | - (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | - ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); + + if (BRW_IS_IGDNG(brw)) + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); + else + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | + ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } ADVANCE_BATCH(); } @@ -635,7 +654,7 @@ static void brw_emit_indices(struct brw_context *brw) if (index_buffer == NULL) return; - ib_size = get_size(index_buffer->type) * index_buffer->count; + ib_size = get_size(index_buffer->type) * index_buffer->count - 1; /* Emit the indexbuffer packet: */ diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index bc7756ceab..30603bdd0e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -171,9 +171,9 @@ static INLINE struct brw_reg brw_reg( GLuint file, { struct brw_reg reg; if (type == BRW_GENERAL_REGISTER_FILE) - assert(nr < 128); + assert(nr < BRW_MAX_GRF); else if (type == BRW_MESSAGE_REGISTER_FILE) - assert(nr < 9); + assert(nr < BRW_MAX_MRF); else if (type == BRW_ARCHITECTURE_REGISTER_FILE) assert(nr <= BRW_ARF_IP); @@ -538,6 +538,7 @@ static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) static INLINE struct brw_reg brw_message_reg( GLuint nr ) { + assert(nr < BRW_MAX_MRF); return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); @@ -815,6 +816,19 @@ void brw_urb_WRITE(struct brw_compile *p, GLuint offset, GLuint swizzle); +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + 
GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle); + void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -834,7 +848,9 @@ void brw_SAMPLE(struct brw_compile *p, GLuint msg_type, GLuint response_length, GLuint msg_length, - GLboolean eot); + GLboolean eot, + GLuint header_present, + GLuint simd_mode); void brw_math_16( struct brw_compile *p, struct brw_reg dest, @@ -855,12 +871,10 @@ void brw_math( struct brw_compile *p, void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ); void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLboolean relAddr, GLuint location, GLuint bind_table_index ); @@ -875,7 +889,6 @@ void brw_dp_READ_4_vs( struct brw_compile *p, void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ); /* If/else/endif. 
Works by manipulating the execution flags on each diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 60ea44f7a9..2412014248 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -241,7 +241,8 @@ void brw_set_src1( struct brw_instruction *insn, -static void brw_set_math_message( struct brw_instruction *insn, +static void brw_set_math_message( struct brw_context *brw, + struct brw_instruction *insn, GLuint msg_length, GLuint response_length, GLuint function, @@ -252,18 +253,35 @@ static void brw_set_math_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.math.function = function; - insn->bits3.math.int_type = integer_type; - insn->bits3.math.precision = low_precision; - insn->bits3.math.saturate = saturate; - insn->bits3.math.data_type = dataType; - insn->bits3.math.response_length = response_length; - insn->bits3.math.msg_length = msg_length; - insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; - insn->bits3.math.end_of_thread = 0; + if (BRW_IS_IGDNG(brw)) { + insn->bits3.math_igdng.function = function; + insn->bits3.math_igdng.int_type = integer_type; + insn->bits3.math_igdng.precision = low_precision; + insn->bits3.math_igdng.saturate = saturate; + insn->bits3.math_igdng.data_type = dataType; + insn->bits3.math_igdng.snapshot = 0; + insn->bits3.math_igdng.header_present = 0; + insn->bits3.math_igdng.response_length = response_length; + insn->bits3.math_igdng.msg_length = msg_length; + insn->bits3.math_igdng.end_of_thread = 0; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH; + insn->bits2.send_igdng.end_of_thread = 0; + } else { + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = 
msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; + } } -static void brw_set_urb_message( struct brw_instruction *insn, + +static void brw_set_ff_sync_message( struct brw_context *brw, + struct brw_instruction *insn, GLboolean allocate, GLboolean used, GLuint msg_length, @@ -273,21 +291,64 @@ static void brw_set_urb_message( struct brw_instruction *insn, GLuint offset, GLuint swizzle_control ) { - brw_set_src1(insn, brw_imm_d(0)); + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.urb_igdng.opcode = 1; + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; +} - insn->bits3.urb.opcode = 0; /* ? */ - insn->bits3.urb.offset = offset; - insn->bits3.urb.swizzle_control = swizzle_control; - insn->bits3.urb.allocate = allocate; - insn->bits3.urb.used = used; /* ? */ - insn->bits3.urb.complete = complete; - insn->bits3.urb.response_length = response_length; - insn->bits3.urb.msg_length = msg_length; - insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; - insn->bits3.urb.end_of_thread = end_of_thread; +static void brw_set_urb_message( struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean end_of_thread, + GLboolean complete, + GLuint offset, + GLuint swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.urb_igdng.opcode = 0; /* ? 
*/ + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; /* ? */ + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? */ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; + } } -static void brw_set_dp_write_message( struct brw_instruction *insn, +static void brw_set_dp_write_message( struct brw_context *brw, + struct brw_instruction *insn, GLuint binding_table_index, GLuint msg_control, GLuint msg_type, @@ -298,18 +359,33 @@ static void brw_set_dp_write_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.dp_write.binding_table_index = binding_table_index; - insn->bits3.dp_write.msg_control = msg_control; - insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; - insn->bits3.dp_write.msg_type = msg_type; - insn->bits3.dp_write.send_commit_msg = 0; - insn->bits3.dp_write.response_length = response_length; - insn->bits3.dp_write.msg_length = msg_length; - insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - insn->bits3.urb.end_of_thread = end_of_thread; + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; + 
insn->bits3.dp_write_igdng.msg_control = msg_control; + insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write_igdng.msg_type = msg_type; + insn->bits3.dp_write_igdng.send_commit_msg = 0; + insn->bits3.dp_write_igdng.header_present = 1; + insn->bits3.dp_write_igdng.response_length = response_length; + insn->bits3.dp_write_igdng.msg_length = msg_length; + insn->bits3.dp_write_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.dp_write.end_of_thread = end_of_thread; + } } -static void brw_set_dp_read_message( struct brw_instruction *insn, +static void brw_set_dp_read_message( struct brw_context *brw, + struct brw_instruction *insn, GLuint binding_table_index, GLuint msg_control, GLuint msg_type, @@ -320,15 +396,29 @@ static void brw_set_dp_read_message( struct brw_instruction *insn, { brw_set_src1(insn, brw_imm_d(0)); - insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ - insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ - insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ - insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ - insn->bits3.dp_read.response_length = response_length; /*16:19*/ - insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ - insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ - insn->bits3.dp_read.pad1 = 0; /*28:30*/ - insn->bits3.dp_read.end_of_thread = 
end_of_thread; /*31*/ + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; + insn->bits3.dp_read_igdng.msg_control = msg_control; + insn->bits3.dp_read_igdng.msg_type = msg_type; + insn->bits3.dp_read_igdng.target_cache = target_cache; + insn->bits3.dp_read_igdng.header_present = 1; + insn->bits3.dp_read_igdng.response_length = response_length; + insn->bits3.dp_read_igdng.msg_length = msg_length; + insn->bits3.dp_read_igdng.pad1 = 0; + insn->bits3.dp_read_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ + insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ + insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read.response_length = response_length; /*16:19*/ + insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read.pad1 = 0; /*28:30*/ + insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ + } } static void brw_set_sampler_message(struct brw_context *brw, @@ -338,11 +428,25 @@ static void brw_set_sampler_message(struct brw_context *brw, GLuint msg_type, GLuint response_length, GLuint msg_length, - GLboolean eot) + GLboolean eot, + GLuint header_present, + GLuint simd_mode) { + assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); - if (BRW_IS_G4X(brw)) { + if (BRW_IS_IGDNG(brw)) { + insn->bits3.sampler_igdng.binding_table_index = binding_table_index; + insn->bits3.sampler_igdng.sampler = sampler; + insn->bits3.sampler_igdng.msg_type = msg_type; + insn->bits3.sampler_igdng.simd_mode = simd_mode; + insn->bits3.sampler_igdng.header_present = header_present; + insn->bits3.sampler_igdng.response_length = response_length; + 
insn->bits3.sampler_igdng.msg_length = msg_length; + insn->bits3.sampler_igdng.end_of_thread = eot; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_igdng.end_of_thread = eot; + } else if (BRW_IS_G4X(brw)) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; insn->bits3.sampler_g4x.msg_type = msg_type; @@ -484,6 +588,10 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, { struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + insn->header.execution_size = 1; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_DISABLE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; @@ -540,6 +648,10 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *if_insn) { struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; if (p->single_program_flow) { insn = next_insn(p, BRW_OPCODE_ADD); @@ -566,7 +678,7 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, } else { assert(if_insn->header.opcode == BRW_OPCODE_IF); - if_insn->bits3.if_else.jump_count = insn - if_insn; + if_insn->bits3.if_else.jump_count = br * (insn - if_insn); if_insn->bits3.if_else.pop_count = 1; if_insn->bits3.if_else.pad0 = 0; } @@ -577,6 +689,11 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, void brw_ENDIF(struct brw_compile *p, struct brw_instruction *patch_insn) { + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + if (p->single_program_flow) { /* In single program flow mode, there's no need to execute an ENDIF, * since we don't need to do any stack operations, and if we're executing @@ -608,11 +725,11 @@ void brw_ENDIF(struct brw_compile *p, /* Automagically turn it into an IFF: */ patch_insn->header.opcode = BRW_OPCODE_IFF; - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.jump_count = br * (insn 
- patch_insn + 1); patch_insn->bits3.if_else.pop_count = 0; patch_insn->bits3.if_else.pad0 = 0; } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { - patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); patch_insn->bits3.if_else.pop_count = 1; patch_insn->bits3.if_else.pad0 = 0; } else { @@ -686,6 +803,10 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *do_insn) { struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; if (p->single_program_flow) insn = next_insn(p, BRW_OPCODE_ADD); @@ -706,7 +827,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, insn->header.execution_size = do_insn->header.execution_size; assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = do_insn - insn + 1; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); insn->bits3.if_else.pop_count = 0; insn->bits3.if_else.pad0 = 0; } @@ -725,11 +846,15 @@ void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *jmp_insn) { struct brw_instruction *landing = &p->store[p->nr_insn]; + GLuint jmpi = 1; + + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); - jmp_insn->bits3.ud = (landing - jmp_insn) - 1; + jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); } @@ -794,7 +919,8 @@ void brw_math( struct brw_compile *p, brw_set_dest(insn, dest); brw_set_src0(insn, src); - brw_set_math_message(insn, + brw_set_math_message(p->brw, + insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, @@ -830,7 +956,8 @@ void brw_math_16( struct brw_compile *p, brw_set_dest(insn, dest); brw_set_src0(insn, src); - brw_set_math_message(insn, + brw_set_math_message(p->brw, + insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, @@ -846,7 +973,8 @@ void brw_math_16( struct 
brw_compile *p, brw_set_dest(insn, offset(dest,1)); brw_set_src0(insn, src); - brw_set_math_message(insn, + brw_set_math_message(p->brw, + insn, msg_length, response_length, function, BRW_MATH_INTEGER_UNSIGNED, @@ -865,9 +993,9 @@ void brw_math_16( struct brw_compile *p, */ void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); @@ -877,7 +1005,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -893,7 +1021,8 @@ void brw_dp_WRITE_16( struct brw_compile *p, brw_set_dest(insn, dest); brw_set_src0(insn, src); - brw_set_dp_write_message(insn, + brw_set_dp_write_message(p->brw, + insn, 255, /* binding table index (255=stateless) */ BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ @@ -912,9 +1041,9 @@ void brw_dp_WRITE_16( struct brw_compile *p, */ void brw_dp_READ_16( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLuint scratch_offset ) { + GLuint msg_reg_nr = 1; { brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); @@ -924,7 +1053,7 @@ void brw_dp_READ_16( struct brw_compile *p, brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), brw_imm_d(scratch_offset)); - + brw_pop_insn_state(p); } @@ -938,7 +1067,8 @@ void brw_dp_READ_16( struct brw_compile *p, brw_set_dest(insn, dest); /* UW? 
*/ brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); - brw_set_dp_read_message(insn, + brw_set_dp_read_message(p->brw, + insn, 255, /* binding table index (255=stateless) */ 3, /* msg_control (3 means 4 Owords) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ @@ -958,21 +1088,26 @@ void brw_dp_READ_16( struct brw_compile *p, */ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, - GLuint msg_reg_nr, GLboolean relAddr, GLuint location, GLuint bind_table_index ) { + /* XXX: relAddr not implemented */ + GLuint msg_reg_nr = 1; { + struct brw_reg b; brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); - /* set message header global offset field (reg 0, element 2) */ - /* Note that grf[0] will be copied to mrf[1] implicitly by the SEND instr */ - brw_MOV(p, - retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), - brw_imm_d(location)); + /* Setup MRF[1] with location/offset into const buffer */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. 
+ */ + brw_MOV(p, b, brw_imm_ud(location)); brw_pop_insn_state(p); } @@ -988,9 +1123,10 @@ void brw_dp_READ_4( struct brw_compile *p, dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); brw_set_dest(insn, dest); - brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + brw_set_src0(insn, brw_null_reg()); - brw_set_dp_read_message(insn, + brw_set_dp_read_message(p->brw, + insn, bind_table_index, 0, /* msg_control (0 means 1 Oword) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ @@ -1061,7 +1197,8 @@ void brw_dp_READ_4_vs(struct brw_compile *p, brw_set_dest(insn, dest); brw_set_src0(insn, brw_null_reg()); - brw_set_dp_read_message(insn, + brw_set_dp_read_message(p->brw, + insn, bind_table_index, oword, /* 0 = lower Oword, 1 = upper Oword */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ @@ -1091,7 +1228,8 @@ void brw_fb_WRITE(struct brw_compile *p, brw_set_dest(insn, dest); brw_set_src0(insn, src0); - brw_set_dp_write_message(insn, + brw_set_dp_write_message(p->brw, + insn, binding_table_index, BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ @@ -1117,7 +1255,9 @@ void brw_SAMPLE(struct brw_compile *p, GLuint msg_type, GLuint response_length, GLuint msg_length, - GLboolean eot) + GLboolean eot, + GLuint header_present, + GLuint simd_mode) { GLboolean need_stall = 0; @@ -1192,7 +1332,9 @@ void brw_SAMPLE(struct brw_compile *p, msg_type, response_length, msg_length, - eot); + eot, + header_present, + simd_mode); } if (need_stall) { @@ -1227,7 +1369,7 @@ void brw_urb_WRITE(struct brw_compile *p, { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); - assert(msg_length < 16); + assert(msg_length < BRW_MAX_MRF); brw_set_dest(insn, dest); brw_set_src0(insn, src0); @@ -1235,7 +1377,8 @@ void brw_urb_WRITE(struct brw_compile *p, insn->header.destreg__conditonalmod = msg_reg_nr; - brw_set_urb_message(insn, + brw_set_urb_message(p->brw, + 
insn, allocate, used, msg_length, @@ -1246,3 +1389,37 @@ void brw_urb_WRITE(struct brw_compile *p, swizzle); } +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < 16); + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, brw_imm_d(0)); + + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_ff_sync_message(p->brw, + insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index 299357409c..d27c6c24ca 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -37,6 +37,9 @@ #include "tnl/tnl.h" #include "brw_context.h" #include "brw_fallback.h" +#include "intel_chipset.h" +#include "intel_fbo.h" +#include "intel_regions.h" #include "glapi/glapi.h" @@ -44,6 +47,7 @@ static GLboolean do_check_fallback(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; GLcontext *ctx = &brw->intel.ctx; GLuint i; @@ -81,6 +85,33 @@ static GLboolean do_check_fallback(struct brw_context *brw) return GL_TRUE; } + /* _NEW_BUFFERS */ + if (IS_965(intel->intelScreen->deviceID) && + !IS_G4X(intel->intelScreen->deviceID)) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. 
+ * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. + * I would have done this, but there's also a nasty requirement that + * the depth and the color surfaces all be of the same LOD, which + * may be a worse requirement than this alignment. (Also, we may + * want to just demote the texture to untiled, instead). + */ + if (irb->region && irb->region->tiling != I915_TILING_NONE && + (irb->region->draw_offset & 4095)) { + DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); + return GL_TRUE; + } + } + } return GL_FALSE; } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index a8b74a0afe..48c2b9a41c 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -54,12 +54,17 @@ static void compile_gs_prog( struct brw_context *brw, memset(&c, 0, sizeof(c)); c.key = *key; - + c.need_ff_sync = BRW_IS_IGDNG(brw); /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ c.nr_attrs = brw_count_bits(c.key.attrs); - c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + + if (BRW_IS_IGDNG(brw)) + c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ + else + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? 
*/ + c.nr_bytes = c.nr_regs * REG_SIZE; diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index 18a4537c32..bbb991ea2e 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -62,6 +62,7 @@ struct brw_gs_compile { GLuint nr_attrs; GLuint nr_regs; GLuint nr_bytes; + GLboolean need_ff_sync; }; #define ATTR_SIZE (4*4) diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index 22e0d25c2e..980eac7646 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -101,6 +101,23 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, BRW_URB_SWIZZLE_NONE); } +void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) +{ + struct brw_compile *p = &c->func; + brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* write compelete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} void brw_gs_quads( struct brw_gs_compile *c ) @@ -110,6 +127,8 @@ void brw_gs_quads( struct brw_gs_compile *c ) /* Use polygons for correct edgeflag behaviour. 
Note that vertex 3 * is the PV for quads, but vertex 0 for polygons: */ + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); @@ -120,6 +139,8 @@ void brw_gs_quad_strip( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 4); + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); @@ -129,6 +150,9 @@ void brw_gs_quad_strip( struct brw_gs_compile *c ) void brw_gs_tris( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 3); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); @@ -137,6 +161,9 @@ void brw_gs_tris( struct brw_gs_compile *c ) void brw_gs_lines( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 2); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); } @@ -144,6 +171,9 @@ void brw_gs_lines( struct brw_gs_compile *c ) void brw_gs_points( struct brw_gs_compile *c ) { brw_gs_alloc_regs(c, 1); + + if (c->need_ff_sync) + brw_gs_ff_sync(c, 1); brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); } diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 27023cf034..a761c03153 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ 
-95,6 +95,9 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) gs.thread4.max_threads = 0; /* Hardware requirement */ + if (BRW_IS_IGDNG(brw)) + gs.thread4.rendering_enable = 1; + if (INTEL_DEBUG & DEBUG_STATS) gs.thread4.stats_enable = 1; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 4784254bc7..85a7706404 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -211,7 +211,7 @@ static void emit_depthbuffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; - unsigned int len = BRW_IS_G4X(brw) ? 6 : 5; + unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5; if (region == NULL) { BEGIN_BATCH(len, IGNORE_CLIPRECTS); @@ -222,7 +222,7 @@ static void emit_depthbuffer(struct brw_context *brw) OUT_BATCH(0); OUT_BATCH(0); - if (BRW_IS_G4X(brw)) + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) OUT_BATCH(0); ADVANCE_BATCH(); @@ -244,6 +244,8 @@ static void emit_depthbuffer(struct brw_context *brw) return; } + assert(region->tiling != I915_TILING_X); + BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | @@ -259,7 +261,7 @@ static void emit_depthbuffer(struct brw_context *brw) ((region->height - 1) << 19)); OUT_BATCH(0); - if (BRW_IS_G4X(brw)) + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) OUT_BATCH(0); ADVANCE_BATCH(); @@ -372,7 +374,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) { struct brw_aa_line_parameters balp; - if (!BRW_IS_G4X(brw)) + if (BRW_IS_965(brw)) return; /* use legacy aa line coverage computation */ @@ -509,14 +511,27 @@ static void upload_state_base_address( struct brw_context *brw ) /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. 
*/ - BEGIN_BATCH(6, IGNORE_CLIPRECTS); - OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); - OUT_BATCH(1); /* General state base address */ - OUT_BATCH(1); /* Surface state base address */ - OUT_BATCH(1); /* Indirect object base address */ - OUT_BATCH(1); /* General state upper bound */ - OUT_BATCH(1); /* Indirect object upper bound */ - ADVANCE_BATCH(); + if (BRW_IS_IGDNG(brw)) { + BEGIN_BATCH(8, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* Instruction base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + OUT_BATCH(1); /* Instruction access upper bound */ + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(6, IGNORE_CLIPRECTS); + OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ + OUT_BATCH(1); /* Indirect object base address */ + OUT_BATCH(1); /* General state upper bound */ + OUT_BATCH(1); /* Indirect object upper bound */ + ADVANCE_BATCH(); + } } const struct brw_tracked_state brw_state_base_address = { diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index c3c85978f4..e1c2c7777b 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -166,6 +166,9 @@ static void upload_sf_prog(struct brw_context *brw) key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + /* _NEW_HINT */ + key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* _NEW_POLYGON */ if (key.do_twoside_color) { /* If we're rendering to a FBO, we have to invert the polygon @@ -188,7 +191,7 @@ static void upload_sf_prog(struct brw_context *brw) const struct brw_tracked_state brw_sf_prog = { 
.dirty = { - .mesa = (_NEW_LIGHT|_NEW_POLYGON|_NEW_POINT), + .mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index 1c0fb70fe0..6426b6df9f 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -51,7 +51,8 @@ struct brw_sf_prog_key { GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; - GLuint pad:10; + GLuint linear_color:1; /**< linear interp vs. perspective interp */ + GLuint pad:25; GLenum SpriteOrigin; }; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index 862835f157..ca8f97f9f9 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -151,6 +151,8 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint jmpi = 1; + if (!nr) return; @@ -159,18 +161,21 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + brw_push_insn_state(p); - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr*2+1)); + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); copy_colors(c, c->vert[2], c->vert[0]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*4+1)); + brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1))); copy_colors(c, c->vert[0], c->vert[1]); copy_colors(c, c->vert[2], c->vert[1]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr*2)); + brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2)); copy_colors(c, c->vert[0], c->vert[2]); copy_colors(c, c->vert[1], c->vert[2]); @@ -184,7 +189,8 @@ static void do_flatshade_line( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct brw_reg ip = 
brw_ip_reg(); GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); - + GLuint jmpi = 1; + if (!nr) return; @@ -193,13 +199,16 @@ static void do_flatshade_line( struct brw_sf_compile *c ) if (c->key.primitive == SF_UNFILLED_TRIS) return; + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + brw_push_insn_state(p); - brw_MUL(p, c->pv, c->pv, brw_imm_ud(nr+1)); + brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); brw_JMPI(p, ip, ip, c->pv); copy_colors(c, c->vert[1], c->vert[0]); - brw_JMPI(p, ip, ip, brw_imm_ud(nr)); + brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); copy_colors(c, c->vert[0], c->vert[1]); brw_pop_insn_state(p); @@ -218,7 +227,7 @@ static void alloc_regs( struct brw_sf_compile *c ) /* Values computed by fixed function unit: */ - c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); c->det = brw_vec1_grf(1, 2); c->dx0 = brw_vec1_grf(1, 3); c->dx2 = brw_vec1_grf(1, 4); @@ -295,9 +304,6 @@ static void invert_det( struct brw_sf_compile *c) } -#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ - FRAG_BIT_COL0 | \ - FRAG_BIT_COL1) static GLboolean calculate_masks( struct brw_sf_compile *c, GLuint reg, @@ -306,9 +312,16 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, GLushort *pc_linear) { GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); - GLuint persp_mask = c->key.attrs & ~NON_PERPECTIVE_ATTRS; + GLuint persp_mask; GLuint linear_mask; + if (c->key.do_flat_shading || c->key.linear_color) + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | + FRAG_BIT_COL0 | + FRAG_BIT_COL1); + else + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS); + if (c->key.do_flat_shading) linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1); else @@ -674,7 +687,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1<<_3DPRIM_POLYGON) | (1<<_3DPRIM_RECTLIST) | (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = 
p->flag_value; brw_push_insn_state(p); @@ -695,7 +708,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) (1<<_3DPRIM_LINESTRIP_CONT) | (1<<_3DPRIM_LINESTRIP_BF) | (1<<_3DPRIM_LINESTRIP_CONT_BF))); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); @@ -708,7 +721,7 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE)); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { saveflag = p->flag_value; brw_push_insn_state(p); diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index c99918724b..e73e57a1c8 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -162,7 +162,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, { struct brw_sf_unit_state sf; dri_bo *bo; - + int chipset_max_threads; memset(&sf, 0, sizeof(sf)); sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; @@ -171,13 +171,26 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; sf.thread3.dispatch_grf_start_reg = 3; - sf.thread3.urb_entry_read_offset = 1; + + if (BRW_IS_IGDNG(brw)) + sf.thread3.urb_entry_read_offset = 3; + else + sf.thread3.urb_entry_read_offset = 1; + sf.thread3.urb_entry_read_length = key->urb_entry_read_length; sf.thread4.nr_urb_entries = key->nr_urb_entries; sf.thread4.urb_entry_allocation_size = key->sfsize - 1; - /* Each SF thread produces 1 PUE, and there can be up to 24 threads */ - sf.thread4.max_threads = MIN2(24, key->nr_urb_entries) - 1; + + /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or + * 48(IGDNG) threads + */ + if (BRW_IS_IGDNG(brw)) + chipset_max_threads = 48; + else 
+ chipset_max_threads = 24; + + sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1; if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) sf.thread4.max_threads = 0; diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index a713262269..e94fa7d2b4 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -126,6 +126,8 @@ static void dump_wm_surface_state(struct brw_context *brw) surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not "); state_out(name, surf, surfoff, 4, "mip base %d\n", surf->ss4.min_lod); + state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", + surf->ss5.x_offset, surf->ss5.y_offset); dri_bo_unmap(surf_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index c6dfea4743..38d9dd8991 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -205,7 +205,6 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_PRIMITIVE), DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), - DEFINE_BIT(BRW_NEW_INPUT_VARYING), DEFINE_BIT(BRW_NEW_PSP), DEFINE_BIT(BRW_NEW_FENCE), DEFINE_BIT(BRW_NEW_INDICES), diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 89e2981203..8ba7eb27b3 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -33,6 +33,14 @@ #ifndef BRW_STRUCTS_H #define BRW_STRUCTS_H + +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + +/** Number of message register file registers */ +#define BRW_MAX_MRF 16 + + /* Command packets: */ struct header @@ -815,7 +823,9 @@ struct brw_gs_unit_state struct { - GLuint pad0:10; + GLuint pad0:8; + GLuint rendering_enable:1; /* for IGDNG */ + GLuint pad4:1; GLuint stats_enable:1; GLuint nr_urb_entries:7; GLuint pad1:1; @@ -923,6 +933,28 @@ 
struct brw_wm_unit_state GLfloat global_depth_offset_constant; GLfloat global_depth_offset_scale; + + /* for IGDNG only */ + struct { + GLuint pad0:1; + GLuint grf_reg_count_1:3; + GLuint pad1:2; + GLuint kernel_start_pointer_1:26; + } wm8; + + struct { + GLuint pad0:1; + GLuint grf_reg_count_2:3; + GLuint pad1:2; + GLuint kernel_start_pointer_2:26; + } wm9; + + struct { + GLuint pad0:1; + GLuint grf_reg_count_3:3; + GLuint pad1:2; + GLuint kernel_start_pointer_3:26; + } wm10; }; struct brw_sampler_default_color { @@ -1075,7 +1107,7 @@ struct brw_surface_state GLuint y_offset:4; GLuint pad0:1; GLuint x_offset:7; - } ss5; /* NEW in Integrated Graphics Device */ + } ss5; /* New in G4X */ }; @@ -1298,6 +1330,14 @@ struct brw_instruction GLuint pad1:6; } ia16; + struct + { + GLuint pad:26; + GLuint end_of_thread:1; + GLuint pad1:1; + GLuint sfid:4; + } send_igdng; /* for IGDNG only */ + } bits2; union @@ -1385,6 +1425,21 @@ struct brw_instruction } math; struct { + GLuint function:4; + GLuint int_type:1; + GLuint precision:1; + GLuint saturate:1; + GLuint data_type:1; + GLuint snapshot:1; + GLuint pad0:10; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } math_igdng; + + struct { GLuint binding_table_index:8; GLuint sampler:4; GLuint return_format:2; @@ -1407,9 +1462,38 @@ struct brw_instruction GLuint end_of_thread:1; } sampler_g4x; + struct { + GLuint binding_table_index:8; + GLuint sampler:4; + GLuint msg_type:4; + GLuint simd_mode:2; + GLuint pad0:1; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } sampler_igdng; + struct brw_urb_immediate urb; struct { + GLuint opcode:4; + GLuint offset:6; + GLuint swizzle_control:2; + GLuint pad:1; + GLuint allocate:1; + GLuint used:1; + GLuint complete:1; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint 
end_of_thread:1; + } urb_igdng; + + struct { GLuint binding_table_index:8; GLuint msg_control:4; GLuint msg_type:2; @@ -1423,6 +1507,19 @@ struct brw_instruction struct { GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint msg_type:3; + GLuint target_cache:2; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_read_igdng; + + struct { + GLuint binding_table_index:8; GLuint msg_control:3; GLuint pixel_scoreboard_clear:1; GLuint msg_type:3; @@ -1435,6 +1532,20 @@ struct brw_instruction } dp_write; struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint pixel_scoreboard_clear:1; + GLuint msg_type:3; + GLuint send_commit_msg:1; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_write_igdng; + + struct { GLuint pad:16; GLuint response_length:4; GLuint msg_length:4; @@ -1443,6 +1554,15 @@ struct brw_instruction GLuint end_of_thread:1; } generic; + struct { + GLuint pad:19; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } generic_igdng; + GLint d; GLuint ud; } bits3; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 5c5455813a..7f9b253534 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -36,6 +36,7 @@ #include "intel_tex_layout.h" #include "intel_context.h" #include "main/macros.h" +#include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE @@ -48,6 +49,77 @@ GLboolean brw_miptree_layout(struct intel_context *intel, switch (mt->target) { case GL_TEXTURE_CUBE_MAP: + if (IS_IGDNG(intel->intelScreen->deviceID)) { + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = mt->width0; + GLuint height = mt->height0; + GLuint qpitch 
= 0; + GLuint y_pitch = 0; + + mt->pitch = mt->width0; + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + y_pitch = ALIGN(height, align_h); + + if (mt->compressed) { + mt->pitch = ALIGN(mt->width0, align_w); + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; + } else { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; + } + + if (mt->first_level != mt->last_level) { + GLuint mip1_width; + + if (mt->compressed) { + mip1_width = ALIGN(minify(mt->width0), align_w) + + ALIGN(minify(minify(mt->width0)), align_w); + } else { + mip1_width = ALIGN(minify(mt->width0), align_w) + + minify(minify(mt->width0)); + } + + if (mip1_width > mt->pitch) { + mt->pitch = mip1_width; + } + } + + mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); + + for (level = mt->first_level; level <= mt->last_level; level++) { + GLuint img_height; + GLuint nr_images = 6; + GLuint q = 0; + + intel_miptree_set_level_info(mt, level, nr_images, x, y, width, + height, 1); + + for (q = 0; q < nr_images; q++) + intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch); + + if (mt->compressed) + img_height = MAX2(1, height/4); + else + img_height = ALIGN(height, align_h); + + if (level == mt->first_level + 1) { + x += ALIGN(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } + + break; + } + case GL_TEXTURE_3D: { GLuint width = mt->width0; GLuint height = mt->height0; @@ -59,9 +131,9 @@ GLboolean brw_miptree_layout(struct intel_context *intel, GLuint align_w = 4; mt->total_height = 0; + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); if (mt->compressed) { - align_w = 
intel_compressed_alignment(mt->internal_format); mt->pitch = ALIGN(width, align_w); pack_y_pitch = (height + 3) / 4; } else { @@ -69,7 +141,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, pack_y_pitch = ALIGN(mt->height0, align_h); } - pack_x_pitch = mt->pitch; + pack_x_pitch = width; pack_x_nr = 1; for (level = mt->first_level ; level <= mt->last_level ; level++) { diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index 7673dd36eb..8c6f4355a6 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -143,7 +143,29 @@ static void recalculate_urb_fence( struct brw_context *brw ) brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; - + + brw->urb.constrained = 0; + + if (BRW_IS_IGDNG(brw)) { + brw->urb.nr_vs_entries = 128; + brw->urb.nr_sf_entries = 48; + if (check_urb_layout(brw)) { + goto done; + } else { + brw->urb.constrained = 1; + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; + } + } else if (BRW_IS_G4X(brw)) { + brw->urb.nr_vs_entries = 64; + if (check_urb_layout(brw)) { + goto done; + } else { + brw->urb.constrained = 1; + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + } + } + if (!check_urb_layout(brw)) { brw->urb.nr_vs_entries = limits[VS].min_nr_entries; brw->urb.nr_gs_entries = limits[GS].min_nr_entries; @@ -169,9 +191,8 @@ static void recalculate_urb_fence( struct brw_context *brw ) if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) _mesa_printf("URB CONSTRAINED\n"); } - else - brw->urb.constrained = 0; +done: if (INTEL_DEBUG & DEBUG_URB) _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. 
%d\n", brw->urb.vs_start, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 1e4f66091e..4a591365c9 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -58,6 +58,7 @@ struct brw_vs_compile { GLuint first_output; GLuint nr_outputs; + GLuint first_overflow_output; /**< VERT_ATTRIB_x */ GLuint first_tmp; GLuint last_tmp; diff --git a/src/mesa/drivers/dri/i965/brw_vs_constval.c b/src/mesa/drivers/dri/i965/brw_vs_constval.c index 2637344b48..249a800bf4 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_constval.c +++ b/src/mesa/drivers/dri/i965/brw_vs_constval.c @@ -39,8 +39,8 @@ */ struct tracker { GLboolean twoside; - GLubyte active[PROGRAM_OUTPUT+1][128]; - GLuint size_masks[4]; + GLubyte active[PROGRAM_OUTPUT+1][MAX_PROGRAM_TEMPS]; + GLbitfield size_masks[4]; /**< one bit per fragment program input attrib */ }; @@ -53,8 +53,10 @@ static void set_active_component( struct tracker *t, case PROGRAM_TEMPORARY: case PROGRAM_INPUT: case PROGRAM_OUTPUT: + assert(file < PROGRAM_OUTPUT + 1); + assert(index < Elements(t->active[0])); t->active[file][index] |= active; - + break; default: break; } @@ -108,10 +110,15 @@ static GLubyte get_active( struct tracker *t, return active; } +/** + * Return the size (1,2,3 or 4) of the output/result for VERT_RESULT_idx. 
+ */ static GLubyte get_output_size( struct tracker *t, GLuint idx ) { - GLubyte active = t->active[PROGRAM_OUTPUT][idx]; + GLubyte active; + assert(idx < VERT_RESULT_MAX); + active = t->active[PROGRAM_OUTPUT][idx]; if (active & (1<<3)) return 4; if (active & (1<<2)) return 3; if (active & (1<<1)) return 2; @@ -123,7 +130,7 @@ static GLubyte get_output_size( struct tracker *t, */ static void calc_sizes( struct tracker *t ) { - GLuint i; + GLint vertRes; if (t->twoside) { t->active[PROGRAM_OUTPUT][VERT_RESULT_COL0] |= @@ -133,12 +140,27 @@ static void calc_sizes( struct tracker *t ) t->active[PROGRAM_OUTPUT][VERT_RESULT_BFC1]; } - for (i = 0; i < FRAG_ATTRIB_MAX; i++) { - switch (get_output_size(t, i)) { - case 4: t->size_masks[4-1] |= 1<<i; - case 3: t->size_masks[3-1] |= 1<<i; - case 2: t->size_masks[2-1] |= 1<<i; - case 1: t->size_masks[1-1] |= 1<<i; + /* Examine vertex program output sizes to set the size_masks[] info + * which describes the fragment program input sizes. + */ + for (vertRes = VERT_RESULT_TEX0; vertRes < VERT_RESULT_MAX; vertRes++) { + GLint fragAttrib; + + /* map vertex program output index to fragment program input index */ + if (vertRes <= VERT_RESULT_TEX7) + fragAttrib = FRAG_ATTRIB_TEX0 + vertRes - VERT_RESULT_TEX0; + else if (vertRes >= VERT_RESULT_VAR0) + fragAttrib = FRAG_ATTRIB_VAR0 + vertRes - VERT_RESULT_VAR0; + else + continue; + assert(fragAttrib >= FRAG_ATTRIB_TEX0); + assert(fragAttrib <= FRAG_ATTRIB_MAX); + + switch (get_output_size(t, vertRes)) { + case 4: t->size_masks[4-1] |= 1 << fragAttrib; + case 3: t->size_masks[3-1] |= 1 << fragAttrib; + case 2: t->size_masks[2-1] |= 1 << fragAttrib; + case 1: t->size_masks[1-1] |= 1 << fragAttrib; break; } } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index d7f75e3685..514f15d5e3 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -129,15 +129,21 @@ static void brw_vs_alloc_regs( struct 
brw_vs_compile *c ) } } - /* Allocate outputs: TODO: could organize the non-position outputs - * to go straight into message regs. + /* Allocate outputs. The non-position outputs go straight into message regs. */ c->nr_outputs = 0; c->first_output = reg; - mrf = 4; + c->first_overflow_output = 0; + + if (BRW_IS_IGDNG(c->func.brw)) + mrf = 8; + else + mrf = 4; + for (i = 0; i < VERT_RESULT_MAX; i++) { if (c->prog_data.outputs_written & (1 << i)) { c->nr_outputs++; + assert(i < Elements(c->regs[PROGRAM_OUTPUT])); if (i == VERT_RESULT_HPOS) { c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; @@ -148,8 +154,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) mrf++; /* just a placeholder? XXX fix later stages & remove this */ } else { - c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); - mrf++; + if (mrf < 16) { + c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } + else { + /* too many vertex results to fit in MRF, use GRF for overflow */ + if (!c->first_overflow_output) + c->first_overflow_output = i; + c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } } } } @@ -206,7 +221,11 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; - c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4; + if (BRW_IS_IGDNG(c->func.brw)) + c->prog_data.urb_entry_size = (c->nr_outputs + 6 + 3) / 4; + else + c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4; + c->prog_data.total_grf = reg; if (INTEL_DEBUG & DEBUG_VS) { @@ -1067,6 +1086,8 @@ static void emit_vertex_write( struct brw_vs_compile *c) struct brw_reg m0 = brw_message_reg(0); struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; + int eot; + GLuint len_vertext_header = 2; if (c->key.copy_edgeflag) { brw_MOV(p, @@ -1076,14 +1097,16 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Build ndc coords */ ndc = get_tmp(c); + /* ndc = 1.0 / pos.w */ emit_math1(c, 
BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + /* ndc.xyz = pos * ndc */ brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) || - c->key.nr_userclip || !BRW_IS_G4X(p->brw)) + c->key.nr_userclip || BRW_IS_965(p->brw)) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1114,7 +1137,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - if (!BRW_IS_G4X(p->brw)) { + if (BRW_IS_965(p->brw)) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -1141,7 +1164,23 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, offset(m0, 2), ndc); - brw_MOV(p, offset(m0, 3), pos); + + if (BRW_IS_IGDNG(p->brw)) { + /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ + brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ + /* m4, m5 contain the distances from vertex to the user clip planeXXX. + * Seems it is useless for us. + * m6 is used for aligning, so that the remainder of vertex element is + * reg-aligned. 
+ */ + brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */ + len_vertext_header = 6; + } else { + brw_MOV(p, offset(m0, 3), pos); + len_vertext_header = 2; + } + + eot = (c->first_overflow_output == 0); brw_urb_WRITE(p, brw_null_reg(), /* dest */ @@ -1149,12 +1188,43 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - c->nr_outputs + 3, /* msg len */ + MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ 0, /* response len */ - 1, /* eot */ + eot, /* eot */ 1, /* writes complete */ 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); + + if (c->first_overflow_output > 0) { + /* Not all of the vertex outputs/results fit into the MRF. + * Move the overflowed attributes from the GRF to the MRF and + * issue another brw_urb_WRITE(). + */ + /* XXX I'm not 100% sure about which MRF regs to use here. Starting + * at mrf[4] atm... + */ + GLuint i, mrf = 0; + for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) { + if (c->prog_data.outputs_written & (1 << i)) { + /* move from GRF to MRF */ + brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); + mrf++; + } + } + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 4, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + mrf+1, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + BRW_MAX_MRF-1, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + } } @@ -1183,15 +1253,15 @@ post_vs_emit( struct brw_vs_compile *c, */ void brw_vs_emit(struct brw_vs_compile *c ) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 +#define MAX_LOOP_DEPTH 32 struct brw_compile *p = &c->func; - GLuint nr_insns = c->vp->program.Base.NumInstructions; - GLuint insn, if_insn = 0; + const GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn, if_depth = 0, loop_depth = 0; GLuint end_offset = 0; struct brw_instruction *end_inst, *last_inst; - struct 
brw_instruction *if_inst[MAX_IFSN]; - struct brw_indirect stack_index = brw_indirect(0, 0); - + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + const struct brw_indirect stack_index = brw_indirect(0, 0); GLuint index; GLuint file; @@ -1382,16 +1452,51 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_xpd(p, dst, args[0], args[1]); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; +#if 0 + case OPCODE_BGNLOOP: + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + break; + case OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_ENDLOOP: + { + struct brw_instruction *inst0, *inst1; + loop_depth--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + inst0->bits3.if_else.pop_count = 0; + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = inst1 - inst0; + inst0->bits3.if_else.pop_count = 0; + } + } + } + break; +#else + (void) loop_inst; + (void) loop_depth; +#endif case OPCODE_BRA: brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 
3d29538843..d790ab6555 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -97,7 +97,11 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * brw_urb_WRITE() results. */ vs.thread1.single_program_flow = 0; - vs.thread1.binding_table_entry_count = key->nr_surfaces; + + if (BRW_IS_IGDNG(brw)) + vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ + else + vs.thread1.binding_table_entry_count = key->nr_surfaces; vs.thread3.urb_entry_read_length = key->urb_entry_read_length; vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; @@ -105,10 +109,16 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread3.urb_entry_read_offset = 0; vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - vs.thread4.nr_urb_entries = key->nr_urb_entries; + if (BRW_IS_IGDNG(brw)) + vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; + else + vs.thread4.nr_urb_entries = key->nr_urb_entries; + vs.thread4.urb_entry_allocation_size = key->urb_size - 1; - if (BRW_IS_G4X(brw)) + if (BRW_IS_IGDNG(brw)) + chipset_max_threads = 72; + else if (BRW_IS_G4X(brw)) chipset_max_threads = 32; else chipset_max_threads = 16; @@ -120,6 +130,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) /* No samplers for ARB_vp programs: */ + /* It has to be set to 0 for IGDNG + */ vs.vs5.sampler_count = 0; if (INTEL_DEBUG & DEBUG_STATS) diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index ba03afd6c1..ac11790151 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -177,14 +177,6 @@ static void brw_note_fence( struct intel_context *intel, GLuint fence ) brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; } - -static void brw_note_unlock( struct intel_context *intel ) -{ - struct brw_context *brw = brw_context(&intel->ctx); - 
brw_state_cache_check_size(brw); -} - - /* called from intelWaitForIdle() and intelFlush() * * For now, just flush everything. Could be smarter later. @@ -194,7 +186,7 @@ static GLuint brw_flush_cmd( void ) struct brw_mi_flush flush; flush.opcode = CMD_MI_FLUSH; flush.pad = 0; - flush.flags = BRW_FLUSH_READ_CACHE | BRW_FLUSH_STATE_CACHE; + flush.flags = BRW_FLUSH_STATE_CACHE; return *(GLuint *)&flush; } @@ -215,7 +207,6 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.invalidate_state = brw_invalidate_state; brw->intel.vtbl.note_fence = brw_note_fence; - brw->intel.vtbl.note_unlock = brw_note_unlock; brw->intel.vtbl.new_batch = brw_new_batch; brw->intel.vtbl.finish_batch = brw_finish_batch; brw->intel.vtbl.destroy = brw_destroy_context; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 3e476fd3be..14e05be4f6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -267,11 +267,14 @@ static void brw_wm_populate_key( struct brw_context *brw, /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->projtex_mask = brw->wm.input_size_masks[4-1] >> (FRAG_ATTRIB_TEX0 - FRAG_ATTRIB_WPOS); + key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; /* _NEW_LIGHT */ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + /* _NEW_HINT */ + key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* _NEW_TEXTURE */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; @@ -351,6 +354,7 @@ const struct brw_tracked_state brw_wm_prog = { .dirty = { .mesa = (_NEW_COLOR | _NEW_DEPTH | + _NEW_HINT | _NEW_STENCIL | _NEW_POLYGON | _NEW_LINE | diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index fb15c03e83..ba497432c6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -63,9 +63,10 @@ struct brw_wm_prog_key { GLuint computes_depth:1; /* could be derived from program string */ 
GLuint source_depth_to_render_target:1; GLuint flat_shade:1; + GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint runtime_check_aads_emit:1; - GLuint projtex_mask:16; + GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint shadowtex_mask:16; GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 14ab9042de..9f82916c02 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -714,6 +714,7 @@ static void emit_tex( struct brw_wm_compile *c, GLuint msgLength, responseLength; GLuint i, nr; GLuint emit; + GLuint msg_type; /* How many input regs are there? */ @@ -751,6 +752,18 @@ static void emit_tex( struct brw_wm_compile *c, responseLength = 8; /* always */ + if (BRW_IS_IGDNG(p->brw)) { + if (inst->tex_shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + } else { + if (inst->tex_shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + } + brw_SAMPLE(p, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, @@ -758,12 +771,12 @@ static void emit_tex( struct brw_wm_compile *c, SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, - (inst->tex_shadow ? - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE : - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE), + msg_type, responseLength, msgLength, - 0); + 0, + 1, + BRW_SAMPLER_SIMD_MODE_SIMD16); } @@ -775,7 +788,7 @@ static void emit_txb( struct brw_wm_compile *c, { struct brw_compile *p = &c->func; GLuint msgLength; - + GLuint msg_type; /* Shadow ignored for txb. 
*/ switch (inst->tex_idx) { @@ -800,6 +813,11 @@ static void emit_txb( struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(8), arg[3]); msgLength = 9; + if (BRW_IS_IGDNG(p->brw)) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + brw_SAMPLE(p, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, @@ -807,10 +825,12 @@ static void emit_txb( struct brw_wm_compile *c, SURF_INDEX_TEXTURE(inst->tex_unit), inst->tex_unit, /* sampler */ inst->writemask, - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + msg_type, 8, /* responseLength */ msgLength, - 0); + 0, + 1, + BRW_SAMPLER_SIMD_MODE_SIMD16); } @@ -1022,7 +1042,7 @@ static void emit_fb_write( struct brw_wm_compile *c, get_element_ud(brw_vec8_grf(1,0), 6), brw_imm_ud(1<<26)); - jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); { emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); @@ -1057,7 +1077,6 @@ static void emit_spill( struct brw_wm_compile *c, */ brw_dp_WRITE_16(p, retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW), - 1, slot); } @@ -1085,7 +1104,6 @@ static void emit_unspill( struct brw_wm_compile *c, brw_dp_READ_16(p, retype(vec16(reg), BRW_REGISTER_TYPE_UW), - 1, slot); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 1798d842c7..b9e8dd2e96 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -354,13 +354,25 @@ static void emit_interp( struct brw_wm_compile *c, src_undef()); } else { - emit_op(c, - WM_LINTERP, - dst, - 0, - interp, - deltas, - src_undef()); + if (c->key.linear_color) { + emit_op(c, + WM_LINTERP, + dst, + 0, + interp, + deltas, + src_undef()); + } + else { + /* perspective-corrected color interpolation */ + emit_op(c, + WM_PINTERP, + dst, + 0, + interp, + deltas, + get_pixel_w(c)); + } } break; case FRAG_ATTRIB_FOGC: @@ -834,10 +846,16 @@ static void precalc_tex( struct brw_wm_compile *c, } +/** + * 
Check if the given TXP instruction really needs the divide-by-W step. + */ static GLboolean projtex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { - struct prog_src_register src = inst->SrcReg[0]; + const struct prog_src_register src = inst->SrcReg[0]; + GLboolean retVal; + + assert(inst->Opcode == OPCODE_TXP); /* Only try to detect the simplest cases. Could detect (later) * cases where we are trying to emit code like RCP {1.0}, MUL x, @@ -847,16 +865,21 @@ static GLboolean projtex( struct brw_wm_compile *c, * user-provided fragment programs anyway: */ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) - return 0; /* ut2004 gun rendering !?! */ + retVal = GL_FALSE; /* ut2004 gun rendering !?! */ else if (src.File == PROGRAM_INPUT && GET_SWZ(src.Swizzle, W) == W && - (c->key.projtex_mask & (1<<(src.Index + FRAG_ATTRIB_WPOS - FRAG_ATTRIB_TEX0))) == 0) - return 0; + (c->key.proj_attrib_mask & (1 << src.Index)) == 0) + retVal = GL_FALSE; else - return 1; + retVal = GL_TRUE; + + return retVal; } +/** + * Emit code for TXP. + */ static void precalc_txp( struct brw_wm_compile *c, const struct prog_instruction *inst ) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 0e6a2f8ef0..19f777fe32 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -446,7 +446,6 @@ static void fetch_constants(struct brw_wm_compile *c, /* need to fetch the constant now */ brw_dp_READ_4(p, c->current_const[i].reg, /* writeback dest */ - 1, /* msg_reg */ src->RelAddr, /* relative indexing? 
*/ 16 * src->Index, /* byte offset */ SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ @@ -2626,6 +2625,7 @@ static void emit_txb(struct brw_wm_compile *c, struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; GLuint i; + GLuint msg_type; payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); @@ -2654,6 +2654,14 @@ static void emit_txb(struct brw_wm_compile *c, } brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ + + if (BRW_IS_IGDNG(p->brw)) { + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG; + } else { + /* Does it work well on SIMD8? */ + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + } + brw_SAMPLE(p, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ 1, /* msg_reg_nr */ @@ -2661,10 +2669,12 @@ static void emit_txb(struct brw_wm_compile *c, SURF_INDEX_TEXTURE(unit), unit, /* sampler */ inst->DstReg.WriteMask, /* writemask */ - BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, /* msg_type */ + msg_type, /* msg_type */ 4, /* response_length */ 4, /* msg_length */ - 0); /* eot */ + 0, /* eot */ + 1, + BRW_SAMPLER_SIMD_MODE_SIMD8); } @@ -2678,6 +2688,7 @@ static void emit_tex(struct brw_wm_compile *c, GLuint i, nr; GLuint emit; GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0; + GLuint msg_type; payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); @@ -2718,6 +2729,16 @@ static void emit_tex(struct brw_wm_compile *c, brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } + if (BRW_IS_IGDNG(p->brw)) { + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG; + } else { + /* Does it work for shadow on SIMD8 ? 
*/ + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; + } + brw_SAMPLE(p, retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ 1, /* msg_reg_nr */ @@ -2725,10 +2746,12 @@ static void emit_tex(struct brw_wm_compile *c, SURF_INDEX_TEXTURE(unit), unit, /* sampler */ inst->DstReg.WriteMask, /* writemask */ - BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, /* msg_type */ + msg_type, /* msg_type */ 4, /* response_length */ shadow ? 6 : 4, /* msg_length */ - 0); /* eot */ + 0, /* eot */ + 1, + BRW_SAMPLER_SIMD_MODE_SIMD8); if (shadow) brw_MOV(p, dst[3], brw_imm_f(1.0)); @@ -2745,11 +2768,10 @@ static void post_wm_emit( struct brw_wm_compile *c ) static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) { -#define MAX_IFSN 32 +#define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 - struct brw_instruction *if_inst[MAX_IFSN], *loop_inst[MAX_LOOP_DEPTH]; - struct brw_instruction *inst0, *inst1; - int i, if_insn = 0, loop_insn = 0; + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + GLuint i, if_depth = 0, loop_depth = 0; struct brw_compile *p = &c->func; struct brw_indirect stack_index = brw_indirect(0, 0); @@ -2831,6 +2853,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_trunc(c, inst); break; case OPCODE_MOV: + case OPCODE_SWZ: emit_mov(c, inst); break; case OPCODE_DP3: @@ -2922,15 +2945,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_kil(c); break; case OPCODE_IF: - assert(if_insn < MAX_IFSN); - if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); break; case OPCODE_ELSE: - if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; case OPCODE_ENDIF: - assert(if_insn > 0); - brw_ENDIF(p, if_inst[--if_insn]); + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); break; case OPCODE_BGNSUB: brw_save_label(p, 
inst->Comment, p->nr_insn); @@ -2964,7 +2987,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) break; case OPCODE_BGNLOOP: /* XXX may need to invalidate the current_constant regs */ - loop_inst[loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; case OPCODE_BRK: brw_BREAK(p); @@ -2975,21 +2998,29 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; case OPCODE_ENDLOOP: - loop_insn--; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_insn]); - /* patch all the BREAK instructions from - last BEGINLOOP */ - while (inst0 > loop_inst[loop_insn]) { - inst0--; - if (inst0->header.opcode == BRW_OPCODE_BREAK) { - inst0->bits3.if_else.jump_count = inst1 - inst0 + 1; + { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + if (BRW_IS_IGDNG(brw)) + br = 2; + + loop_depth--; + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; - } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { - inst0->bits3.if_else.jump_count = inst1 - inst0; + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; - } - } - break; + } + } + } + break; default: _mesa_printf("unsupported IR in fragment shader %d\n", inst->Opcode); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 67b41173fb..39f8c6d522 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -71,7 +71,9 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->max_threads = 1; 
else { /* WM maximum threads is number of EUs times number of threads per EU. */ - if (BRW_IS_G4X(brw)) + if (BRW_IS_IGDNG(brw)) + key->max_threads = 12 * 6; + else if (BRW_IS_G4X(brw)) key->max_threads = 10 * 5; else key->max_threads = 8 * 4; @@ -141,7 +143,11 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - wm.thread1.binding_table_entry_count = key->nr_surfaces; + + if (BRW_IS_IGDNG(brw)) + wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ + else + wm.thread1.binding_table_entry_count = key->nr_surfaces; if (key->total_scratch != 0) { wm.thread2.scratch_space_base_pointer = @@ -158,7 +164,11 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; - wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + if (BRW_IS_IGDNG(brw)) + wm.wm4.sampler_count = 0; /* hardware requirement */ + else + wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + if (brw->wm.sampler_bo != NULL) { /* reloc */ wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index c49a5f6b4e..096f74394e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -578,7 +578,27 @@ brw_update_renderbuffer_surface(struct brw_context *brw, surf.ss0.surface_format = key.surface_format; surf.ss0.surface_type = key.surface_type; - surf.ss1.base_addr = key.draw_offset; + if (key.tiling == I915_TILING_NONE) { + surf.ss1.base_addr = key.draw_offset; + } else { + uint32_t tile_offset = key.draw_offset % 4096; + + surf.ss1.base_addr = 
key.draw_offset - tile_offset; + + assert(BRW_IS_G4X(brw) || tile_offset == 0); + if (BRW_IS_G4X(brw)) { + if (key.tiling == I915_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 512 / 2; + } else { + surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 128 / 2; + } + } + } if (region_bo != NULL) surf.ss1.base_addr += region_bo->offset; /* reloc */ @@ -609,7 +629,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], offsetof(struct brw_surface_state, ss1), region_bo, - key.draw_offset, + surf.ss1.base_addr, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c new file mode 120000 index 0000000000..cc4589f4d4 --- /dev/null +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -0,0 +1 @@ +../intel/intel_pixel_read.c
\ No newline at end of file |