From fe2d4a5ea02df38c9940a726aa04bcf550fab1da Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 22 Aug 2010 01:33:57 -0700 Subject: i965: Add support for POW in gen6 FS. Fixes glsl-algebraic-pow-2 in brw_wm_glsl.c mode. --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index ddd3a94eb0..9c320c613f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1131,6 +1131,29 @@ void brw_math( struct brw_compile *p, } } +/** Extended math function, float[8]. + */ +void brw_math2(struct brw_compile *p, + struct brw_reg dest, + GLuint function, + struct brw_reg src0, + struct brw_reg src1) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); + + assert(intel->gen >= 6); + + /* Math is the same ISA format as other opcodes, except that CondModifier + * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. + */ + insn->header.destreg__conditionalmod = function; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); +} + /** * Extended math function, float[16]. * Use 2 send instructions. -- cgit v1.2.3 From 67dafa4b56422b44ca26b093d8feb6e743eb89e6 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 17 Sep 2010 14:23:48 +0800 Subject: i965: ff sync message change for sandybridge --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 9c320c613f..1d3f19759d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1744,13 +1744,27 @@ void brw_ff_sync(struct brw_compile *p, GLuint response_length, GLboolean eot) { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + /* Sandybridge doesn't have the implied move for SENDs, + * and the first message register index comes from src0. + */ + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + brw_pop_insn_state(p); + src0 = brw_message_reg(msg_reg_nr); + } + + insn = next_insn(p, BRW_OPCODE_SEND); brw_set_dest(insn, dest); brw_set_src0(insn, src0); brw_set_src1(insn, brw_imm_d(0)); - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_ff_sync_message(p->brw, insn, -- cgit v1.2.3 From c5a3b25bb954db49dcb5e7737018979782d2edba Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Tue, 28 Sep 2010 14:54:26 +0800 Subject: i965: fix jump count on sandybridge Jump count is for 64bit long each, so one instruction requires 2 like on Ironlake. --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 1d3f19759d..38ac3be8df 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -819,7 +819,9 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (intel->gen == 5) + /* jump count is for 64bit data chunk each, so one 128bit + instruction requires 2 chunks. */ + if (intel->gen >= 5) br = 2; if (p->single_program_flow) { @@ -861,7 +863,7 @@ void brw_ENDIF(struct brw_compile *p, struct intel_context *intel = &p->brw->intel; GLuint br = 1; - if (intel->gen == 5) + if (intel->gen >= 5) br = 2; if (p->single_program_flow) { @@ -978,7 +980,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, struct brw_instruction *insn; GLuint br = 1; - if (intel->gen == 5) + if (intel->gen >= 5) br = 2; if (p->single_program_flow) @@ -1022,7 +1024,7 @@ void brw_land_fwd_jump(struct brw_compile *p, struct brw_instruction *landing = &p->store[p->nr_insn]; GLuint jmpi = 1; - if (intel->gen == 5) + if (intel->gen >= 5) jmpi = 2; assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); -- cgit v1.2.3 From 956f866030f7bea5fc4a2de28c72e60bdc3a5b3d Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 17 Sep 2010 14:17:06 +0800 Subject: i965: Fix sampler on sandybridge Sandybridge has not much change on texture sampler with Ironlake. --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 29 ++++++++++++++++++++---- src/mesa/drivers/dri/i965/brw_structs.h | 14 +++++++----- src/mesa/drivers/dri/i965/brw_tex_layout.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_emit.c | 6 ++--- src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 11 +++++++-- 5 files changed, 45 insertions(+), 17 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 38ac3be8df..09cc8b2bd5 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -548,7 +548,7 @@ static void brw_set_sampler_message(struct brw_context *brw, assert(eot == 0); brw_set_src1(insn, brw_imm_d(0)); - if (intel->gen == 5) { + if (intel->gen >= 5) { insn->bits3.sampler_gen5.binding_table_index = binding_table_index; insn->bits3.sampler_gen5.sampler = sampler; insn->bits3.sampler_gen5.msg_type = msg_type; @@ -557,8 +557,12 @@ static void brw_set_sampler_message(struct brw_context *brw, insn->bits3.sampler_gen5.response_length = response_length; insn->bits3.sampler_gen5.msg_length = msg_length; insn->bits3.sampler_gen5.end_of_thread = eot; - insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; - insn->bits2.send_gen5.end_of_thread = eot; + if (intel->gen >= 6) + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; + else { + insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_gen5.end_of_thread = eot; + } } else if (intel->is_g4x) { insn->bits3.sampler_g4x.binding_table_index = binding_table_index; insn->bits3.sampler_g4x.sampler = sampler; @@ -1581,6 +1585,7 @@ void brw_SAMPLE(struct brw_compile *p, GLuint header_present, GLuint simd_mode) { + struct intel_context *intel = &p->brw->intel; GLboolean need_stall = 0; if (writemask == 0) { @@ -1652,11 +1657,25 @@ void brw_SAMPLE(struct brw_compile *p, } { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + struct brw_instruction *insn; + /* Sandybridge doesn't have the implied move for SENDs, + * and the first message register index comes from src0. + */ + if (intel->gen >= 6) { + brw_push_insn_state(p); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + /* m1 contains header? */ + brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + brw_pop_insn_state(p); + src0 = brw_message_reg(msg_reg_nr); + } + + insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.destreg__conditionalmod = msg_reg_nr; + if (intel->gen < 6) + insn->header.destreg__conditionalmod = msg_reg_nr; brw_set_dest(insn, dest); brw_set_src0(insn, src0); diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 2a118e01c5..8e8f418eb7 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1073,7 +1073,7 @@ struct brw_sampler_state GLuint mag_filter:3; GLuint mip_filter:2; GLuint base_level:5; - GLuint pad:1; + GLuint min_mag_neq:1; GLuint lod_preclamp:1; GLuint default_color_mode:1; GLuint pad0:1; @@ -1085,7 +1085,8 @@ struct brw_sampler_state GLuint r_wrap_mode:3; GLuint t_wrap_mode:3; GLuint s_wrap_mode:3; - GLuint pad:3; + GLuint cube_control_mode:1; + GLuint pad:2; GLuint max_lod:10; GLuint min_lod:10; } ss1; @@ -1099,7 +1100,9 @@ struct brw_sampler_state struct { - GLuint pad:19; + GLuint non_normalized_coord:1; + GLuint pad:12; + GLuint address_round:6; GLuint max_aniso:3; GLuint chroma_key_mode:1; GLuint chroma_key_index:2; @@ -1210,10 +1213,9 @@ struct brw_surface_state struct { GLuint pad1:16; - GLuint llc_mapping:1; - GLuint mlc_mapping:1; + GLuint cache_control:2; GLuint gfdt:1; - GLuint gfdt_src:1; + GLuint encrypt:1; GLuint y_offset:4; GLuint pad0:1; GLuint x_offset:7; diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 768ccfd79c..9ac0713a1d 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -48,7 +48,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, switch (mt->target) { case GL_TEXTURE_CUBE_MAP: - if (intel->gen == 5) { + if (intel->gen >= 5) { GLuint align_h = 2; GLuint level; GLuint qpitch = 0; diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 260a04c774..a9ebc2689c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1102,7 +1102,7 @@ void emit_tex(struct brw_wm_compile *c, /* Fill in the shadow comparison reference value. */ if (shadow) { - if (intel->gen == 5) { + if (intel->gen >= 5) { /* Fill in the cube map array index value. */ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); cur_mrf += mrf_per_channel; @@ -1115,7 +1115,7 @@ void emit_tex(struct brw_wm_compile *c, cur_mrf += mrf_per_channel; } - if (intel->gen == 5) { + if (intel->gen >= 5) { if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5; else @@ -1168,7 +1168,7 @@ void emit_txb(struct brw_wm_compile *c, * from mattering. */ if (c->dispatch_width == 16 || intel->gen < 5) { - if (intel->gen == 5) + if (intel->gen >= 5) msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; else msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 1fc802cfa6..f9c48140fb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -100,10 +100,13 @@ struct wm_sampler_key { * Sets the sampler state for a single unit based off of the sampler key * entry. */ -static void brw_update_sampler_state(struct wm_sampler_entry *key, +static void brw_update_sampler_state(struct brw_context *brw, + struct wm_sampler_entry *key, drm_intel_bo *sdc_bo, struct brw_sampler_state *sampler) { + struct intel_context *intel = &brw->intel; + memset(sampler, 0, sizeof(*sampler)); switch (key->minfilter) { @@ -163,6 +166,10 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); + if (intel->gen >= 6 && + sampler->ss0.min_filter != sampler->ss0.mag_filter) + sampler->ss0.min_mag_neq = 1; + /* Cube-maps on 965 and later must use the same wrap mode for all 3 * coordinate dimensions. Futher, only CUBE and CLAMP are valid. */ @@ -329,7 +336,7 @@ static void upload_wm_samplers( struct brw_context *brw ) if (brw->wm.sdc_bo[i] == NULL) continue; - brw_update_sampler_state(&key.sampler[i], brw->wm.sdc_bo[i], + brw_update_sampler_state(brw, &key.sampler[i], brw->wm.sdc_bo[i], &sampler[i]); } -- cgit v1.2.3 From ea909be58dda7e916cb9ce434ecb78597881ad33 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 4 Oct 2010 15:07:17 -0700 Subject: i965: Add support for gen6 FB writes to the new FS. This uses message headers for now, since we'll need it for MRT. We can cut out the header later. --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 11 +++++++++-- src/mesa/drivers/dri/i965/brw_fs.cpp | 14 +++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 09cc8b2bd5..5954135321 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -448,6 +448,7 @@ static void brw_set_dp_write_message( struct brw_context *brw, GLuint msg_control, GLuint msg_type, GLuint msg_length, + GLboolean header_present, GLuint pixel_scoreboard_clear, GLuint response_length, GLuint end_of_thread, @@ -462,7 +463,7 @@ static void brw_set_dp_write_message( struct brw_context *brw, insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; insn->bits3.dp_render_cache.msg_type = msg_type; insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; - insn->bits3.dp_render_cache.header_present = 0; /* XXX */ + insn->bits3.dp_render_cache.header_present = header_present; insn->bits3.dp_render_cache.response_length = response_length; insn->bits3.dp_render_cache.msg_length = msg_length; insn->bits3.dp_render_cache.end_of_thread = end_of_thread; @@ -476,7 +477,7 @@ static void brw_set_dp_write_message( struct brw_context *brw, insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear; insn->bits3.dp_write_gen5.msg_type = msg_type; insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg; - insn->bits3.dp_write_gen5.header_present = 1; + insn->bits3.dp_write_gen5.header_present = header_present; insn->bits3.dp_write_gen5.response_length = response_length; insn->bits3.dp_write_gen5.msg_length = msg_length; insn->bits3.dp_write_gen5.end_of_thread = end_of_thread; @@ -1293,6 +1294,7 @@ void brw_dp_WRITE_16( struct brw_compile *p, BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ msg_length, + GL_TRUE, /* header_present */ 0, /* pixel scoreboard */ send_commit_msg, /* response_length */ 0, /* eot */ @@ -1530,12 +1532,16 @@ void brw_fb_WRITE(struct brw_compile *p, struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_control, msg_type; + GLboolean header_present = GL_TRUE; insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ insn->header.compression_control = BRW_COMPRESSION_NONE; if (intel->gen >= 6) { + if (msg_length == 4) + header_present = GL_FALSE; + /* headerless version, just submit color payload */ src0 = brw_message_reg(msg_reg_nr); @@ -1559,6 +1565,7 @@ void brw_fb_WRITE(struct brw_compile *p, msg_control, msg_type, msg_length, + header_present, 1, /* pixel scoreboard */ response_length, eot, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5eaa998447..f42c469641 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1920,6 +1920,7 @@ void fs_visitor::generate_fb_write(fs_inst *inst) { GLboolean eot = inst->eot; + struct brw_reg implied_header; /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied * move, here's g1. @@ -1927,16 +1928,27 @@ fs_visitor::generate_fb_write(fs_inst *inst) brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + if (intel->gen >= 6) { + brw_MOV(p, + brw_message_reg(0), + brw_vec8_grf(0, 0)); + implied_header = brw_null_reg(); + } else { + implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); + } + brw_MOV(p, brw_message_reg(1), brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); brw_fb_WRITE(p, 8, /* dispatch_width */ retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), 0, /* base MRF */ - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + implied_header, inst->target, inst->mlen, 0, -- cgit v1.2.3 From f7cb28fad9855020e9fbd1481df03bb09346d4be Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 4 Oct 2010 15:09:18 -0700 Subject: i965: Gen6 no longer has the IFF instruction; always use IF. --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 5954135321..fbc6894d05 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -899,9 +899,11 @@ void brw_ENDIF(struct brw_compile *p, * instruction respectively. */ if (patch_insn->header.opcode == BRW_OPCODE_IF) { - /* Automagically turn it into an IFF: - */ - patch_insn->header.opcode = BRW_OPCODE_IFF; + if (intel->gen < 6) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + } patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); patch_insn->bits3.if_else.pop_count = 0; patch_insn->bits3.if_else.pad0 = 0; -- cgit v1.2.3 From feca6609390d4642418cf7aab878e654964510c4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 4 Oct 2010 15:08:03 -0700 Subject: i965: Fix up IF/ELSE/ENDIF for gen6. The jump delta is now in the part of the instruction where the destination fields used to be, and the src args are ignored (or not, for the new non-predicated IF that we don't use yet). --- src/mesa/drivers/dri/i965/brw_disasm.c | 5 ++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 86 ++++++++++++++++++++++++--------- src/mesa/drivers/dri/i965/brw_structs.h | 12 +++++ 3 files changed, 79 insertions(+), 24 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 3a93b25377..be27f9226c 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -892,7 +892,12 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) if (opcode[inst->header.opcode].ndst > 0) { pad (file, 16); err |= dest (file, inst); + } else if (gen >= 6 && (inst->header.opcode == BRW_OPCODE_IF || + inst->header.opcode == BRW_OPCODE_ELSE || + inst->header.opcode == BRW_OPCODE_ENDIF)) { + format (file, " %d", inst->bits1.branch_gen6.jump_count); } + if (opcode[inst->header.opcode].nsrc > 0) { pad (file, 32); err |= src0 (file, inst); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index fbc6894d05..419b40ba84 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -787,6 +787,7 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, */ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; if (p->single_program_flow) { @@ -800,9 +801,15 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) /* Override the defaults for this instruction: */ - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + if (intel->gen < 6) { + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(insn, brw_imm_w(0)); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + } insn->header.execution_size = execute_size; insn->header.compression_control = BRW_COMPRESSION_NONE; @@ -835,9 +842,15 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, insn = next_insn(p, BRW_OPCODE_ELSE); } - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + if (intel->gen < 6) { + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(insn, brw_imm_w(0)); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + } insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = if_insn->header.execution_size; @@ -854,9 +867,13 @@ struct brw_instruction *brw_ELSE(struct brw_compile *p, } else { assert(if_insn->header.opcode == BRW_OPCODE_IF); - if_insn->bits3.if_else.jump_count = br * (insn - if_insn); - if_insn->bits3.if_else.pop_count = 0; - if_insn->bits3.if_else.pad0 = 0; + if (intel->gen < 6) { + if_insn->bits3.if_else.jump_count = br * (insn - if_insn); + if_insn->bits3.if_else.pop_count = 0; + if_insn->bits3.if_else.pad0 = 0; + } else { + if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1); + } } return insn; @@ -884,9 +901,15 @@ void brw_ENDIF(struct brw_compile *p, } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, brw_imm_d(0x0)); + if (intel->gen < 6) { + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + } else { + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W)); + brw_set_src0(insn, brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + } insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = patch_insn->header.execution_size; @@ -900,26 +923,41 @@ void brw_ENDIF(struct brw_compile *p, */ if (patch_insn->header.opcode == BRW_OPCODE_IF) { if (intel->gen < 6) { - /* Automagically turn it into an IFF: + /* Turn it into an IFF, which means no mask stack operations for + * all-false and jumping past the ENDIF. */ patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else { + /* As of gen6, there is no IFF and IF must point to the ENDIF. */ + patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); } - patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); - patch_insn->bits3.if_else.pop_count = 0; - patch_insn->bits3.if_else.pad0 = 0; - } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { - patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); - patch_insn->bits3.if_else.pop_count = 1; - patch_insn->bits3.if_else.pad0 = 0; } else { - assert(0); + assert(patch_insn->header.opcode == BRW_OPCODE_ELSE); + if (intel->gen < 6) { + /* BRW_OPCODE_ELSE pre-gen6 should point just past the + * matching ENDIF. + */ + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */ + patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); + } } /* Also pop item off the stack in the endif instruction: */ - insn->bits3.if_else.jump_count = 0; - insn->bits3.if_else.pop_count = 1; - insn->bits3.if_else.pad0 = 0; + if (intel->gen < 6) { + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } else { + insn->bits1.branch_gen6.jump_count = 2; + } } } diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 7b919872c4..8ce9af9c4f 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1381,6 +1381,18 @@ struct brw_instruction GLuint dest_horiz_stride:2; GLuint dest_address_mode:1; } ia16; + + struct { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; + GLuint src1_reg_type:3; + GLuint pad:1; + + GLint jump_count:16; + } branch_gen6; } bits1; -- cgit v1.2.3 From 978ffa1d61902f0d55e15fbc51af75d444f35124 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 8 Oct 2010 16:02:59 -0700 Subject: i965: Silence unused variable warning on non-debug builds. Fixes this GCC warning. brw_eu_emit.c: In function 'brw_math2': brw_eu_emit.c:1189: warning: unused variable 'intel' --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 419b40ba84..8ebcfa3c65 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1190,6 +1190,7 @@ void brw_math2(struct brw_compile *p, struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); assert(intel->gen >= 6); + (void) intel; /* Math is the same ISA format as other opcodes, except that CondModifier * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. -- cgit v1.2.3 From 7b5bc38c44269fc51db2f8b5e4ba0222212c6d71 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 11 Oct 2010 13:30:12 -0700 Subject: i965: Add a couple of checks for gen6 math instruction limits. --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 8ebcfa3c65..f9aa5f7efe 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1147,6 +1147,17 @@ void brw_math( struct brw_compile *p, if (intel->gen >= 6) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH); + assert(dest.file == BRW_GENERAL_REGISTER_FILE); + assert(src.file == BRW_GENERAL_REGISTER_FILE); + + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && + function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { + assert(src.type == BRW_REGISTER_TYPE_F); + } + /* Math is the same ISA format as other opcodes, except that CondModifier * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ @@ -1192,6 +1203,21 @@ void brw_math2(struct brw_compile *p, assert(intel->gen >= 6); (void) intel; + + assert(dest.file == BRW_GENERAL_REGISTER_FILE); + assert(src0.file == BRW_GENERAL_REGISTER_FILE); + assert(src1.file == BRW_GENERAL_REGISTER_FILE); + + assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1); + assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1); + + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && + function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { + assert(src0.type == BRW_REGISTER_TYPE_F); + assert(src1.type == BRW_REGISTER_TYPE_F); + } + /* Math is the same ISA format as other opcodes, except that CondModifier * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ -- cgit v1.2.3 From e8e79c1d7eed0f5ae8820611cb86bdbd6ce595e6 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 14 Oct 2010 10:54:53 +0800 Subject: i965: Fix GS hang on Sandybridge Don't use r0 for FF_SYNC dest reg on Sandybridge, which would smash FFID field in GS payload, that cause later URB write fail. Also not use r0 in any URB write requiring allocate. --- src/mesa/drivers/dri/i965/brw_eu_emit.c | 3 +- src/mesa/drivers/dri/i965/brw_gs.c | 2 -- src/mesa/drivers/dri/i965/brw_gs.h | 1 + src/mesa/drivers/dri/i965/brw_gs_emit.c | 54 ++++++++++++++++++++++++++------- 4 files changed, 46 insertions(+), 14 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index f9aa5f7efe..41286382d0 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1848,7 +1848,8 @@ void brw_ff_sync(struct brw_compile *p, if (intel->gen >= 6) { brw_push_insn_state(p); brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), + retype(src0, BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); src0 = brw_message_reg(msg_reg_nr); } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index ad178d84b6..cfcc8ea4d6 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -96,8 +96,6 @@ static void compile_gs_prog( struct brw_context *brw, brw_gs_quad_strip( &c, key ); break; case GL_LINE_LOOP: - /* XXX fix GS hang issue */ - assert(intel->gen < 6); brw_gs_lines( &c ); break; case GL_LINES: diff --git a/src/mesa/drivers/dri/i965/brw_gs.h b/src/mesa/drivers/dri/i965/brw_gs.h index 813b8d447a..7e3531086f 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.h +++ b/src/mesa/drivers/dri/i965/brw_gs.h @@ -56,6 +56,7 @@ struct brw_gs_compile { struct { struct brw_reg R0; struct brw_reg vertex[MAX_GS_VERTS]; + struct brw_reg temp; } reg; /* 3 different ways of expressing vertex size: diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index a01d5576f8..e1f751fdaa 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -58,6 +58,8 @@ static void brw_gs_alloc_regs( struct brw_gs_compile *c, i += c->nr_regs; } + c->reg.temp = brw_vec8_grf(i, 0); + c->prog_data.urb_read_length = c->nr_regs; c->prog_data.total_grf = i; } @@ -69,12 +71,22 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, GLuint header) { struct brw_compile *p = &c->func; + struct intel_context *intel = &c->func.brw->intel; GLboolean allocate = !last; + struct brw_reg temp; + + if (intel->gen < 6) + temp = c->reg.R0; + else { + temp = c->reg.temp; + brw_MOV(p, retype(temp, BRW_REGISTER_TYPE_UD), + retype(c->reg.R0, BRW_REGISTER_TYPE_UD)); + } /* Overwrite PrimType and PrimStart in the message header, for * each vertex in turn: */ - brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + brw_MOV(p, get_element_ud(temp, 2), brw_imm_ud(header)); /* Copy the vertex from vertn into m1..mN+1: */ @@ -87,9 +99,9 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, * allocated each time. */ brw_urb_WRITE(p, - allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + allocate ? temp : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), 0, - c->reg.R0, + temp, allocate, 1, /* used */ c->nr_regs + 1, /* msg length */ @@ -98,19 +110,39 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, 1, /* writes_complete */ 0, /* urb offset */ BRW_URB_SWIZZLE_NONE); + + if (intel->gen >= 6 && allocate) + brw_MOV(p, get_element_ud(c->reg.R0, 0), get_element_ud(temp, 0)); } static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) { struct brw_compile *p = &c->func; - brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); - brw_ff_sync(p, - c->reg.R0, - 0, - c->reg.R0, - 1, /* allocate */ - 1, /* response length */ - 0 /* eot */); + struct intel_context *intel = &c->func.brw->intel; + + if (intel->gen < 6) { + brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); + } else { + brw_MOV(p, retype(c->reg.temp, BRW_REGISTER_TYPE_UD), + retype(c->reg.R0, BRW_REGISTER_TYPE_UD)); + brw_MOV(p, get_element_ud(c->reg.temp, 1), brw_imm_ud(num_prim)); + brw_ff_sync(p, + c->reg.temp, + 0, + c->reg.temp, + 1, /* allocate */ + 1, /* response length */ + 0 /* eot */); + brw_MOV(p, get_element_ud(c->reg.R0, 0), + get_element_ud(c->reg.temp, 0)); + } } -- cgit v1.2.3 From 897f6d3c7d06316b0535971cc2de318157c23692 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 14 Oct 2010 11:40:19 -0700 Subject: i965: Correctly emit the RNDZ instruction. Simply using RNDU, RNDZ, or RNDE does not produce the desired result. Rather, the RND* instructions place a value in the destination register that may be 1 less than the correct answer. They can also set per-channel "increment bits" in a flag register, which, if set, mean dest needs to be incremented by 1. A second instruction - a predicated add - completes the job. Notably, RNDD always produces the correct answer in a single instruction. Fixes piglit test glsl-fs-trunc. --- src/mesa/drivers/dri/i965/brw_eu.h | 9 +++++++-- src/mesa/drivers/dri/i965/brw_eu_emit.c | 25 ++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index c0deb238c2..c16613e3df 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -789,6 +789,10 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \ struct brw_reg src0, \ struct brw_reg src1); +#define ROUND(OP) \ +void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0); + + ALU1(MOV) ALU2(SEL) ALU1(NOT) @@ -805,7 +809,6 @@ ALU2(ADD) ALU2(MUL) ALU1(FRC) ALU1(RNDD) -ALU1(RNDZ) ALU2(MAC) ALU2(MACH) ALU1(LZD) @@ -816,9 +819,11 @@ ALU2(DP2) ALU2(LINE) ALU2(PLN) +ROUND(RNDZ) + #undef ALU1 #undef ALU2 - +#undef ROUND /* Helpers for SEND instruction: diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 41286382d0..68c6c47763 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -654,6 +654,26 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \ return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ } +/* Rounding operations (other than RNDD) require two instructions - the first + * stores a rounded value (possibly the wrong way) in the dest register, but + * also sets a per-channel "increment bit" in the flag register. A predicated + * add of 1.0 fixes dest to contain the desired result. + */ +#define ROUND(OP) \ +void brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src) \ +{ \ + struct brw_instruction *rnd, *add; \ + rnd = next_insn(p, BRW_OPCODE_##OP); \ + brw_set_dest(rnd, dest); \ + brw_set_src0(rnd, src); \ + rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ + \ + add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ + add->header.predicate_control = BRW_PREDICATE_NORMAL; \ +} + ALU1(MOV) ALU2(SEL) @@ -668,7 +688,6 @@ ALU2(RSL) ALU2(ASR) ALU1(FRC) ALU1(RNDD) -ALU1(RNDZ) ALU2(MAC) ALU2(MACH) ALU1(LZD) @@ -679,6 +698,10 @@ ALU2(DP2) ALU2(LINE) ALU2(PLN) + +ROUND(RNDZ) + + struct brw_instruction *brw_ADD(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0, -- cgit v1.2.3 From f157812bbbcf9caac1f84988e738fc9d1e051056 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 14 Oct 2010 14:31:54 -0700 Subject: i965: Add support for ir_unop_round_even via the RNDE instruction. --- src/mesa/drivers/dri/i965/brw_eu.h | 1 + src/mesa/drivers/dri/i965/brw_eu_emit.c | 1 + src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++++++ 3 files changed, 8 insertions(+) (limited to 'src/mesa/drivers/dri/i965/brw_eu_emit.c') diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index c16613e3df..7cac4a72e4 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -820,6 +820,7 @@ ALU2(LINE) ALU2(PLN) ROUND(RNDZ) +ROUND(RNDE) #undef ALU1 #undef ALU2 diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 68c6c47763..a1fead0a4f 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -700,6 +700,7 @@ ALU2(PLN) ROUND(RNDZ) +ROUND(RNDE) struct brw_instruction *brw_ADD(struct brw_compile *p, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index d220c79a8e..e372f3a13d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -874,6 +874,9 @@ fs_visitor::visit(ir_expression *ir) case ir_unop_fract: inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0])); break; + case ir_unop_round_even: + emit(fs_inst(BRW_OPCODE_RNDE, this->result, op[0])); + break; case ir_binop_min: inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); @@ -3016,6 +3019,9 @@ fs_visitor::generate_code() case BRW_OPCODE_RNDD: brw_RNDD(p, dst, src[0]); break; + case BRW_OPCODE_RNDE: + brw_RNDE(p, dst, src[0]); + break; case BRW_OPCODE_RNDZ: brw_RNDZ(p, dst, src[0]); break; -- cgit v1.2.3