From b47c9f8c915ae4ca8c7fa5ee3b6b64f17c38b569 Mon Sep 17 00:00:00 2001 From: Zou Nan hai Date: Fri, 31 Aug 2007 13:42:20 +0800 Subject: optimize 965 clip 1. increase clip thread number to 2 2. do cliptest for -rhw --- src/mesa/drivers/dri/i965/brw_clip_line.c | 7 ++ src/mesa/drivers/dri/i965/brw_clip_state.c | 2 +- src/mesa/drivers/dri/i965/brw_clip_tri.c | 107 +++++++++++++++++++++++++++-- src/mesa/drivers/dri/i965/brw_clip_util.c | 12 +--- 4 files changed, 113 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/mesa/drivers/dri/i965/brw_clip_line.c b/src/mesa/drivers/dri/i965/brw_clip_line.c index 0103be4345..9ad00676d4 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_line.c +++ b/src/mesa/drivers/dri/i965/brw_clip_line.c @@ -147,6 +147,13 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_planes(c); brw_clip_init_clipmask(c); + /* -ve rhw workaround */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + plane_loop = brw_DO(p, BRW_EXECUTE_1); { /* if (planemask & 1) diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index 1e6d6fa176..ae46d7a86e 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -55,7 +55,7 @@ static void upload_clip_unit( struct brw_context *brw ) /* BRW_NEW_URB_FENCE */ clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - clip.thread4.max_threads = 0; /* Hmm, maybe the max is 1 or 2 threads */ + clip.thread4.max_threads = 1; /* 2 threads */ if (INTEL_DEBUG & DEBUG_STATS) clip.thread4.stats_enable = 1; diff --git a/src/mesa/drivers/dri/i965/brw_clip_tri.c b/src/mesa/drivers/dri/i965/brw_clip_tri.c index f62b02cedf..506ab3712d 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_tri.c +++ b/src/mesa/drivers/dri/i965/brw_clip_tri.c @@ -42,6 +42,20 @@ #include "brw_util.h" #include "brw_clip.h" +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmps( struct brw_clip_compile *c ) +{ + c->last_tmp = c->first_tmp; +} void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, @@ -435,15 +449,103 @@ static void maybe_do_clip_tri( struct brw_clip_compile *c ) brw_ENDIF(p, do_clip); } - +static void brw_clip_test( struct brw_clip_compile *c ) +{ + struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + + struct brw_reg v0 = get_tmp(c); + struct brw_reg v1 = get_tmp(c); + struct brw_reg v2 = get_tmp(c); + + struct brw_indirect vt0 = brw_indirect(0, 0); + struct brw_indirect vt1 = brw_indirect(1, 0); + struct brw_indirect vt2 = brw_indirect(2, 0); + + struct brw_compile *p = &c->func; + + brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); + brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); + + /* test nearz, xmin, ymin plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* test farz, xmax, ymax plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + release_tmps(c); +} void brw_emit_tri_clip( struct brw_clip_compile *c ) { + struct brw_instruction *neg_rhw; + struct brw_compile *p = &c->func; brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); brw_clip_tri_init_vertices(c); brw_clip_init_clipmask(c); + /* if -ve rhw workaround bit is set, + do cliptest */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + neg_rhw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_test(c); + } + brw_ENDIF(p, neg_rhw); + /* Can't push into do_clip_tri because with polygon (or quad) * flatshading, need to apply the flatshade here because we don't * respect the PV when converting to trifan for emit: @@ -462,6 +564,3 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) */ brw_clip_kill_thread(c); } - - - diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c index 19bef19801..c37bfeb1ce 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_util.c +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c @@ -272,6 +272,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c) + struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) { return brw_address(c->reg.fixed_planes); @@ -327,8 +328,7 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) /* Shift so that lowest outcode bit is rightmost: */ - brw_MOV(p, c->reg.planemask, incoming); - brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(26)); + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); if (c->key.nr_userclip) { struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); @@ -342,13 +342,5 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c ) release_tmp(c, tmp); } - - /* Test for -ve rhw workaround - */ - brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); - brw_AND(p, vec1(brw_null_reg()), incoming, brw_imm_ud(1<<20)); - brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - } -- cgit v1.2.3