From a8d233e509a2c1aada7cd4e83b126ba06cb90565 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Oct 2009 14:29:55 -0600 Subject: i965: use macros to get/set prog_instruction::Aux field This makes things a bit easier to remember/understand. --- src/mesa/drivers/dri/i965/brw_wm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 872b1f3ecf..f13a958a44 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -271,6 +271,12 @@ struct brw_wm_compile { }; +/** Bits for prog_instruction::Aux field */ +#define INST_AUX_EOT 0x1 +#define INST_AUX_TARGET(T) (T << 1) +#define INST_AUX_GET_TARGET(AUX) ((AUX) >> 1) + + GLuint brw_wm_nr_args( GLuint opcode ); GLuint brw_wm_is_scalar_result( GLuint opcode ); -- cgit v1.2.3 From 9ef33b86855c4d000271774030bd1b19b6d79687 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Oct 2009 14:53:53 -0600 Subject: i965: don't use context state in emit_fb_write() Put the state that we care about in the hash key. Issue spotted by Keith Whitwell. --- src/mesa/drivers/dri/i965/brw_wm.c | 2 ++ src/mesa/drivers/dri/i965/brw_wm.h | 1 + src/mesa/drivers/dri/i965/brw_wm_fp.c | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 5a2ac1a651..eeb25980fa 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -325,6 +325,8 @@ static void brw_wm_populate_key( struct brw_context *brw, key->drawable_height = brw->intel.driDrawable->h; } + key->nr_color_regions = brw->state.nr_color_regions; + /* CACHE_NEW_VS_PROG */ key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index f13a958a44..d0f30607d0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -67,6 +67,7 @@ struct brw_wm_prog_key { GLuint flat_shade:1; GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint runtime_check_aads_emit:1; + GLuint nr_color_regions:2; GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint shadowtex_mask:16; diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index d33fd9abed..9a0aea872b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -968,8 +968,8 @@ static void emit_fb_write( struct brw_wm_compile *c ) /* The inst->Aux field is used for FB write target and the EOT marker */ - if (brw->state.nr_color_regions > 1) { - for (i = 0 ; i < brw->state.nr_color_regions; i++) { + if (c->key.nr_color_regions > 1) { + for (i = 0 ; i < c->key.nr_color_regions; i++) { outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(), 0), 0, outcolor, payload_r0_depth, outdepth); -- cgit v1.2.3 From 035b21f365f563cba9bf9b8cb4fef613ceb9d3ac Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 29 Oct 2009 15:57:22 -0600 Subject: i965: make brw_wm_prog_key a little smaller GLushort is big enough for the swizzle and origin fields. The key could probably be made smaller still by re-ordering things. I'll hold off on that until after the outputswritten64 branch is merged. The key will get a little larger again with the GLbitfield64 fields. --- src/mesa/drivers/dri/i965/brw_wm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index d0f30607d0..dd4644fc36 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -74,11 +74,11 @@ struct brw_wm_prog_key { GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ - GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; + GLushort tex_swizzles[BRW_MAX_TEX_UNIT]; GLuint program_string_id:32; - GLuint origin_x, origin_y; - GLuint drawable_height; + GLushort origin_x, origin_y; + GLushort drawable_height; GLuint vp_outputs_written; }; -- cgit v1.2.3 From 2c30ee9bd69ed606b984c051748a7cdb34905eeb Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 30 Oct 2009 13:20:13 -0700 Subject: i965: Fix BRW_WM_MAX_INSN to reflect current limits. Part of fixing bug #24355. --- src/mesa/drivers/dri/i965/brw_wm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index dd4644fc36..47aa4da306 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -154,13 +154,12 @@ struct brw_wm_instruction { }; -#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) +#define BRW_WM_MAX_INSN (MAX_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) #define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) #define BRW_WM_MAX_PARAM 256 #define BRW_WM_MAX_CONST 256 -#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS #define BRW_WM_MAX_SUBROUTINE 16 -- cgit v1.2.3 From 0f34cdf6210b748db77c5eba2993637f4af6faeb Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Aug 2009 12:56:35 -0700 Subject: i965: Collect GLSL src/dst regs up in generic code. This matches brw_wm_emit.c, which we'll be using shortly. There's a possible penalty here in that we'll allocate registers for unused channels, since we aren't doing ref tracking like brw_wm_pass*.c does. However, my measurements on GM965 don't show any for either OA or UT2004 with the GLSL path forced. --- src/mesa/drivers/dri/i965/brw_wm.h | 2 ++ src/mesa/drivers/dri/i965/brw_wm_glsl.c | 22 +++++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 47aa4da306..f841d25870 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -162,6 +162,8 @@ struct brw_wm_instruction { #define BRW_WM_MAX_CONST 256 #define BRW_WM_MAX_SUBROUTINE 16 +/* used in masks next to WRITEMASK_*. */ +#define SATURATE (1<<5) /* New opcodes to track internal operations required for WM unit. diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 28d6d4eba5..fd6d6c5602 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -2771,6 +2771,21 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) if (c->fp->use_const_buffer) fetch_constants(c, inst); + if (inst->Opcode != OPCODE_ARL) { + for (j = 0; j < 4; j++) { + if (inst->DstReg.WriteMask & (1 << j)) + dst[j] = get_dst_reg(c, inst, j); + else + dst[j] = brw_null_reg(); + } + } + for (j = 0; j < brw_wm_nr_args(inst->Opcode); j++) + get_argument_regs(c, inst, j, args[j], WRITEMASK_XYZW); + + dst_flags = inst->DstReg.WriteMask; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + dst_flags |= SATURATE; + if (inst->CondUpdate) brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); else @@ -2866,13 +2881,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) break; case OPCODE_DDX: case OPCODE_DDY: - for (j = 0; j < 4; j++) { - if (inst->DstReg.WriteMask & (1 << j)) - dst[j] = get_dst_reg(c, inst, j); - else - dst[j] = brw_null_reg(); - } - get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW); emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), args[0]); break; -- cgit v1.2.3 From 71af5080722afcbbb8a935138d95214ef7afe219 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 11 Aug 2009 16:31:19 -0700 Subject: i965: Share basic ALU ops between brw_wm_glsl and brw_wm_emit.c This drops support for get_src_reg_imm in these, but the prospect of getting brw_wm_pass*.c onto our GLSL path is well worth some temporary pain. --- src/mesa/drivers/dri/i965/brw_wm.h | 17 +++++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 32 +++++----- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 106 ++------------------------------ 3 files changed, 38 insertions(+), 117 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index f841d25870..93f79dbfe5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -308,6 +308,23 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); +/* brw_wm_emit.c */ +void emit_alu1(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_alu2(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index bf80a2942a..5e41a3f571 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -352,13 +352,13 @@ void emit_ddxy(struct brw_compile *p, brw_set_saturate(p, 0); } -static void emit_alu1( struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_alu1(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { GLuint i; @@ -376,15 +376,15 @@ static void emit_alu1( struct brw_compile *p, } -static void emit_alu2( struct brw_compile *p, - struct brw_instruction *(*func)(struct brw_compile *, - struct brw_reg, - struct brw_reg, - struct brw_reg), - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_alu2(struct brw_compile *p, + struct brw_instruction *(*func)(struct brw_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index fd6d6c5602..fdd31d4ed5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -668,26 +668,6 @@ static void emit_trunc( struct brw_wm_compile *c, brw_set_saturate(p, 0); } -static void emit_mov( struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); - for (i = 0; i < 4; i++) { - if (mask & (1<func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<func; - struct brw_reg src0, src1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SaturateMode != SATURATE_OFF) - brw_set_saturate(p, 0); -} - -static void emit_flr(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - for (i = 0 ; i < 4; i++) { - if (mask & (1< Date: Tue, 11 Aug 2009 17:52:44 -0700 Subject: i965: Add generic GLSL code for unaliasing a 3-arg opcode, and share LRP code. --- src/mesa/drivers/dri/i965/brw_wm.h | 6 ++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 12 ++-- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 98 ++++++++++++++++++++------------- 3 files changed, 71 insertions(+), 45 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 93f79dbfe5..66902bab65 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -330,5 +330,11 @@ void emit_ddxy(struct brw_compile *p, GLuint mask, GLboolean is_ddx, const struct brw_reg *arg0); +void emit_lrp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 215515d10b..9e637ef3f2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -422,12 +422,12 @@ static void emit_mad( struct brw_compile *p, } } -static void emit_lrp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_lrp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index cd5a59463c..458d06187a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -650,6 +650,63 @@ static void invoke_subroutine( struct brw_wm_compile *c, } } +/* Workaround for using brw_wm_emit.c's emit functions, which expect + * destination regs to be uniquely written. Moves arguments out to + * temporaries as necessary for instructions which use their destination as + * a temporary. + */ +static void +unalias3(struct brw_wm_compile *c, + void (*func)(struct brw_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp_arg0[4], tmp_arg1[4], tmp_arg2[4]; + int i, j; + int mark = mark_tmps(c); + + for (j = 0; j < 4; j++) { + tmp_arg0[j] = arg0[j]; + tmp_arg1[j] = arg1[j]; + tmp_arg2[j] = arg2[j]; + } + + for (i = 0; i < 4; i++) { + if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, tmp1, tmp2, src0, src1, src2; - int i; - int mark = mark_tmps(c); - for (i = 0; i < 4; i++) { - if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0, tmp1); - brw_set_saturate(p, 0); - } - release_tmps(c, mark); - } -} - /** * For GLSL shaders, this KIL will be unconditional. * It may be contained inside an IF/ENDIF structure of course. @@ -2722,7 +2741,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; case OPCODE_LRP: - emit_lrp(c, inst); + unalias3(c, emit_lrp, + dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_TRUNC: emit_alu1(p, brw_RNDZ, dst, dst_flags, args[0]); -- cgit v1.2.3 From bad5b120be8de37cf8481d865790298fd9651381 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 11 Aug 2009 19:13:52 -0700 Subject: i965: Share the DP3, DP4, and DPH between brw_wm_glsl.c and brw_wm_emit.c --- src/mesa/drivers/dri/i965/brw_wm.h | 15 ++++++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 30 ++++++------ src/mesa/drivers/dri/i965/brw_wm_glsl.c | 87 ++------------------------------- 3 files changed, 33 insertions(+), 99 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 66902bab65..18f17f2488 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -330,6 +330,21 @@ void emit_ddxy(struct brw_compile *p, GLuint mask, GLboolean is_ddx, const struct brw_reg *arg0); +void emit_dp3(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_dp4(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_dph(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); void emit_lrp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 9e637ef3f2..bc5abb942b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -594,11 +594,11 @@ static void emit_min( struct brw_compile *p, } -static void emit_dp3( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dp3(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; @@ -616,11 +616,11 @@ static void emit_dp3( struct brw_compile *p, } -static void emit_dp4( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dp4(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; @@ -639,11 +639,11 @@ static void emit_dp4( struct brw_compile *p, } -static void emit_dph( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_dph(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 458d06187a..536eac8851 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1038,87 +1038,6 @@ static void emit_xpd(struct brw_wm_compile *c, brw_set_saturate(p, 0); } -static void emit_dp3(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[3], src1[3], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 3; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, 0); -} - -static void emit_dp4(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, brw_null_reg(), src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MAC(p, dst, src0[3], src1[3]); - brw_set_saturate(p, 0); -} - -static void emit_dph(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg src0[4], src1[4], dst; - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - for (i = 0; i < 4; i++) { - src0[i] = get_src_reg(c, inst, 0, i); - src1[i] = get_src_reg_imm(c, inst, 1, i); - } - dst = get_dst_reg(c, inst, dst_chan); - brw_MUL(p, brw_null_reg(), src0[0], src1[0]); - brw_MAC(p, brw_null_reg(), src0[1], src1[1]); - brw_MAC(p, dst, src0[2], src1[2]); - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, dst, src1[3]); - brw_set_saturate(p, 0); -} - /** * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. * Note that the result of the function is smeared across the dest @@ -2752,16 +2671,16 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; case OPCODE_DP3: - emit_dp3(c, inst); + emit_dp3(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DP4: - emit_dp4(c, inst); + emit_dp4(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_XPD: emit_xpd(c, inst); break; case OPCODE_DPH: - emit_dph(c, inst); + emit_dph(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_RCP: emit_rcp(c, inst); -- cgit v1.2.3 From 726ad1560660a1fc769c87e0ea16f8b3334df0d2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 11 Aug 2009 19:17:31 -0700 Subject: i965: Share OPCODE_MAD between brw_wm_glsl.c and brw_wm_emit.c --- src/mesa/drivers/dri/i965/brw_wm.h | 6 ++++++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 12 ++++++------ src/mesa/drivers/dri/i965/brw_wm_glsl.c | 25 +------------------------ 3 files changed, 13 insertions(+), 30 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 18f17f2488..e0445aaf52 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -351,5 +351,11 @@ void emit_lrp(struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1, const struct brw_reg *arg2); +void emit_mad(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index bc5abb942b..6cdc4f7483 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -402,12 +402,12 @@ void emit_alu2(struct brw_compile *p, } -static void emit_mad( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1, - const struct brw_reg *arg2 ) +void emit_mad(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1, + const struct brw_reg *arg2) { GLuint i; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 536eac8851..55e3e9fa7f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1214,29 +1214,6 @@ static void emit_kil(struct brw_wm_compile *c) brw_pop_insn_state(p); } -static void emit_mad(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1, src2; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_ADD(p, dst, dst, src2); - brw_set_saturate(p, 0); - } - } -} - static void emit_sop(struct brw_wm_compile *c, const struct prog_instruction *inst, GLuint cond) { @@ -2734,7 +2711,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_pow(c, inst); break; case OPCODE_MAD: - emit_mad(c, inst); + emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); break; case OPCODE_NOISE1: emit_noise1(c, inst); -- cgit v1.2.3 From 7059aa0eff9ff6ec361e584b413f63b25762a89c Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 11 Aug 2009 21:17:14 -0700 Subject: i965: Share the sop opcodes between brw_wm_glsl.c and brw_wm_emit.c. --- src/mesa/drivers/dri/i965/brw_wm.h | 6 +++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 19 ++++---- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 78 +++++---------------------------- 3 files changed, 29 insertions(+), 74 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index e0445aaf52..df8e467aaf 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -357,5 +357,11 @@ void emit_mad(struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1, const struct brw_reg *arg2); +void emit_sop(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLuint cond, + const struct brw_reg *arg0, + const struct brw_reg *arg1); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 6cdc4f7483..41ebadb553 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -447,21 +447,24 @@ void emit_lrp(struct brw_compile *p, } } -static void emit_sop( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - GLuint cond, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_sop(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLuint cond, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; for (i = 0; i < 4; i++) { if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg dst, src0, src1; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1< Date: Wed, 12 Aug 2009 09:52:44 -0700 Subject: i965: Share math functions between brw_wm_glsl.c and brw_wm_emit.c. --- src/mesa/drivers/dri/i965/brw_wm.h | 16 +++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 139 +++++++++++++++---------- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 177 ++------------------------------ 3 files changed, 111 insertions(+), 221 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index df8e467aaf..7d470e8dfe 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -357,11 +357,27 @@ void emit_mad(struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1, const struct brw_reg *arg2); +void emit_math1(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); +void emit_math2(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); void emit_sop(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, GLuint cond, const struct brw_reg *arg0, const struct brw_reg *arg1); +void emit_xpd(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 41ebadb553..763fe4b4d6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -665,11 +665,11 @@ void emit_dph(struct brw_compile *p, } -static void emit_xpd( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_xpd(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -690,41 +690,68 @@ static void emit_xpd( struct brw_compile *p, } -static void emit_math1( struct brw_compile *p, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_math1(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { + struct brw_compile *p = &c->func; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ assert(is_power_of_two(mask & WRITEMASK_XYZW)); + /* If compressed, this will write message reg 2,3 from arg0.x's 16 + * channels. + */ brw_MOV(p, brw_message_reg(2), arg0[0]); /* Send two messages to perform all 16 operations: */ - brw_math_16(p, - dst[dst_chan], + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_math(p, + dst[dst_chan], + function, + saturate, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, + saturate, + 3, brw_null_reg(), + BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); + } + brw_pop_insn_state(p); } -static void emit_math2( struct brw_compile *p, - GLuint function, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1) +void emit_math2(struct brw_wm_compile *c, + GLuint function, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { + struct brw_compile *p = &c->func; int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + GLuint saturate = ((mask & SATURATE) ? + BRW_MATH_SATURATE_SATURATE : + BRW_MATH_SATURATE_NONE); if (!(mask & WRITEMASK_XYZW)) return; /* Do not emit dead code */ @@ -735,37 +762,41 @@ static void emit_math2( struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(2), arg0[0]); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(4), sechalf(arg0[0])); + } brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MOV(p, brw_message_reg(3), arg1[0]); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, brw_message_reg(5), sechalf(arg1[0])); + } - - /* Send two messages to perform all 16 operations: - */ brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math(p, dst[dst_chan], function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + saturate, 2, brw_null_reg(), BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); - brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); - brw_math(p, - offset(dst[dst_chan],1), - function, - (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 4, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); - + /* Send two messages to perform all 16 operations: + */ + if (c->dispatch_width == 16) { + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_math(p, + offset(dst[dst_chan],1), + function, + saturate, + 4, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } brw_pop_insn_state(p); } @@ -909,11 +940,13 @@ static void emit_txb( struct brw_wm_compile *c, } -static void emit_lit( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +static void emit_lit(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { + struct brw_compile *p = &c->func; + assert((mask & WRITEMASK_XW) == 0); if (mask & WRITEMASK_Y) { @@ -923,7 +956,7 @@ static void emit_lit( struct brw_compile *p, } if (mask & WRITEMASK_Z) { - emit_math2(p, BRW_MATH_FUNCTION_POW, + emit_math2(c, BRW_MATH_FUNCTION_POW, &dst[2], WRITEMASK_X | (mask & SATURATE), &arg0[1], @@ -1380,27 +1413,27 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Higher math functions: */ case OPCODE_RCP: - emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: - emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: - emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: - emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_SCS: @@ -1408,13 +1441,13 @@ void brw_wm_emit( struct brw_wm_compile *c ) * fixup for 16-element execution. */ if (dst_flags & WRITEMASK_X) - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); if (dst_flags & WRITEMASK_Y) - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); break; case OPCODE_POW: - emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); break; /* Comparisons: @@ -1452,7 +1485,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_LIT: - emit_lit(p, dst, dst_flags, args[0]); + emit_lit(c, dst, dst_flags, args[0]); break; /* Texturing operations: diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 2df47344d5..02b119154f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -550,42 +550,6 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, } } - -/** - * Same as \sa get_src_reg() but if the register is a literal, emit - * a brw_reg encoding the literal. - * Note that a brw instruction only allows one src operand to be a literal. - * For instructions with more than one operand, only the second can be a - * literal. This means that we treat some literals as constants/uniforms - * (which why PROGRAM_CONSTANT is checked in fetch_constants()). - * - */ -static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, - const struct prog_instruction *inst, - GLuint srcRegIndex, GLuint channel) -{ - const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - if (src->File == PROGRAM_CONSTANT) { - /* a literal */ - const int component = GET_SWZ(src->Swizzle, channel); - const GLfloat *param = - c->fp->program.Base.Parameters->ParameterValues[src->Index]; - GLfloat value = param[component]; - if (src->Negate & (1 << channel)) - value = -value; - if (src->Abs) - value = FABSF(value); -#if 0 - printf(" form immed value %f for chan %d\n", value, channel); -#endif - return brw_imm_f(value); - } - else { - return get_src_reg(c, inst, srcRegIndex, channel); - } -} - - /** * Subroutines are minimal support for resusable instruction sequences. * They are implemented as simply as possible to minimise overhead: there @@ -1013,100 +977,6 @@ static void emit_frontfacing(struct brw_wm_compile *c, brw_set_predicate_control_flag_value(p, 0xff); } -static void emit_xpd(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - int i; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - for (i = 0; i < 4; i++) { - GLuint i2 = (i+2)%3; - GLuint i1 = (i+1)%3; - if (mask & (1<SaturateMode != SATURATE_OFF); - brw_MAC(p, dst, src0, src1); - brw_set_saturate(p, 0); - } - } - brw_set_saturate(p, 0); -} - -/** - * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. - * Note that the result of the function is smeared across the dest - * register's X, Y, Z and W channels (subject to writemasking of course). - */ -static void emit_math1(struct brw_wm_compile *c, - const struct prog_instruction *inst, GLuint func) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, dst; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - /* Get first component of source register */ - dst = get_dst_reg(c, inst, dst_chan); - src0 = get_src_reg(c, inst, 0, 0); - - brw_MOV(p, brw_message_reg(2), src0); - brw_math(p, - dst, - func, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - -static void emit_rcp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_INV); -} - -static void emit_rsq(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); -} - -static void emit_sin(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); -} - -static void emit_cos(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_COS); -} - -static void emit_ex2(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); -} - -static void emit_lg2(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); -} - static void emit_arl(struct brw_wm_compile *c, const struct prog_instruction *inst) { @@ -1169,36 +1039,6 @@ static void emit_min_max(struct brw_wm_compile *c, release_tmps(c, mark); } -static void emit_pow(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst, src0, src1; - GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; - - if (!(mask & WRITEMASK_XYZW)) - return; - - assert(is_power_of_two(mask & WRITEMASK_XYZW)); - - dst = get_dst_reg(c, inst, dst_chan); - src0 = get_src_reg_imm(c, inst, 0, 0); - src1 = get_src_reg_imm(c, inst, 1, 0); - - brw_MOV(p, brw_message_reg(2), src0); - brw_MOV(p, brw_message_reg(3), src1); - - brw_math(p, - dst, - BRW_MATH_FUNCTION_POW, - (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, - 2, - brw_null_reg(), - BRW_MATH_DATA_VECTOR, - BRW_MATH_PRECISION_FULL); -} - /** * For GLSL shaders, this KIL will be unconditional. * It may be contained inside an IF/ENDIF structure of course. @@ -2594,28 +2434,28 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_dp4(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_XPD: - emit_xpd(c, inst); + emit_xpd(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_DPH: emit_dph(p, dst, dst_flags, args[0], args[1]); break; case OPCODE_RCP: - emit_rcp(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; case OPCODE_RSQ: - emit_rsq(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; case OPCODE_SIN: - emit_sin(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; case OPCODE_COS: - emit_cos(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; case OPCODE_EX2: - emit_ex2(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; case OPCODE_LG2: - emit_lg2(c, inst); + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_MIN: case OPCODE_MAX: @@ -2654,7 +2494,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); break; case OPCODE_POW: - emit_pow(c, inst); + emit_math2(c, BRW_MATH_FUNCTION_POW, + dst, dst_flags, args[0], args[1]); break; case OPCODE_MAD: emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); -- cgit v1.2.3 From 2b58c31257f8900067276b6d6537bb2ce54b1b10 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 12 Aug 2009 10:19:31 -0700 Subject: i965: Share most of the WM functions between brw_wm_glsl.c and brw_wm_emit.c The PINTERP code should be faster for brw_wm_glsl.c now since brw_wm_emit.c's had been improved, and pixel_w should no longer stomp on a neighbor to dst. --- src/mesa/drivers/dri/i965/brw_wm.h | 34 +++++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 115 ++++++++------ src/mesa/drivers/dri/i965/brw_wm_glsl.c | 257 +------------------------------- 3 files changed, 109 insertions(+), 297 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 7d470e8dfe..eba828f6e3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -325,11 +325,19 @@ void emit_alu2(struct brw_compile *p, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1); +void emit_cinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, GLboolean is_ddx, const struct brw_reg *arg0); +void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); void emit_dp3(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -345,6 +353,14 @@ void emit_dph(struct brw_compile *p, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1); +void emit_frontfacing(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask); +void emit_linterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas); void emit_lrp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, @@ -368,12 +384,30 @@ void emit_math2(struct brw_wm_compile *c, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1); +void emit_pinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas, + const struct brw_reg *w); +void emit_pixel_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask); +void emit_pixel_w(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas); void emit_sop(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, GLuint cond, const struct brw_reg *arg0, const struct brw_reg *arg1); +void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0); void emit_xpd(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 763fe4b4d6..6eaa2792be 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -44,6 +44,7 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) return reg; } + /* Payload R0: * * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, @@ -60,42 +61,50 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) * R1.8 -- ? */ - -static void emit_pixel_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask) +void emit_pixel_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask) { + struct brw_compile *p = &c->func; struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + struct brw_reg dst0_uw, dst1_uw; + brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); + if (c->dispatch_width == 16) { + dst0_uw = vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)); + dst1_uw = vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)); + } else { + dst0_uw = vec8(retype(dst[0], BRW_REGISTER_TYPE_UW)); + dst1_uw = vec8(retype(dst[1], BRW_REGISTER_TYPE_UW)); + } + /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ if (mask & WRITEMASK_X) { brw_ADD(p, - vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)), + dst0_uw, stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } if (mask & WRITEMASK_Y) { brw_ADD(p, - vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)), + dst1_uw, stride(suboffset(r1_uw,5), 2, 4, 0), brw_imm_v(0x11001100)); } - - brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + brw_pop_insn_state(p); } - -static void emit_delta_xy(struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_reg r1 = brw_vec1_grf(1, 0); @@ -118,10 +127,10 @@ static void emit_delta_xy(struct brw_compile *p, } } -static void emit_wpos_xy(struct brw_wm_compile *c, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0) +void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_compile *p = &c->func; @@ -146,12 +155,14 @@ static void emit_wpos_xy(struct brw_wm_compile *c, } -static void emit_pixel_w( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas) +void emit_pixel_w(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) { + struct brw_compile *p = &c->func; + /* Don't need this if all you are doing is interpolating color, for * instance. */ @@ -165,21 +176,29 @@ static void emit_pixel_w( struct brw_compile *p, brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); /* Calc w */ - brw_math_16( p, dst[3], - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), - BRW_MATH_PRECISION_FULL); + if (c->dispatch_width == 16) { + brw_math_16(p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } else { + brw_math(p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); + } } } - -static void emit_linterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas ) +void emit_linterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -199,12 +218,12 @@ static void emit_linterp( struct brw_compile *p, } -static void emit_pinterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *deltas, - const struct brw_reg *w) +void emit_pinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas, + const struct brw_reg *w) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -229,10 +248,10 @@ static void emit_pinterp( struct brw_compile *p, } -static void emit_cinterp( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0 ) +void emit_cinterp(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) { struct brw_reg interp[4]; GLuint nr = arg0[0].nr; @@ -251,9 +270,9 @@ static void emit_cinterp( struct brw_compile *p, } /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */ -static void emit_frontfacing( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask ) +void emit_frontfacing(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask) { struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1316,7 +1335,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Generated instructions for calculating triangle interpolants: */ case WM_PIXELXY: - emit_pixel_xy(p, dst, dst_flags); + emit_pixel_xy(c, dst, dst_flags); break; case WM_DELTAXY: @@ -1328,7 +1347,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case WM_PIXELW: - emit_pixel_w(p, dst, dst_flags, args[0], args[1]); + emit_pixel_w(c, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 02b119154f..d0987362a4 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -671,67 +671,6 @@ unalias3(struct brw_wm_compile *c, release_tmps(c, mark); } -static void emit_pixel_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); - - struct brw_reg dst0, dst1; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0); - dst1 = get_dst_reg(c, inst, 1); - /* Calculate pixel centers by adding 1 or 0 to each of the - * micro-tile coordinates passed in r1. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 4), 2, 4, 0), - brw_imm_v(0x10101010)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), - stride(suboffset(r1_uw, 5), 2, 4, 0), - brw_imm_v(0x11001100)); - } -} - -static void emit_delta_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_reg r1 = brw_vec1_grf(1, 0); - struct brw_reg dst0, dst1, src0, src1; - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - - dst0 = get_dst_reg(c, inst, 0); - dst1 = get_dst_reg(c, inst, 1); - src0 = get_src_reg(c, inst, 0, 0); - src1 = get_src_reg(c, inst, 0, 1); - /* Calc delta X,Y by subtracting origin in r1 from the pixel - * centers. - */ - if (mask & WRITEMASK_X) { - brw_ADD(p, - dst0, - retype(src0, BRW_REGISTER_TYPE_UW), - negate(r1)); - } - - if (mask & WRITEMASK_Y) { - brw_ADD(p, - dst1, - retype(src1, BRW_REGISTER_TYPE_UW), - negate(suboffset(r1,1))); - - } -} - static void fire_fb_write( struct brw_wm_compile *c, GLuint base_reg, GLuint nr, @@ -829,154 +768,6 @@ static void emit_fb_write(struct brw_wm_compile *c, fire_fb_write(c, 0, nr, target, eot); } -static void emit_pixel_w( struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - if (mask & WRITEMASK_W) { - struct brw_reg dst, src0, delta0, delta1; - struct brw_reg interp3; - - dst = get_dst_reg(c, inst, 3); - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - - interp3 = brw_vec1_grf(src0.nr+1, 4); - /* Calc 1/w - just linterp wpos[3] optimized by putting the - * result straight into a message reg. - */ - brw_LINE(p, brw_null_reg(), interp3, delta0); - brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); - - /* Calc w */ - brw_math_16( p, dst, - BRW_MATH_FUNCTION_INV, - BRW_MATH_SATURATE_NONE, - 2, brw_null_reg(), - BRW_MATH_PRECISION_FULL); - } -} - -static void emit_linterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - nr = src0.nr; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - - struct brw_reg interp[4]; - struct brw_reg dst, src0; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - nr = src0.nr; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - - struct brw_reg interp[4]; - struct brw_reg dst, delta0, delta1; - struct brw_reg src0, w; - GLuint nr, i; - - src0 = get_src_reg(c, inst, 0, 0); - delta0 = get_src_reg(c, inst, 1, 0); - delta1 = get_src_reg(c, inst, 1, 1); - w = get_src_reg(c, inst, 2, 3); - nr = src0.nr; - - interp[0] = brw_vec1_grf(nr, 0); - interp[1] = brw_vec1_grf(nr, 4); - interp[2] = brw_vec1_grf(nr+1, 0); - interp[3] = brw_vec1_grf(nr+1, 4); - - for(i = 0; i < 4; i++ ) { - if (mask & (1<func; - struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); - struct brw_reg dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - - for (i = 0; i < 4; i++) { - if (mask & (1<func; - GLuint mask = inst->DstReg.WriteMask; - struct brw_reg src0[2], dst[2]; - - dst[0] = get_dst_reg(c, inst, 0); - dst[1] = get_dst_reg(c, inst, 1); - src0[0] = get_src_reg(c, inst, 0, 0); - src0[1] = get_src_reg(c, inst, 0, 1); - - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ - if (mask & WRITEMASK_X) { - /* X' = X - origin_x */ - brw_ADD(p, - dst[0], - retype(src0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); - } - - if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ - brw_ADD(p, - dst[1], - negate(retype(src0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); - } -} /* TODO BIAS on SIMD8 not working yet... @@ -2378,31 +2137,31 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) switch (inst->Opcode) { case WM_PIXELXY: - emit_pixel_xy(c, inst); + emit_pixel_xy(c, dst, dst_flags); break; case WM_DELTAXY: - emit_delta_xy(c, inst); + emit_delta_xy(p, dst, dst_flags, args[0]); break; case WM_PIXELW: - emit_pixel_w(c, inst); + emit_pixel_w(c, dst, dst_flags, args[0], args[1]); break; case WM_LINTERP: - emit_linterp(c, inst); + emit_linterp(p, dst, dst_flags, args[0], args[1]); break; case WM_PINTERP: - emit_pinterp(c, inst); + emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); break; case WM_CINTERP: - emit_cinterp(c, inst); + emit_cinterp(p, dst, dst_flags, args[0]); break; case WM_WPOSXY: - emit_wpos_xy(c, inst); + emit_wpos_xy(c, dst, dst_flags, args[0]); break; case WM_FB_WRITE: emit_fb_write(c, inst); break; case WM_FRONTFACING: - emit_frontfacing(c, inst); + emit_frontfacing(p, dst, dst_flags); break; case OPCODE_ADD: emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); -- cgit v1.2.3 From cfa927766ab610a9a76730d337d77008d876ebbd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 19 Aug 2009 11:48:09 -0700 Subject: i965: Share emit_fb_write() between brw_wm_emit.c and brw_wm_glsl.c This should fix issues with antialiased lines in GLSL. --- src/mesa/drivers/dri/i965/brw_wm.h | 6 ++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 56 +++++++++--------- src/mesa/drivers/dri/i965/brw_wm_fp.c | 4 +- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 101 +------------------------------- 4 files changed, 40 insertions(+), 127 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index eba828f6e3..bde0c366bd 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -353,6 +353,12 @@ void emit_dph(struct brw_compile *p, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1); +void emit_fb_write(struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2, + GLuint target, + GLuint eot); void emit_frontfacing(struct brw_compile *p, const struct brw_reg *dst, GLuint mask); diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 6eaa2792be..d02e3cb9ac 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -1042,7 +1042,13 @@ static void fire_fb_write( struct brw_wm_compile *c, GLuint eot ) { struct brw_compile *p = &c->func; - + struct brw_reg dst; + + if (c->dispatch_width == 16) + dst = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + else + dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); + /* Pass through control information: */ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ @@ -1059,7 +1065,7 @@ static void fire_fb_write( struct brw_wm_compile *c, /* Send framebuffer write message: */ /* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ brw_fb_WRITE(p, - retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW), + dst, base_reg, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), target, @@ -1091,12 +1097,12 @@ static void emit_aa( struct brw_wm_compile *c, * \param arg1 the pass-through depth value * \param arg2 the shader-computed depth value */ -static void emit_fb_write( struct brw_wm_compile *c, - struct brw_reg *arg0, - struct brw_reg *arg1, - struct brw_reg *arg2, - GLuint target, - GLuint eot) +void emit_fb_write(struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2, + GLuint target, + GLuint eot) { struct brw_compile *p = &c->func; GLuint nr = 2; @@ -1110,30 +1116,27 @@ static void emit_fb_write( struct brw_wm_compile *c, /* I don't really understand how this achieves the color interleave * (ie RGBARGBA) in the result: [Do the saturation here] */ - { - brw_push_insn_state(p); - - for (channel = 0; channel < 4; channel++) { - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, - brw_message_reg(nr + channel), - arg0[channel]); - + for (channel = 0; channel < 4; channel++) { + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(nr + channel), + arg0[channel]); + + if (c->dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_MOV(p, brw_message_reg(nr + channel + 4), sechalf(arg0[channel])); } - - /* skip over the regs populated above: - */ - nr += 8; - - brw_pop_insn_state(p); } + /* skip over the regs populated above: + */ + nr += 8; + brw_pop_insn_state(p); if (c->key.source_depth_to_render_target) { @@ -1183,7 +1186,7 @@ static void emit_fb_write( struct brw_wm_compile *c, get_element_ud(brw_vec8_grf(1,0), 6), brw_imm_ud(1<<26)); - jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)); { emit_aa(c, arg1, 2); fire_fb_write(c, 0, nr, target, eot); @@ -1197,7 +1200,6 @@ static void emit_fb_write( struct brw_wm_compile *c, } } - /** * Move a GPR to scratch memory. */ diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index a453250e79..549afd31de 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -956,7 +956,7 @@ static void precalc_txp( struct brw_wm_compile *c, -static void emit_fb_write( struct brw_wm_compile *c ) +static void emit_render_target_writes( struct brw_wm_compile *c ) { struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); @@ -1153,7 +1153,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) out->DstReg.WriteMask = 0; break; case OPCODE_END: - emit_fb_write(c); + emit_render_target_writes(c); break; case OPCODE_PRINT: break; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index d0987362a4..be89bcf4ba 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -671,103 +671,6 @@ unalias3(struct brw_wm_compile *c, release_tmps(c, mark); } -static void fire_fb_write( struct brw_wm_compile *c, - GLuint base_reg, - GLuint nr, - GLuint target, - GLuint eot) -{ - struct brw_compile *p = &c->func; - /* Pass through control information: - */ - /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ - { - brw_push_insn_state(p); - brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ - brw_MOV(p, - brw_message_reg(base_reg + 1), - brw_vec8_grf(1, 0)); - brw_pop_insn_state(p); - } - /* Send framebuffer write message: */ - brw_fb_WRITE(p, - retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), - base_reg, - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), - target, - nr, - 0, - eot); -} - -static void emit_fb_write(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - int nr = 2; - int channel; - GLuint target, eot; - struct brw_reg src0; - - /* Reserve a space for AA - may not be needed: - */ - if (c->key.aa_dest_stencil_reg) - nr += 1; - - brw_push_insn_state(p); - for (channel = 0; channel < 4; channel++) { - src0 = get_src_reg(c, inst, 0, channel); - /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ - /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ - brw_MOV(p, brw_message_reg(nr + channel), src0); - } - /* skip over the regs populated above: */ - nr += 8; - brw_pop_insn_state(p); - - if (c->key.source_depth_to_render_target) { - if (c->key.computes_depth) { - src0 = get_src_reg(c, inst, 2, 2); - brw_MOV(p, brw_message_reg(nr), src0); - } - else { - src0 = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src0); - } - - nr += 2; - } - - if (c->key.dest_depth_reg) { - const GLuint comp = c->key.dest_depth_reg / 2; - const GLuint off = c->key.dest_depth_reg % 2; - - if (off != 0) { - /* XXX this code needs review/testing */ - struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); - struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); - - brw_push_insn_state(p); - brw_set_compression_control(p, BRW_COMPRESSION_NONE); - - brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); - /* 2nd half? */ - brw_MOV(p, brw_message_reg(nr+1), arg1_1); - brw_pop_insn_state(p); - } - else - { - struct brw_reg src = get_src_reg(c, inst, 1, 1); - brw_MOV(p, brw_message_reg(nr), src); - } - nr += 2; - } - - target = INST_AUX_GET_TARGET(inst->Aux); - eot = inst->Aux & INST_AUX_EOT; - fire_fb_write(c, 0, nr, target, eot); -} - static void emit_arl(struct brw_wm_compile *c, const struct prog_instruction *inst) { @@ -2158,7 +2061,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_wpos_xy(c, dst, dst_flags, args[0]); break; case WM_FB_WRITE: - emit_fb_write(c, inst); + emit_fb_write(c, args[0], args[1], args[2], + INST_AUX_GET_TARGET(inst->Aux), + inst->Aux & INST_AUX_EOT); break; case WM_FRONTFACING: emit_frontfacing(p, dst, dst_flags); -- cgit v1.2.3 From ec66644ed0af976cacb069ca7c7f0d6731666359 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 19 Aug 2009 11:57:32 -0700 Subject: i965: Share min/max between brw_wm_emit.c and brw_wm_glsl.c --- src/mesa/drivers/dri/i965/brw_wm.h | 10 ++++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 20 +++---- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 102 ++++++++++++++++---------------- 3 files changed, 72 insertions(+), 60 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index bde0c366bd..1fa2f1c06c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -390,6 +390,16 @@ void emit_math2(struct brw_wm_compile *c, GLuint mask, const struct brw_reg *arg0, const struct brw_reg *arg1); +void emit_min(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); +void emit_max(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1); void emit_pinterp(struct brw_compile *p, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index d02e3cb9ac..9cd1fedacb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -567,11 +567,11 @@ static void emit_cmp( struct brw_compile *p, } } -static void emit_max( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_max(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; @@ -591,11 +591,11 @@ static void emit_max( struct brw_compile *p, } } -static void emit_min( struct brw_compile *p, - const struct brw_reg *dst, - GLuint mask, - const struct brw_reg *arg0, - const struct brw_reg *arg1 ) +void emit_min(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) { GLuint i; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index be89bcf4ba..9d3bc66f49 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -671,6 +671,55 @@ unalias3(struct brw_wm_compile *c, release_tmps(c, mark); } +/* Workaround for using brw_wm_emit.c's emit functions, which expect + * destination regs to be uniquely written. Moves arguments out to + * temporaries as necessary for instructions which use their destination as + * a temporary. + */ +static void +unalias2(struct brw_wm_compile *c, + void (*func)(struct brw_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1), + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *arg1) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp_arg0[4], tmp_arg1[4]; + int i, j; + int mark = mark_tmps(c); + + for (j = 0; j < 4; j++) { + tmp_arg0[j] = arg0[j]; + tmp_arg1[j] = arg1[j]; + } + + for (i = 0; i < 4; i++) { + if (mask & (1<func; - const GLuint mask = inst->DstReg.WriteMask; - const int mark = mark_tmps(c); - int i; - brw_push_insn_state(p); - for (i = 0; i < 4; i++) { - if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_MOV(p, dst, src0); - brw_set_saturate(p, 0); - - if (inst->Opcode == OPCODE_MIN) - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); - else - brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); - - brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); - brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); - brw_MOV(p, dst, src1); - brw_set_saturate(p, 0); - brw_set_predicate_control_flag_value(p, 0xff); - if (use_temp) - brw_MOV(p, real_dst, dst); - } - } - brw_pop_insn_state(p); - release_tmps(c, mark); -} - /** * For GLSL shaders, this KIL will be unconditional. * It may be contained inside an IF/ENDIF structure of course. @@ -2122,8 +2122,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; case OPCODE_MIN: + unalias2(c, emit_min, dst, dst_flags, args[0], args[1]); + break; case OPCODE_MAX: - emit_min_max(c, inst); + unalias2(c, emit_max, dst, dst_flags, args[0], args[1]); break; case OPCODE_DDX: case OPCODE_DDY: -- cgit v1.2.3 From c5413839b3e99c7b162f1260142f3c175502b0ce Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 10 Nov 2009 15:51:29 -0800 Subject: i965: avoid memsetting all the BRW_WM_MAX_INSN arrays for every compile. For an app that's blowing out the state cache, like sauerbraten, the memset of the giant arrays ended up taking 11% of the CPU even when only a "few" of the entries got used. With this, the WM program compile drops back down to 1% of CPU time. Bug #24981 (bisected to BRW_WM_MAX_INSN increase). --- src/mesa/drivers/dri/i965/brw_wm.c | 14 ++++++++++++++ src/mesa/drivers/dri/i965/brw_wm.h | 8 ++++---- src/mesa/drivers/dri/i965/brw_wm_fp.c | 2 ++ src/mesa/drivers/dri/i965/brw_wm_pass0.c | 3 +++ 4 files changed, 23 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 964ee104c2..d8971321f3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -152,8 +152,22 @@ static void do_wm_prog( struct brw_context *brw, */ return; } + c->instruction = _mesa_calloc(BRW_WM_MAX_INSN * sizeof(*c->instruction)); + c->prog_instructions = _mesa_calloc(BRW_WM_MAX_INSN * + sizeof(*c->prog_instructions)); + c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg)); + c->refs = _mesa_calloc(BRW_WM_MAX_REF * sizeof(*c->refs)); + c->vreg = _mesa_calloc(BRW_WM_MAX_VREG * sizeof(*c->vreg)); } else { + void *instruction = c->instruction; + void *prog_instructions = c->prog_instructions; + void *vreg = c->vreg; + void *refs = c->refs; memset(c, 0, sizeof(*brw->wm.compile_data)); + c->instruction = instruction; + c->prog_instructions = prog_instructions; + c->vreg = vreg; + c->refs = refs; } memcpy(&c->key, key, sizeof(*key)); diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 1fa2f1c06c..7db212e392 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -202,7 +202,7 @@ struct brw_wm_compile { * simplifying and adding instructions for interpolation and * framebuffer writes. */ - struct prog_instruction prog_instructions[BRW_WM_MAX_INSN]; + struct prog_instruction *prog_instructions; GLuint nr_fp_insns; GLuint fp_temp; GLuint fp_interp_emitted; @@ -213,7 +213,7 @@ struct brw_wm_compile { struct prog_src_register pixel_w; - struct brw_wm_value vreg[BRW_WM_MAX_VREG]; + struct brw_wm_value *vreg; GLuint nr_vreg; struct brw_wm_value creg[BRW_WM_MAX_PARAM]; @@ -230,10 +230,10 @@ struct brw_wm_compile { struct brw_wm_ref undef_ref; struct brw_wm_value undef_value; - struct brw_wm_ref refs[BRW_WM_MAX_REF]; + struct brw_wm_ref *refs; GLuint nr_refs; - struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; + struct brw_wm_instruction *instruction; GLuint nr_insns; struct brw_wm_constref constref[BRW_WM_MAX_CONST]; diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 549afd31de..1c4f62ba48 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -182,6 +182,8 @@ static void release_temp( struct brw_wm_compile *c, struct prog_dst_register tem static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) { assert(c->nr_fp_insns < BRW_WM_MAX_INSN); + memset(&c->prog_instructions[c->nr_fp_insns], 0, + sizeof(*c->prog_instructions)); return &c->prog_instructions[c->nr_fp_insns++]; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c index 602b1351ef..ff4c082d5e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c @@ -42,12 +42,14 @@ static struct brw_wm_ref *get_ref( struct brw_wm_compile *c ) { assert(c->nr_refs < BRW_WM_MAX_REF); + memset(&c->refs[c->nr_refs], 0, sizeof(*c->refs)); return &c->refs[c->nr_refs++]; } static struct brw_wm_value *get_value( struct brw_wm_compile *c) { assert(c->nr_refs < BRW_WM_MAX_VREG); + memset(&c->vreg[c->nr_vreg], 0, sizeof(*c->vreg)); return &c->vreg[c->nr_vreg++]; } @@ -55,6 +57,7 @@ static struct brw_wm_value *get_value( struct brw_wm_compile *c) static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) { assert(c->nr_insns < BRW_WM_MAX_INSN); + memset(&c->instruction[c->nr_insns], 0, sizeof(*c->instruction)); return &c->instruction[c->nr_insns++]; } -- cgit v1.2.3 From 1be0efcbdc74f9a84136c9d1f953755c1560e52e Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 19 Aug 2009 14:48:11 -0700 Subject: i965: Share OPCODE_TEX between brw_wm_emit.c and brw_wm_glsl.c. New comments should explain some of the confusion about how this message works. --- src/mesa/drivers/dri/i965/brw_wm.h | 8 +++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 87 ++++++++++++++++++++---------- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 95 +++------------------------------ 3 files changed, 72 insertions(+), 118 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 7db212e392..c497e8a46b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -420,6 +420,14 @@ void emit_sop(struct brw_compile *p, GLuint cond, const struct brw_reg *arg0, const struct brw_reg *arg1); +void emit_tex(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler, + GLboolean shadow); void emit_wpos_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index a47bf3f667..52bb73971b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -818,24 +818,41 @@ void emit_math2(struct brw_wm_compile *c, } brw_pop_insn_state(p); } - -static void emit_tex( struct brw_wm_compile *c, - const struct brw_wm_instruction *inst, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg ) +void emit_tex(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler, + GLboolean shadow) { struct brw_compile *p = &c->func; + struct brw_reg dst_retyped; GLuint cur_mrf = 2, response_length; GLuint i, nr_texcoords; GLuint emit; GLuint msg_type; + GLuint mrf_per_channel; + GLuint simd_mode; + + if (c->dispatch_width == 16) { + mrf_per_channel = 2; + response_length = 8; + dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } else { + mrf_per_channel = 1; + response_length = 4; + dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; + } /* How many input regs are there? */ - switch (inst->tex_idx) { + switch (tex_idx) { case TEXTURE_1D_INDEX: emit = WRITEMASK_X; nr_texcoords = 1; @@ -855,56 +872,66 @@ static void emit_tex( struct brw_wm_compile *c, abort(); } + /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */ + if (!BRW_IS_IGDNG(p->brw) && c->dispatch_width == 8) + nr_texcoords = 3; + /* For shadow comparisons, we have to supply u,v,r. */ - if (inst->tex_shadow) + if (shadow) nr_texcoords = 3; + /* Emit the texcoords. */ for (i = 0; i < nr_texcoords; i++) { if (emit & (1<tex_shadow) { + if (shadow) { if (BRW_IS_IGDNG(p->brw)) { /* Fill in the cube map array index value. */ brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); - cur_mrf += 2; + cur_mrf += mrf_per_channel; + } else if (c->dispatch_width == 8) { + /* Fill in the LOD bias value. */ + brw_MOV(p, brw_message_reg(cur_mrf), brw_imm_f(0)); + cur_mrf += mrf_per_channel; } brw_MOV(p, brw_message_reg(cur_mrf), arg[2]); - cur_mrf += 2; + cur_mrf += mrf_per_channel; } - response_length = 8; /* always */ - if (BRW_IS_IGDNG(p->brw)) { - if (inst->tex_shadow) - msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG; + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG; } else { - if (inst->tex_shadow) - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + /* Note that G45 and older determines shadow compare and dispatch width + * from message length for most messages. + */ + if (c->dispatch_width == 16 && shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; } brw_SAMPLE(p, - retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + dst_retyped, 1, - retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ - inst->writemask, + retype(depth_payload, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(sampler), + sampler, + dst_flags & WRITEMASK_XYZW, msg_type, response_length, cur_mrf - 1, 0, 1, - BRW_SAMPLER_SIMD_MODE_SIMD16); + simd_mode); } @@ -1530,7 +1557,9 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Texturing operations: */ case OPCODE_TEX: - emit_tex(c, inst, dst, dst_flags, args[0]); + emit_tex(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, + inst->tex_idx, inst->tex_unit, + inst->tex_shadow); break; case OPCODE_TXB: diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 42a13fc80f..4af01a5f2a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1871,94 +1871,6 @@ static void emit_txb(struct brw_wm_compile *c, BRW_SAMPLER_SIMD_MODE_SIMD8); } - -static void emit_tex(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; - GLuint msg_len; - GLuint i, nr; - GLuint emit; - GLboolean shadow = (c->key.shadowtex_mask & (1<TexSrcTarget) { - case TEXTURE_1D_INDEX: - emit = WRITEMASK_X; - nr = 1; - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - emit = WRITEMASK_XY; - nr = 2; - break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: - emit = WRITEMASK_XYZ; - nr = 3; - break; - default: - /* invalid target */ - abort(); - } - msg_len = 1; - - /* move/load S, T, R coords */ - for (i = 0; i < nr; i++) { - static const GLuint swz[4] = {0,1,2,2}; - if (emit & (1<brw)) { - if (shadow) - msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_IGDNG; - } else { - /* Does it work for shadow on SIMD8 ? */ - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; - } - - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ - 1, /* msg_reg_nr */ - retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - SURF_INDEX_TEXTURE(unit), - unit, /* sampler */ - inst->DstReg.WriteMask, /* writemask */ - msg_type, /* msg_type */ - 4, /* response_length */ - shadow ? 6 : 4, /* msg_length */ - 0, /* eot */ - 1, - BRW_SAMPLER_SIMD_MODE_SIMD8); - - if (shadow) - brw_MOV(p, dst[3], brw_imm_f(1.0)); -} - - /** * Resolve subroutine calls after code emit is done. */ @@ -2179,7 +2091,12 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_noise4(c, inst); break; case OPCODE_TEX: - emit_tex(c, inst); + emit_tex(c, dst, dst_flags, args[0], + get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, + 0, 1, 0, 0), + inst->TexSrcTarget, + inst->TexSrcUnit, + (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0); break; case OPCODE_TXB: emit_txb(c, inst); -- cgit v1.2.3 From 57f40b18377f87c434f17d5670a13838d58065c9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 19 Aug 2009 13:44:13 -0700 Subject: i965: Share OPCODE_TXB between brw_wm_emit.c and brw_wm_glsl.c This should fix TXB on G45 and older in the GLSL case. --- src/mesa/drivers/dri/i965/brw_wm.h | 7 +++ src/mesa/drivers/dri/i965/brw_wm_emit.c | 81 +++++++++++++++++++++------------ src/mesa/drivers/dri/i965/brw_wm_glsl.c | 76 ++----------------------------- 3 files changed, 63 insertions(+), 101 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index c497e8a46b..b3c05eb0ad 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -428,6 +428,13 @@ void emit_tex(struct brw_wm_compile *c, GLuint tex_idx, GLuint sampler, GLboolean shadow); +void emit_txb(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler); void emit_wpos_xy(struct brw_wm_compile *c, const struct brw_reg *dst, GLuint mask, diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index 52bb73971b..5390fd2584 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -935,57 +935,77 @@ void emit_tex(struct brw_wm_compile *c, } -static void emit_txb( struct brw_wm_compile *c, - const struct brw_wm_instruction *inst, - struct brw_reg *dst, - GLuint dst_flags, - struct brw_reg *arg ) +void emit_txb(struct brw_wm_compile *c, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg, + struct brw_reg depth_payload, + GLuint tex_idx, + GLuint sampler) { struct brw_compile *p = &c->func; GLuint msgLength; GLuint msg_type; - /* Shadow ignored for txb. + GLuint mrf_per_channel; + GLuint response_length; + struct brw_reg dst_retyped; + + /* The G45 and older chipsets don't support 8-wide dispatch for LOD biased + * samples, so we'll use the 16-wide instruction, leave the second halves + * undefined, and trust the execution mask to keep the undefined pixels + * from mattering. */ - switch (inst->tex_idx) { + if (c->dispatch_width == 16 || !BRW_IS_IGDNG(p->brw)) { + if (BRW_IS_IGDNG(p->brw)) + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + mrf_per_channel = 2; + dst_retyped = retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW); + response_length = 8; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; + mrf_per_channel = 1; + dst_retyped = retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW); + response_length = 4; + } + + /* Shadow ignored for txb. */ + switch (tex_idx) { case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0)); break; case TEXTURE_3D_INDEX: case TEXTURE_CUBE_INDEX: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), arg[2]); + brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]); + brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]); + brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), arg[2]); break; default: /* unexpected target */ abort(); } - brw_MOV(p, brw_message_reg(8), arg[3]); - msgLength = 9; - - if (BRW_IS_IGDNG(p->brw)) - msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; - else - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + brw_MOV(p, brw_message_reg(2 + 3 * mrf_per_channel), arg[3]); + msgLength = 2 + 4 * mrf_per_channel - 1; brw_SAMPLE(p, - retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + dst_retyped, 1, - retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ - inst->writemask, + retype(depth_payload, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(sampler), + sampler, + dst_flags & WRITEMASK_XYZW, msg_type, - 8, /* responseLength */ + response_length, msgLength, 0, 1, @@ -1563,7 +1583,8 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_TXB: - emit_txb(c, inst, dst, dst_flags, args[0]); + emit_txb(c, dst, dst_flags, args[0], c->payload.depth[0].hw_reg, + inst->tex_idx, inst->tex_unit); break; case OPCODE_KIL: diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 4af01a5f2a..3ab446164c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -1801,76 +1801,6 @@ static void emit_noise4( struct brw_wm_compile *c, release_tmps( c, mark ); } - -/* TODO - BIAS on SIMD8 not working yet... - */ -static void emit_txb(struct brw_wm_compile *c, - const struct prog_instruction *inst) -{ - struct brw_compile *p = &c->func; - struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; - GLuint i; - GLuint msg_type; - - assert(unit < BRW_MAX_TEX_UNIT); - - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); - - for (i = 0; i < 4; i++) - dst[i] = get_dst_reg(c, inst, i); - for (i = 0; i < 4; i++) - src[i] = get_src_reg(c, inst, 0, i); - - switch (inst->TexSrcTarget) { - case TEXTURE_1D_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ - brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ - break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); - break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: - brw_MOV(p, brw_message_reg(2), src[0]); - brw_MOV(p, brw_message_reg(3), src[1]); - brw_MOV(p, brw_message_reg(4), src[2]); - break; - default: - /* invalid target */ - abort(); - } - brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ - brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ - - if (BRW_IS_IGDNG(p->brw)) { - msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_IGDNG; - } else { - /* Does it work well on SIMD8? */ - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; - } - - brw_SAMPLE(p, - retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ - 1, /* msg_reg_nr */ - retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ - SURF_INDEX_TEXTURE(unit), - unit, /* sampler */ - inst->DstReg.WriteMask, /* writemask */ - msg_type, /* msg_type */ - 4, /* response_length */ - 4, /* msg_length */ - 0, /* eot */ - 1, - BRW_SAMPLER_SIMD_MODE_SIMD8); -} - /** * Resolve subroutine calls after code emit is done. */ @@ -2099,7 +2029,11 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) (c->key.shadowtex_mask & (1 << inst->TexSrcUnit)) != 0); break; case OPCODE_TXB: - emit_txb(c, inst); + emit_txb(c, dst, dst_flags, args[0], + get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, + 0, 1, 0, 0), + inst->TexSrcTarget, + c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]); break; case OPCODE_KIL_NV: emit_kil(c); -- cgit v1.2.3 From 5606dfb572bf4b89b4882265924705bacc8c182b Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Tue, 17 Nov 2009 16:10:24 -0800 Subject: Merge branch 'outputswritten64' Add a GLbitfield64 type and several macros to operate on 64-bit fields. The OutputsWritten field of gl_program is changed to use that type. This results in a fair amount of fallout in drivers that use programs. No changes are strictly necessary at this point as all bits used are below the 32-bit boundary. Fairly soon several bits will be added for clip distances written by a vertex shader. This will cause several bits used for varyings to be pushed above the 32-bit boundary. This will affect any drivers that support GLSL. At this point, only the i965 driver has been modified to support this eventuality. I did this as a "squash" merge. There were several places through the outputswritten64 branch where things were broken. I foresee this causing difficulties later for bisecting. The history is still available in the branch. Conflicts: src/mesa/drivers/dri/i965/brw_wm.h --- src/mesa/drivers/dri/i965/brw_clip.c | 2 +- src/mesa/drivers/dri/i965/brw_clip.h | 2 +- src/mesa/drivers/dri/i965/brw_context.h | 6 +----- src/mesa/drivers/dri/i965/brw_gs.h | 2 +- src/mesa/drivers/dri/i965/brw_sf.c | 6 +++--- src/mesa/drivers/dri/i965/brw_sf.h | 2 +- src/mesa/drivers/dri/i965/brw_sf_emit.c | 18 +++++++++--------- src/mesa/drivers/dri/i965/brw_util.c | 2 +- src/mesa/drivers/dri/i965/brw_util.h | 2 +- src/mesa/drivers/dri/i965/brw_vs.c | 2 +- src/mesa/drivers/dri/i965/brw_vs_emit.c | 8 ++++---- src/mesa/drivers/dri/i965/brw_wm.c | 4 ++-- src/mesa/drivers/dri/i965/brw_wm.h | 2 +- src/mesa/drivers/dri/i965/brw_wm_fp.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 2 +- src/mesa/drivers/dri/i965/brw_wm_pass2.c | 4 ++-- src/mesa/drivers/dri/i965/brw_wm_state.c | 2 +- src/mesa/drivers/dri/r200/r200_tcl.c | 15 ++++++++------- src/mesa/main/config.h | 2 +- src/mesa/main/context.c | 8 +++++--- src/mesa/main/ffvertex_prog.c | 2 +- src/mesa/main/mtypes.h | 27 ++++++++++++++++++++++++++- src/mesa/main/texenvprogram.c | 2 +- src/mesa/shader/prog_print.c | 6 +++--- src/mesa/shader/program_parse.tab.c | 2 +- src/mesa/shader/program_parse.y | 2 +- src/mesa/shader/programopt.c | 8 ++++---- src/mesa/shader/slang/slang_link.c | 25 +++++++++++++++---------- src/mesa/state_tracker/st_atom_shader.c | 2 +- src/mesa/state_tracker/st_program.c | 2 +- src/mesa/swrast/s_fragprog.c | 12 ++++++------ src/mesa/tnl/t_context.c | 2 +- src/mesa/tnl/t_vb_program.c | 8 ++++---- 33 files changed, 111 insertions(+), 82 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index f45dcf8282..dbd10a5297 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -78,7 +78,7 @@ static void compile_clip_prog( struct brw_context *brw, delta = REG_SIZE; for (i = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { @@ -147,7 +147,7 @@ static void upload_sf_prog(struct brw_context *brw) * edgeflag testing here, it is already done in the clip * program. */ - if (key.attrs & (1<key.attrs & (1<key.attrs & BITFIELD64_BIT(attr)) ? 1 : 0; } /*********************************************************************** @@ -122,8 +122,8 @@ static void do_twoside_color( struct brw_sf_compile *c ) * Flat shading */ -#define VERT_RESULT_COLOR_BITS ((1<nr_setup_regs - 1); - GLuint persp_mask; - GLuint linear_mask; + GLbitfield64 persp_mask; + GLbitfield64 linear_mask; if (c->key.do_flat_shading || c->key.linear_color) persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | @@ -331,10 +331,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, *pc_linear = 0; *pc = 0xf; - if (persp_mask & (1 << c->idx_to_attr[reg*2])) + if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2])) *pc_persp = 0xf; - if (linear_mask & (1 << c->idx_to_attr[reg*2])) + if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2])) *pc_linear = 0xf; /* Maybe only processs one attribute on the final round: @@ -342,10 +342,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, if (reg*2+1 < c->nr_setup_attrs) { *pc |= 0xf0; - if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) + if (persp_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1])) *pc_persp |= 0xf0; - if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) + if (linear_mask & BITFIELD64_BIT(c->idx_to_attr[reg*2+1])) *pc_linear |= 0xf0; } diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c index ce21aa4869..bba9249d1b 100644 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ b/src/mesa/drivers/dri/i965/brw_util.c @@ -35,7 +35,7 @@ #include "brw_util.h" #include "brw_defines.h" -GLuint brw_count_bits( GLuint val ) +GLuint brw_count_bits(uint64_t val) { GLuint i; for (i = 0; val ; val >>= 1) diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index 33e7cd87e4..04f3175d3e 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -35,7 +35,7 @@ #include "main/mtypes.h" -extern GLuint brw_count_bits( GLuint val ); +extern GLuint brw_count_bits(uint64_t val); extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); extern GLuint brw_translate_blend_factor( GLenum factor ); extern GLuint brw_translate_blend_equation( GLenum mode ); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index f0c79efbd9..fd055e225e 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -56,7 +56,7 @@ static void do_vs_prog( struct brw_context *brw, c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { - c.prog_data.outputs_written |= 1<prog_data.outputs_written & (1 << i)) { + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { c->nr_outputs++; assert(i < Elements(c->regs[PROGRAM_OUTPUT])); if (i == VERT_RESULT_HPOS) { @@ -1124,7 +1124,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ - if ((c->prog_data.outputs_written & (1<prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) || c->key.nr_userclip || BRW_IS_965(p->brw)) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); @@ -1134,7 +1134,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); - if (c->prog_data.outputs_written & (1<prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); @@ -1224,7 +1224,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ GLuint i, mrf = 0; for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1 << i)) { + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { /* move from GRF to MRF */ brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); mrf++; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index d8971321f3..77e3b2c32a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -231,7 +231,7 @@ static void brw_wm_populate_key( struct brw_context *brw, ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; - if (fp->program.Base.OutputsWritten & (1<program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) lookup |= IZ_PS_COMPUTES_DEPTH_BIT; /* _NEW_DEPTH */ @@ -347,7 +347,7 @@ static void brw_wm_populate_key( struct brw_context *brw, key->nr_color_regions = brw->state.nr_color_regions; /* CACHE_NEW_VS_PROG */ - key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; + key->vp_outputs_written = brw->vs.prog_data->outputs_written; /* The unique fragment program ID */ key->program_string_id = fp->id; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index b3c05eb0ad..9dcb6e14bb 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -79,7 +79,7 @@ struct brw_wm_prog_key { GLuint program_string_id:32; GLushort origin_x, origin_y; GLushort drawable_height; - GLuint vp_outputs_written; + GLbitfield64 vp_outputs_written; }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 1c4f62ba48..7d03179588 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -986,7 +986,7 @@ static void emit_render_target_writes( struct brw_wm_compile *c ) } else { /* if gl_FragData[0] is written, use it, else use gl_FragColor */ - if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) + if (c->fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0)) outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); else outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 3ab446164c..e8c2cb66ec 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -371,7 +371,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); } - if (c->key.vp_outputs_written & (1 << i)) { + if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) { reg_index += 2; } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c index 6faea018fb..31303febf0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c +++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c @@ -82,8 +82,8 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < FRAG_ATTRIB_MAX; j++) { - if (c->key.vp_outputs_written & (1<key.vp_outputs_written & BITFIELD64_BIT(j)) { int fp_index; if (j >= VERT_RESULT_VAR0) diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 361f91292b..f89ed9bce7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -106,7 +106,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* as far as we can tell */ key->computes_depth = - (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; + (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0; /* BRW_NEW_DEPTH_BUFFER * Override for NULL depthbuffer case, required by the Pixel Shader Computed * Depth field. diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index c702910ef2..e7d48a7f29 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -509,25 +509,26 @@ static GLboolean r200_run_tcl_render( GLcontext *ctx, prog to a not enabled output however, so just don't mess with it. We only need to change compsel. */ GLuint out_compsel = 0; - GLuint vp_out = rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten; + const GLbitfield64 vp_out = + rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten; vimap_rev = &rmesa->curr_vp_hw->inputmap_rev[0]; - assert(vp_out & (1 << VERT_RESULT_HPOS)); + assert(vp_out & BITFIELD64_BIT(VERT_RESULT_HPOS)); out_compsel = R200_OUTPUT_XYZW; - if (vp_out & (1 << VERT_RESULT_COL0)) { + if (vp_out & BITFIELD64_BIT(VERT_RESULT_COL0)) { out_compsel |= R200_OUTPUT_COLOR_0; } - if (vp_out & (1 << VERT_RESULT_COL1)) { + if (vp_out & BITFIELD64_BIT(VERT_RESULT_COL1)) { out_compsel |= R200_OUTPUT_COLOR_1; } - if (vp_out & (1 << VERT_RESULT_FOGC)) { + if (vp_out & BITFIELD64_BIT(VERT_RESULT_FOGC)) { out_compsel |= R200_OUTPUT_DISCRETE_FOG; } - if (vp_out & (1 << VERT_RESULT_PSIZ)) { + if (vp_out & BITFIELD64_BIT(VERT_RESULT_PSIZ)) { out_compsel |= R200_OUTPUT_PT_SIZE; } for (i = VERT_RESULT_TEX0; i < VERT_RESULT_TEX6; i++) { - if (vp_out & (1 << i)) { + if (vp_out & BITFIELD64_BIT(i)) { out_compsel |= R200_OUTPUT_TEX_0 << (i - VERT_RESULT_TEX0); } } diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 8a09efdb53..c5048970cc 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -205,7 +205,7 @@ #define MAX_VARYING 16 /**< number of float[4] vectors */ #define MAX_SAMPLERS MAX_TEXTURE_IMAGE_UNITS #define MAX_PROGRAM_INPUTS 32 -#define MAX_PROGRAM_OUTPUTS 32 +#define MAX_PROGRAM_OUTPUTS 64 /*@}*/ /** For GL_ARB_vertex_program */ diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index 101d3c6b67..b5bf46718f 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -601,9 +601,11 @@ _mesa_init_constants(GLcontext *ctx) ASSERT(MAX_NV_VERTEX_PROGRAM_INPUTS <= VERT_ATTRIB_MAX); ASSERT(MAX_NV_VERTEX_PROGRAM_OUTPUTS <= VERT_RESULT_MAX); - /* check that we don't exceed various 32-bit bitfields */ - ASSERT(VERT_RESULT_MAX <= 32); - ASSERT(FRAG_ATTRIB_MAX <= 32); + /* check that we don't exceed the size of various bitfields */ + ASSERT(VERT_RESULT_MAX <= + (8 * sizeof(ctx->VertexProgram._Current->Base.OutputsWritten))); + ASSERT(FRAG_ATTRIB_MAX <= + (8 * sizeof(ctx->FragmentProgram._Current->Base.InputsRead))); } diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index 356476e35a..fe2416d894 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -438,7 +438,7 @@ static struct ureg register_input( struct tnl_program *p, GLuint input ) */ static struct ureg register_output( struct tnl_program *p, GLuint output ) { - p->program->Base.OutputsWritten |= (1<program->Base.OutputsWritten |= BITFIELD64_BIT(output); return make_ureg(PROGRAM_OUTPUT, output); } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 34c51b5442..881d233ca3 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -78,6 +78,31 @@ #endif +/** + * \name 64-bit extension of GLbitfield. + */ +/*@{*/ +typedef GLuint64 GLbitfield64; + +#define BITFIELD64_ONE 1ULL +#define BITFIELD64_ALLONES ~0ULL + +/** Set a single bit */ +#define BITFIELD64_BIT(b) (BITFIELD64_ONE << (b)) + +/** Set a mask of the least significant \c b bits */ +#define BITFIELD64_MASK(b) (((b) >= 64) ? BITFIELD64_ALLONES : \ + (BITFIELD64_BIT(b) - 1)) + +/** + * Set all bits from l (low bit) to h (high bit), inclusive. + * + * \note \C BITFIELD_64_RANGE(0, 63) return 64 set bits. + */ +#define BITFIELD64_RANGE(l, h) (BITFIELD64_MASK((h) + 1) & ~BITFIELD64_MASK(l)) +/*@}*/ + + /** * \name Some forward type declarations */ @@ -1670,7 +1695,7 @@ struct gl_program struct prog_instruction *Instructions; GLbitfield InputsRead; /**< Bitmask of which input regs are read */ - GLbitfield OutputsWritten; /**< Bitmask of which output regs are written to */ + GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */ GLbitfield InputFlags[MAX_PROGRAM_INPUTS]; /**< PROG_PARAM_BIT_x flags */ GLbitfield OutputFlags[MAX_PROGRAM_OUTPUTS]; /**< PROG_PARAM_BIT_x flags */ GLbitfield TexturesUsed[MAX_TEXTURE_UNITS]; /**< TEXTURE_x_BIT bitmask */ diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c index d7e77e759e..f439d4addb 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -367,7 +367,7 @@ static GLbitfield get_fp_input_mask( GLcontext *ctx ) else { /* calculate from vp->outputs */ struct gl_vertex_program *vprog; - GLbitfield vp_outputs; + GLbitfield64 vp_outputs; /* Choose GLSL vertex shader over ARB vertex program. Need this * since vertex shader state validation comes after fragment state diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c index ba4d39452f..52c102cbaa 100644 --- a/src/mesa/shader/prog_print.c +++ b/src/mesa/shader/prog_print.c @@ -826,11 +826,11 @@ _mesa_print_program(const struct gl_program *prog) * XXX move to imports.[ch] if useful elsewhere. */ static const char * -binary(GLbitfield val) +binary(GLbitfield64 val) { - static char buf[50]; + static char buf[80]; GLint i, len = 0; - for (i = 31; i >= 0; --i) { + for (i = 63; i >= 0; --i) { if (val & (1 << i)) buf[len++] = '1'; else if (len > 0 || i == 0) diff --git a/src/mesa/shader/program_parse.tab.c b/src/mesa/shader/program_parse.tab.c index b9ef88b64b..d4f8429488 100644 --- a/src/mesa/shader/program_parse.tab.c +++ b/src/mesa/shader/program_parse.tab.c @@ -2622,7 +2622,7 @@ yyreduce: YYERROR; } - state->prog->OutputsWritten |= (1U << (yyval.dst_reg).Index); + state->prog->OutputsWritten |= BITFIELD64_BIT((yyval.dst_reg).Index); } ;} break; diff --git a/src/mesa/shader/program_parse.y b/src/mesa/shader/program_parse.y index d07bf85b36..8ca6f9805b 100644 --- a/src/mesa/shader/program_parse.y +++ b/src/mesa/shader/program_parse.y @@ -643,7 +643,7 @@ maskedDstReg: dstReg optionalMask optionalCcMask YYERROR; } - state->prog->OutputsWritten |= (1U << $$.Index); + state->prog->OutputsWritten |= BITFIELD64_BIT($$.Index); } } ; diff --git a/src/mesa/shader/programopt.c b/src/mesa/shader/programopt.c index 3b8529592d..a0daac1b80 100644 --- a/src/mesa/shader/programopt.c +++ b/src/mesa/shader/programopt.c @@ -109,7 +109,7 @@ _mesa_insert_mvp_dp4_code(GLcontext *ctx, struct gl_vertex_program *vprog) vprog->Base.Instructions = newInst; vprog->Base.NumInstructions = newLen; vprog->Base.InputsRead |= VERT_BIT_POS; - vprog->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS); + vprog->Base.OutputsWritten |= BITFIELD64_BIT(VERT_RESULT_HPOS); } @@ -211,7 +211,7 @@ _mesa_insert_mvp_mad_code(GLcontext *ctx, struct gl_vertex_program *vprog) vprog->Base.Instructions = newInst; vprog->Base.NumInstructions = newLen; vprog->Base.InputsRead |= VERT_BIT_POS; - vprog->Base.OutputsWritten |= (1 << VERT_RESULT_HPOS); + vprog->Base.OutputsWritten |= BITFIELD64_BIT(VERT_RESULT_HPOS); } @@ -613,7 +613,7 @@ _mesa_nop_fragment_program(GLcontext *ctx, struct gl_fragment_program *prog) prog->Base.Instructions = inst; prog->Base.NumInstructions = 2; prog->Base.InputsRead = 1 << inputAttr; - prog->Base.OutputsWritten = 1 << FRAG_RESULT_COLOR; + prog->Base.OutputsWritten = BITFIELD64_BIT(FRAG_RESULT_COLOR); } @@ -657,7 +657,7 @@ _mesa_nop_vertex_program(GLcontext *ctx, struct gl_vertex_program *prog) prog->Base.Instructions = inst; prog->Base.NumInstructions = 2; prog->Base.InputsRead = 1 << inputAttr; - prog->Base.OutputsWritten = 1 << VERT_RESULT_COL0; + prog->Base.OutputsWritten = BITFIELD64_BIT(VERT_RESULT_COL0); /* * Now insert code to do standard modelview/projection transformation. diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c index 144c126525..0a2bc49780 100644 --- a/src/mesa/shader/slang/slang_link.c +++ b/src/mesa/shader/slang/slang_link.c @@ -515,7 +515,7 @@ _slang_update_inputs_outputs(struct gl_program *prog) } if (inst->DstReg.File == PROGRAM_OUTPUT) { - prog->OutputsWritten |= 1 << inst->DstReg.Index; + prog->OutputsWritten |= BITFIELD64_BIT(inst->DstReg.Index); if (inst->DstReg.RelAddr) { /* If the output attribute is indexed with relative addressing * we know that it must be a varying or texcoord such as @@ -528,14 +528,17 @@ _slang_update_inputs_outputs(struct gl_program *prog) if (prog->Target == GL_VERTEX_PROGRAM_ARB) { if (inst->DstReg.Index == VERT_RESULT_TEX0) { /* mark all texcoord outputs as written */ - const GLbitfield mask = - ((1 << MAX_TEXTURE_COORD_UNITS) - 1) << VERT_RESULT_TEX0; + const GLbitfield64 mask = + BITFIELD64_RANGE(VERT_RESULT_TEX0, + (VERT_RESULT_TEX0 + + MAX_TEXTURE_COORD_UNITS - 1)); prog->OutputsWritten |= mask; } else if (inst->DstReg.Index == VERT_RESULT_VAR0) { /* mark all generic varying outputs as written */ - const GLbitfield mask = - ((1 << MAX_VARYING) - 1) << VERT_RESULT_VAR0; + const GLbitfield64 mask = + BITFIELD64_RANGE(VERT_RESULT_VAR0, + (VERT_RESULT_VAR0 + MAX_VARYING - 1)); prog->OutputsWritten |= mask; } } @@ -807,7 +810,8 @@ _slang_link(GLcontext *ctx, if (shProg->VertexProgram) { _slang_update_inputs_outputs(&shProg->VertexProgram->Base); _slang_count_temporaries(&shProg->VertexProgram->Base); - if (!(shProg->VertexProgram->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) { + if (!(shProg->VertexProgram->Base.OutputsWritten + & BITFIELD64_BIT(VERT_RESULT_HPOS))) { /* the vertex program did not compute a vertex position */ link_error(shProg, "gl_Position was not written by vertex shader\n"); @@ -825,7 +829,7 @@ _slang_link(GLcontext *ctx, if (shProg->FragmentProgram) { const GLbitfield varyingRead = shProg->FragmentProgram->Base.InputsRead >> FRAG_ATTRIB_VAR0; - const GLbitfield varyingWritten = shProg->VertexProgram ? + const GLbitfield64 varyingWritten = shProg->VertexProgram ? shProg->VertexProgram->Base.OutputsWritten >> VERT_RESULT_VAR0 : 0x0; if ((varyingRead & varyingWritten) != varyingRead) { link_error(shProg, @@ -836,9 +840,10 @@ _slang_link(GLcontext *ctx, /* check that gl_FragColor and gl_FragData are not both written to */ if (shProg->FragmentProgram) { - GLbitfield outputsWritten = shProg->FragmentProgram->Base.OutputsWritten; - if ((outputsWritten & ((1 << FRAG_RESULT_COLOR))) && - (outputsWritten >= (1 << FRAG_RESULT_DATA0))) { + const GLbitfield64 outputsWritten = + shProg->FragmentProgram->Base.OutputsWritten; + if ((outputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR)) && + (outputsWritten >= BITFIELD64_BIT(FRAG_RESULT_DATA0))) { link_error(shProg, "Fragment program cannot write both gl_FragColor" " and gl_FragData[].\n"); return; diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index ee649be885..6e311e537e 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -176,7 +176,7 @@ find_translated_vp(struct st_context *st, /* See if we need to translate vertex program to TGSI form */ if (xvp->serialNo != stvp->serialNo) { GLuint outAttr; - const GLbitfield outputsWritten = stvp->Base.Base.OutputsWritten; + const GLbitfield64 outputsWritten = stvp->Base.Base.OutputsWritten; GLuint numVpOuts = 0; GLboolean emitPntSize = GL_FALSE, emitBFC0 = GL_FALSE, emitBFC1 = GL_FALSE; GLbitfield usedGenerics = 0x0; diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 6d02722c13..190b6a5526 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -469,7 +469,7 @@ st_translate_fragment_program(struct st_context *st, */ { uint numColors = 0; - GLbitfield outputsWritten = stfp->Base.Base.OutputsWritten; + GLbitfield64 outputsWritten = stfp->Base.Base.OutputsWritten; /* if z is written, emit that first */ if (outputsWritten & (1 << FRAG_RESULT_DEPTH)) { diff --git a/src/mesa/swrast/s_fragprog.c b/src/mesa/swrast/s_fragprog.c index 77a77f0bcb..a22d34415d 100644 --- a/src/mesa/swrast/s_fragprog.c +++ b/src/mesa/swrast/s_fragprog.c @@ -190,7 +190,7 @@ run_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end) { SWcontext *swrast = SWRAST_CONTEXT(ctx); const struct gl_fragment_program *program = ctx->FragmentProgram._Current; - const GLbitfield outputsWritten = program->Base.OutputsWritten; + const GLbitfield64 outputsWritten = program->Base.OutputsWritten; struct gl_program_machine *machine = &swrast->FragProgMachine; GLuint i; @@ -201,7 +201,7 @@ run_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end) if (_mesa_execute_program(ctx, &program->Base, machine)) { /* Store result color */ - if (outputsWritten & (1 << FRAG_RESULT_COLOR)) { + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR)) { COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], machine->Outputs[FRAG_RESULT_COLOR]); } @@ -212,7 +212,7 @@ run_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end) */ GLuint buf; for (buf = 0; buf < ctx->DrawBuffer->_NumColorDrawBuffers; buf++) { - if (outputsWritten & (1 << (FRAG_RESULT_DATA0 + buf))) { + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0 + buf)) { COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0 + buf][i], machine->Outputs[FRAG_RESULT_DATA0 + buf]); } @@ -220,7 +220,7 @@ run_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end) } /* Store result depth/z */ - if (outputsWritten & (1 << FRAG_RESULT_DEPTH)) { + if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { const GLfloat depth = machine->Outputs[FRAG_RESULT_DEPTH][2]; if (depth <= 0.0) span->array->z[i] = 0; @@ -256,12 +256,12 @@ _swrast_exec_fragment_program( GLcontext *ctx, SWspan *span ) run_program(ctx, span, 0, span->end); - if (program->Base.OutputsWritten & (1 << FRAG_RESULT_COLOR)) { + if (program->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR)) { span->interpMask &= ~SPAN_RGBA; span->arrayMask |= SPAN_RGBA; } - if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) { + if (program->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { span->interpMask &= ~SPAN_Z; span->arrayMask |= SPAN_Z; } diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index f2771cde09..db21b4589d 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -171,7 +171,7 @@ _tnl_InvalidateState( GLcontext *ctx, GLuint new_state ) if (vp) { GLuint i; for (i = 0; i < MAX_VARYING; i++) { - if (vp->Base.OutputsWritten & (1 << (VERT_RESULT_VAR0 + i))) { + if (vp->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_VAR0 + i)) { RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_GENERIC(i)); } diff --git a/src/mesa/tnl/t_vb_program.c b/src/mesa/tnl/t_vb_program.c index c10a27614f..e69f7d5766 100644 --- a/src/mesa/tnl/t_vb_program.c +++ b/src/mesa/tnl/t_vb_program.c @@ -329,7 +329,7 @@ run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage ) /* make list of outputs to save some time below */ numOutputs = 0; for (i = 0; i < VERT_RESULT_MAX; i++) { - if (program->Base.OutputsWritten & (1 << i)) { + if (program->Base.OutputsWritten & BITFIELD64_BIT(i)) { outputs[numOutputs++] = i; } } @@ -407,14 +407,14 @@ run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage ) /* Fixup fog and point size results if needed */ if (program->IsNVProgram) { if (ctx->Fog.Enabled && - (program->Base.OutputsWritten & (1 << VERT_RESULT_FOGC)) == 0) { + (program->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_FOGC)) == 0) { for (i = 0; i < VB->Count; i++) { store->results[VERT_RESULT_FOGC].data[i][0] = 1.0; } } if (ctx->VertexProgram.PointSizeEnabled && - (program->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) == 0) { + (program->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_PSIZ)) == 0) { for (i = 0; i < VB->Count; i++) { store->results[VERT_RESULT_PSIZ].data[i][0] = ctx->Point.Size; } @@ -472,7 +472,7 @@ run_vp( GLcontext *ctx, struct tnl_pipeline_stage *stage ) } for (i = 0; i < ctx->Const.MaxVarying; i++) { - if (program->Base.OutputsWritten & (1 << (VERT_RESULT_VAR0 + i))) { + if (program->Base.OutputsWritten & BITFIELD64_BIT(VERT_RESULT_VAR0 + i)) { /* Note: varying results get put into the generic attributes */ VB->AttribPtr[VERT_ATTRIB_GENERIC0+i] = &store->results[VERT_RESULT_VAR0 + i]; -- cgit v1.2.3 From 92d35b91f132deda1fb27d2071a50e8187301fe5 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 18 Nov 2009 03:01:42 -0800 Subject: i965: Pack the brw_wm_prog_key better. --- src/mesa/drivers/dri/i965/brw_wm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers/dri/i965/brw_wm.h') diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 9dcb6e14bb..b9b987ea70 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -76,10 +76,10 @@ struct brw_wm_prog_key { GLushort tex_swizzles[BRW_MAX_TEX_UNIT]; - GLuint program_string_id:32; GLushort origin_x, origin_y; GLushort drawable_height; GLbitfield64 vp_outputs_written; + GLuint program_string_id:32; }; -- cgit v1.2.3