From 2f5f7c07732577f60666e3cee69c75c9b035c145 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 23 Oct 2009 16:55:02 +0100 Subject: i965g: re-starting from the dri driver --- src/gallium/drivers/i965/brw_wm.h | 309 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 309 insertions(+) create mode 100644 src/gallium/drivers/i965/brw_wm.h (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h new file mode 100644 index 0000000000..872b1f3ecf --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm.h @@ -0,0 +1,309 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_WM_H +#define BRW_WM_H + + +#include "shader/prog_instruction.h" +#include "brw_context.h" +#include "brw_eu.h" + +#define SATURATE (1<<5) + +/* A big lookup table is used to figure out which and how many + * additional regs will be inserted before the main payload in the WM + * program execution. These mainly relate to depth and stencil + * processing and the early-depth-test optimization. + */ +#define IZ_PS_KILL_ALPHATEST_BIT 0x1 +#define IZ_PS_COMPUTES_DEPTH_BIT 0x2 +#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4 +#define IZ_DEPTH_TEST_ENABLE_BIT 0x8 +#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10 +#define IZ_STENCIL_TEST_ENABLE_BIT 0x20 +#define IZ_BIT_MAX 0x40 + +#define AA_NEVER 0 +#define AA_SOMETIMES 1 +#define AA_ALWAYS 2 + +struct brw_wm_prog_key { + GLuint source_depth_reg:3; + GLuint aa_dest_stencil_reg:3; + GLuint dest_depth_reg:3; + GLuint nr_depth_regs:3; + GLuint computes_depth:1; /* could be derived from program string */ + GLuint source_depth_to_render_target:1; + GLuint flat_shade:1; + GLuint linear_color:1; /**< linear interpolation vs perspective interp */ + GLuint runtime_check_aads_emit:1; + + GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ + GLuint shadowtex_mask:16; + GLuint yuvtex_mask:16; + GLuint yuvtex_swap_mask:16; /* UV swapped */ + + GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; + + GLuint program_string_id:32; + GLuint origin_x, origin_y; + GLuint drawable_height; + GLuint vp_outputs_written; +}; + + +/* A bit of a glossary: + * + * brw_wm_value: A computed value or program input. Values are + * constant, they are created once and are never modified. When a + * fragment program register is written or overwritten, new values are + * created fresh, preserving the rule that values are constant. + * + * brw_wm_ref: A reference to a value. 
Wherever a value is used by an + * instruction or as a program output, that is tracked with an + * instance of this struct. All references to a value occur after it + * is created. After the last reference, a value is dead and can be + * discarded. + * + * brw_wm_grf: Represents a physical hardware register. May be either + * empty or hold a value. Register allocation is the process of + * assigning values to grf registers. This occurs in pass2 and the + * brw_wm_grf struct is not used before that. + * + * Fragment program registers: These are time-varying constructs that + * are hard to reason about and which we translate away in pass0. A + * single fragment program register element (eg. temp[0].x) will be + * translated to one or more brw_wm_value structs, one for each time + * that temp[0].x is written to during the program. + */ + + + +/* Used in pass2 to track register allocation. + */ +struct brw_wm_grf { + struct brw_wm_value *value; + GLuint nextuse; +}; + +struct brw_wm_value { + struct brw_reg hw_reg; /* emitted to this reg, may not always be there */ + struct brw_wm_ref *lastuse; + struct brw_wm_grf *resident; + GLuint contributes_to_output:1; + GLuint spill_slot:16; /* if non-zero, spill immediately after calculation */ +}; + +struct brw_wm_ref { + struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */ + struct brw_wm_value *value; + struct brw_wm_ref *prevuse; + GLuint unspill_reg:7; /* unspill to reg */ + GLuint emitted:1; + GLuint insn:24; +}; + +struct brw_wm_constref { + const struct brw_wm_ref *ref; + GLfloat constval; +}; + + +struct brw_wm_instruction { + struct brw_wm_value *dst[4]; + struct brw_wm_ref *src[3][4]; + GLuint opcode:8; + GLuint saturate:1; + GLuint writemask:4; + GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ + GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ + GLuint tex_shadow:1; /* do shadow comparison? 
*/ + GLuint eot:1; /* End of thread indicator for FB_WRITE*/ + GLuint target:10; /* target binding table index for FB_WRITE*/ +}; + + +#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) +#define BRW_WM_MAX_GRF 128 /* hardware limit */ +#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) +#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) +#define BRW_WM_MAX_PARAM 256 +#define BRW_WM_MAX_CONST 256 +#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS +#define BRW_WM_MAX_SUBROUTINE 16 + + + +/* New opcodes to track internal operations required for WM unit. + * These are added early so that the registers used can be tracked, + * freed and reused like those of other instructions. + */ +#define WM_PIXELXY (MAX_OPCODE) +#define WM_DELTAXY (MAX_OPCODE + 1) +#define WM_PIXELW (MAX_OPCODE + 2) +#define WM_LINTERP (MAX_OPCODE + 3) +#define WM_PINTERP (MAX_OPCODE + 4) +#define WM_CINTERP (MAX_OPCODE + 5) +#define WM_WPOSXY (MAX_OPCODE + 6) +#define WM_FB_WRITE (MAX_OPCODE + 7) +#define WM_FRONTFACING (MAX_OPCODE + 8) +#define MAX_WM_OPCODE (MAX_OPCODE + 9) + +#define PROGRAM_PAYLOAD (PROGRAM_FILE_MAX) +#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) + +struct brw_wm_compile { + struct brw_compile func; + struct brw_wm_prog_key key; + struct brw_wm_prog_data prog_data; + + struct brw_fragment_program *fp; + + GLfloat (*env_param)[4]; + + enum { + START, + PASS2_DONE + } state; + + /* Initial pass - translate fp instructions to fp instructions, + * simplifying and adding instructions for interpolation and + * framebuffer writes. 
+ */ + struct prog_instruction prog_instructions[BRW_WM_MAX_INSN]; + GLuint nr_fp_insns; + GLuint fp_temp; + GLuint fp_interp_emitted; + GLuint fp_fragcolor_emitted; + + struct prog_src_register pixel_xy; + struct prog_src_register delta_xy; + struct prog_src_register pixel_w; + + + struct brw_wm_value vreg[BRW_WM_MAX_VREG]; + GLuint nr_vreg; + + struct brw_wm_value creg[BRW_WM_MAX_PARAM]; + GLuint nr_creg; + + struct { + struct brw_wm_value depth[4]; /* includes r0/r1 */ + struct brw_wm_value input_interp[FRAG_ATTRIB_MAX]; + } payload; + + + const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4]; + + struct brw_wm_ref undef_ref; + struct brw_wm_value undef_value; + + struct brw_wm_ref refs[BRW_WM_MAX_REF]; + GLuint nr_refs; + + struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; + GLuint nr_insns; + + struct brw_wm_constref constref[BRW_WM_MAX_CONST]; + GLuint nr_constrefs; + + struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2]; + + GLuint grf_limit; + GLuint max_wm_grf; + GLuint last_scratch; + + GLuint cur_inst; /**< index of current instruction */ + + GLboolean out_of_regs; /**< ran out of GRF registers? 
*/ + + /** Mapping from Mesa registers to hardware registers */ + struct { + GLboolean inited; + struct brw_reg reg; + } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + + GLboolean used_grf[BRW_WM_MAX_GRF]; + GLuint first_free_grf; + struct brw_reg stack; + struct brw_reg emit_mask_reg; + GLuint tmp_regs[BRW_WM_MAX_GRF]; + GLuint tmp_index; + GLuint tmp_max; + GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; + GLuint dispatch_width; + + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; +}; + + +GLuint brw_wm_nr_args( GLuint opcode ); +GLuint brw_wm_is_scalar_result( GLuint opcode ); + +void brw_wm_pass_fp( struct brw_wm_compile *c ); +void brw_wm_pass0( struct brw_wm_compile *c ); +void brw_wm_pass1( struct brw_wm_compile *c ); +void brw_wm_pass2( struct brw_wm_compile *c ); +void brw_wm_emit( struct brw_wm_compile *c ); + +void brw_wm_print_value( struct brw_wm_compile *c, + struct brw_wm_value *value ); + +void brw_wm_print_ref( struct brw_wm_compile *c, + struct brw_wm_ref *ref ); + +void brw_wm_print_insn( struct brw_wm_compile *c, + struct brw_wm_instruction *inst ); + +void brw_wm_print_program( struct brw_wm_compile *c, + const char *stage ); + +void brw_wm_lookup_iz( GLuint line_aa, + GLuint lookup, + GLboolean ps_uses_depth, + struct brw_wm_prog_key *key ); + +GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); +void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); + +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0); + +#endif -- cgit v1.2.3 From 57a920cb1a0b6051068e730747b3fb475de88aca Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 23 Oct 2009 17:01:32 +0100 Subject: i965g: wip --- src/gallium/drivers/i965/brw_bo.c | 12 + src/gallium/drivers/i965/brw_cc.c | 180 +---- src/gallium/drivers/i965/brw_clip.c | 127 +-- src/gallium/drivers/i965/brw_clip.h | 5 +- 
src/gallium/drivers/i965/brw_clip_line.c | 7 - src/gallium/drivers/i965/brw_clip_point.c | 7 - src/gallium/drivers/i965/brw_clip_state.c | 7 +- src/gallium/drivers/i965/brw_clip_tri.c | 7 - src/gallium/drivers/i965/brw_clip_unfilled.c | 5 - src/gallium/drivers/i965/brw_clip_util.c | 7 - src/gallium/drivers/i965/brw_context.c | 135 ++-- src/gallium/drivers/i965/brw_context.h | 7 +- src/gallium/drivers/i965/brw_curbe.c | 89 +-- src/gallium/drivers/i965/brw_defines.h | 4 +- src/gallium/drivers/i965/brw_disasm.c | 2 - src/gallium/drivers/i965/brw_draw.c | 244 +----- src/gallium/drivers/i965/brw_draw_upload.c | 566 ++++--------- src/gallium/drivers/i965/brw_gs.c | 2 +- src/gallium/drivers/i965/brw_pipe_blend.c | 41 + src/gallium/drivers/i965/brw_pipe_debug.c | 2 + src/gallium/drivers/i965/brw_pipe_depth.c | 52 ++ src/gallium/drivers/i965/brw_pipe_fb.c | 25 + src/gallium/drivers/i965/brw_pipe_flush.c | 64 ++ src/gallium/drivers/i965/brw_screen_surface.c | 27 + src/gallium/drivers/i965/brw_sf.c | 4 +- src/gallium/drivers/i965/brw_sf_emit.c | 4 +- src/gallium/drivers/i965/brw_state_upload.c | 63 +- src/gallium/drivers/i965/brw_swtnl.c | 114 +++ src/gallium/drivers/i965/brw_types.h | 11 + src/gallium/drivers/i965/brw_util.c | 8 - src/gallium/drivers/i965/brw_vs.c | 12 +- src/gallium/drivers/i965/brw_vs_emit.c | 250 ++---- src/gallium/drivers/i965/brw_wm.c | 59 +- src/gallium/drivers/i965/brw_wm.h | 1 - src/gallium/drivers/i965/brw_wm_emit.c | 17 +- src/gallium/drivers/i965/brw_wm_fp.c | 193 ++--- src/gallium/drivers/i965/brw_wm_glsl.c | 1060 +------------------------ src/gallium/drivers/i965/brw_wm_pass0.c | 1 - src/gallium/drivers/i965/brw_wm_pass1.c | 81 +- src/gallium/drivers/i965/intel_chipset.h | 4 +- 40 files changed, 907 insertions(+), 2599 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_bo.c create mode 100644 src/gallium/drivers/i965/brw_pipe_blend.c create mode 100644 src/gallium/drivers/i965/brw_pipe_debug.c create mode 100644 
src/gallium/drivers/i965/brw_pipe_depth.c create mode 100644 src/gallium/drivers/i965/brw_pipe_fb.c create mode 100644 src/gallium/drivers/i965/brw_pipe_flush.c create mode 100644 src/gallium/drivers/i965/brw_screen_surface.c create mode 100644 src/gallium/drivers/i965/brw_swtnl.c create mode 100644 src/gallium/drivers/i965/brw_types.h (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_bo.c b/src/gallium/drivers/i965/brw_bo.c new file mode 100644 index 0000000000..e7a4dac666 --- /dev/null +++ b/src/gallium/drivers/i965/brw_bo.c @@ -0,0 +1,12 @@ + + +void brw_buffer_subdata() +{ + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); + } else { + dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } +} diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index 1088a7a607..9ab5638137 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -62,84 +62,21 @@ const struct brw_tracked_state brw_cc_vp = { }; struct brw_cc_unit_key { - GLboolean stencil, stencil_two_side, color_blend, alpha_enabled; - - GLenum stencil_func[2], stencil_fail_op[2]; - GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; - GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2]; - GLenum logic_op; - - GLenum blend_eq_rgb, blend_eq_a; - GLenum blend_src_rgb, blend_src_a; - GLenum blend_dst_rgb, blend_dst_a; - - GLenum alpha_func; - GLclampf alpha_ref; - - GLboolean dither; - - GLboolean depth_test, depth_write; - GLenum depth_func; + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_blend_state blend; /* no color mask */ }; static void cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) { - GLcontext *ctx = &brw->intel.ctx; - const unsigned back = ctx->Stencil._BackFace; - memset(key, 0, sizeof(*key)); + + 
key->dsa = brw->curr.dsa.base; + key->blend = brw->curr.blend.base; - key->stencil = ctx->Stencil._Enabled; - key->stencil_two_side = ctx->Stencil._TestTwoSide; - - if (key->stencil) { - key->stencil_func[0] = ctx->Stencil.Function[0]; - key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; - key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; - key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; - key->stencil_ref[0] = ctx->Stencil.Ref[0]; - key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; - key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; - } - if (key->stencil_two_side) { - key->stencil_func[1] = ctx->Stencil.Function[back]; - key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; - key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; - key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; - key->stencil_ref[1] = ctx->Stencil.Ref[back]; - key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; - key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; - } - - if (ctx->Color._LogicOpEnabled) - key->logic_op = ctx->Color.LogicOp; - else - key->logic_op = GL_COPY; - - key->color_blend = ctx->Color.BlendEnabled; - if (key->color_blend) { - key->blend_eq_rgb = ctx->Color.BlendEquationRGB; - key->blend_eq_a = ctx->Color.BlendEquationA; - key->blend_src_rgb = ctx->Color.BlendSrcRGB; - key->blend_dst_rgb = ctx->Color.BlendDstRGB; - key->blend_src_a = ctx->Color.BlendSrcA; - key->blend_dst_a = ctx->Color.BlendDstA; - } - - key->alpha_enabled = ctx->Color.AlphaEnabled; - if (key->alpha_enabled) { - key->alpha_func = ctx->Color.AlphaFunc; - key->alpha_ref = ctx->Color.AlphaRef; - } - - key->dither = ctx->Color.DitherFlag; - - key->depth_test = ctx->Depth.Test; - if (key->depth_test) { - key->depth_func = ctx->Depth.Func; - key->depth_write = ctx->Depth.Mask; - } + /* Clear non-respected values: + */ + key->blend.colormask = 0xf; } /** @@ -153,103 +90,16 @@ cc_unit_create_from_key(struct brw_context *brw, 
struct brw_cc_unit_key *key) memset(&cc, 0, sizeof(cc)); - /* _NEW_STENCIL */ - if (key->stencil) { - cc.cc0.stencil_enable = 1; - cc.cc0.stencil_func = - intel_translate_compare_func(key->stencil_func[0]); - cc.cc0.stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[0]); - cc.cc0.stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); - cc.cc0.stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); - cc.cc1.stencil_ref = key->stencil_ref[0]; - cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; - cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; - - if (key->stencil_two_side) { - cc.cc0.bf_stencil_enable = 1; - cc.cc0.bf_stencil_func = - intel_translate_compare_func(key->stencil_func[1]); - cc.cc0.bf_stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[1]); - cc.cc0.bf_stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); - cc.cc0.bf_stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); - cc.cc1.bf_stencil_ref = key->stencil_ref[1]; - cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; - cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; - } - - /* Not really sure about this: - */ - if (key->stencil_write_mask[0] || - (key->stencil_two_side && key->stencil_write_mask[1])) - cc.cc0.stencil_write_enable = 1; - } - - /* _NEW_COLOR */ - if (key->logic_op != GL_COPY) { - cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) { - srcRGB = dstRGB = GL_ONE; - } - - if (eqA == GL_MIN || eqA == GL_MAX) { - srcA = dstA = GL_ONE; - } - - 
cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); - cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); - cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); - - cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); - cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); - - cc.cc3.blend_enable = 1; - cc.cc3.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB); - } - - if (key->alpha_enabled) { - cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); - cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); - } - - if (key->dither) { - cc.cc5.dither_enable = 1; - cc.cc6.y_dither_offset = 0; - cc.cc6.x_dither_offset = 0; - } - - /* _NEW_DEPTH */ - if (key->depth_test) { - cc.cc2.depth_test = 1; - cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); - cc.cc2.depth_write_enable = key->depth_write; - } + cc.cc0 = brw->dsa.cc0; + cc.cc1 = brw->dsa.cc1; + cc.cc2 = brw->dsa.cc2; + cc.cc3 = brw->dsa.cc3 | brw->blend.cc3; /* CACHE_NEW_CC_VP */ cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ - if (INTEL_DEBUG & DEBUG_STATS) - cc.cc5.statistics_enable = 1; + cc.cc5 = brw->blend.cc5 | brw->debug.cc5; + bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, key, sizeof(*key), @@ -286,7 +136,7 @@ static void prepare_cc_unit( struct brw_context *brw ) const struct brw_tracked_state brw_cc_unit = { .dirty = { - .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH, + .mesa = PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_BLEND, .brw = 0, .cache = CACHE_NEW_CC_VP }, diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 20a927cf38..df1b3718d0 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -29,9 +29,9 @@ * Keith Whitwell */ 
-#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" +#include "pipe/p_state.h" + +#include "util/u_math.h" #include "intel_batchbuffer.h" @@ -83,7 +83,7 @@ static void compile_clip_prog( struct brw_context *brw, delta += ATTR_SIZE; } - c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_attrs = util_count_bits(c.key.attrs); if (BRW_IS_IGDNG(brw)) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ @@ -104,16 +104,16 @@ static void compile_clip_prog( struct brw_context *brw, * do all three: */ switch (key->primitive) { - case GL_TRIANGLES: + case PIPE_PRIM_TRIANGLES: if (key->do_unfilled) brw_emit_unfilled_clip( &c ); else brw_emit_tri_clip( &c ); break; - case GL_LINES: + case PIPE_PRIM_LINES: brw_emit_line_clip( &c ); break; - case GL_POINTS: + case PIPE_PRIM_POINTS: brw_emit_point_clip( &c ); break; default: @@ -143,7 +143,6 @@ static void compile_clip_prog( struct brw_context *brw, */ static void upload_clip_prog(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; struct brw_clip_prog_key key; memset(&key, 0, sizeof(key)); @@ -151,101 +150,51 @@ static void upload_clip_prog(struct brw_context *brw) /* Populate the key: */ /* BRW_NEW_REDUCED_PRIMITIVE */ - key.primitive = brw->intel.reduced_primitive; + key.primitive = brw->reduced_primitive; /* CACHE_NEW_VS_PROG */ key.attrs = brw->vs.prog_data->outputs_written; - /* _NEW_LIGHT */ - key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); - /* _NEW_TRANSFORM */ - key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + /* PIPE_NEW_RAST */ + key.do_flat_shading = brw->rast.base.flatshade; + /* PIPE_NEW_UCP */ + key.nr_userclip = brw->nr_ucp; if (BRW_IS_IGDNG(brw)) key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; else key.clip_mode = BRW_CLIPMODE_NORMAL; - /* _NEW_POLYGON */ - if (key.primitive == GL_TRIANGLES) { - if (ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) + /* PIPE_NEW_RAST */ + if (key.primitive == 
PIPE_PRIM_TRIANGLES) { + if (brw->rast->cull_mode == PIPE_WINDING_BOTH) /* compare, do not assign: both windings culled */ key.clip_mode = BRW_CLIPMODE_REJECT_ALL; else { - GLuint fill_front = CLIP_CULL; - GLuint fill_back = CLIP_CULL; - GLuint offset_front = 0; - GLuint offset_back = 0; - - if (!ctx->Polygon.CullFlag || - ctx->Polygon.CullFaceMode != GL_FRONT) { - switch (ctx->Polygon.FrontMode) { - case GL_FILL: - fill_front = CLIP_FILL; - offset_front = 0; - break; - case GL_LINE: - fill_front = CLIP_LINE; - offset_front = ctx->Polygon.OffsetLine; - break; - case GL_POINT: - fill_front = CLIP_POINT; - offset_front = ctx->Polygon.OffsetPoint; - break; - } + key.fill_ccw = CLIP_CULL; + key.fill_cw = CLIP_CULL; + + if (!(brw->rast->cull_mode & PIPE_WINDING_CCW)) { + key.fill_ccw = translate_fill(brw->rast.fill_ccw); } - if (!ctx->Polygon.CullFlag || - ctx->Polygon.CullFaceMode != GL_BACK) { - switch (ctx->Polygon.BackMode) { - case GL_FILL: - fill_back = CLIP_FILL; - offset_back = 0; - break; - case GL_LINE: - fill_back = CLIP_LINE; - offset_back = ctx->Polygon.OffsetLine; - break; - case GL_POINT: - fill_back = CLIP_POINT; - offset_back = ctx->Polygon.OffsetPoint; - break; - } + if (!(brw->rast->cull_mode & PIPE_WINDING_CW)) { + key.fill_cw = translate_fill(brw->rast.fill_cw); } - if (ctx->Polygon.BackMode != GL_FILL || - ctx->Polygon.FrontMode != GL_FILL) { + if (key.fill_cw != CLIP_FILL || + key.fill_ccw != CLIP_FILL) { key.do_unfilled = 1; - - /* Most cases the fixed function units will handle. 
Cases where - * one or more polygon faces are unfilled will require help: - */ key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + } + + key.offset_ccw = brw->rast.offset_ccw; + key.offset_cw = brw->rast.offset_cw; + + if (brw->rast.light_twoside && + key.fill_cw != CLIP_CULL) + key.copy_bfc_cw = 1; - if (offset_back || offset_front) { - /* _NEW_POLYGON, _NEW_BUFFERS */ - key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale; - key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD; - } - - switch (ctx->Polygon.FrontFace) { - case GL_CCW: - key.fill_ccw = fill_front; - key.fill_cw = fill_back; - key.offset_ccw = offset_front; - key.offset_cw = offset_back; - if (ctx->Light.Model.TwoSide && - key.fill_cw != CLIP_CULL) - key.copy_bfc_cw = 1; - break; - case GL_CW: - key.fill_cw = fill_front; - key.fill_ccw = fill_back; - key.offset_cw = offset_front; - key.offset_ccw = offset_back; - if (ctx->Light.Model.TwoSide && - key.fill_ccw != CLIP_CULL) - key.copy_bfc_ccw = 1; - break; - } + if (brw->rast.light_twoside && + key.fill_ccw != CLIP_CULL) + key.copy_bfc_ccw = 1; } } } @@ -262,10 +211,8 @@ static void upload_clip_prog(struct brw_context *brw) const struct brw_tracked_state brw_clip_prog = { .dirty = { - .mesa = (_NEW_LIGHT | - _NEW_TRANSFORM | - _NEW_POLYGON | - _NEW_BUFFERS), + .mesa = (PIPE_NEW_RAST | + PIPE_NEW_UCP), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 957df441ab..d80ec819b9 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -43,6 +43,7 @@ */ struct brw_clip_prog_key { GLuint attrs:32; + GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; @@ -51,12 +52,10 @@ struct brw_clip_prog_key { GLuint fill_ccw:2; /* includes cull information */ GLuint offset_cw:1; GLuint offset_ccw:1; - GLuint pad0:17; - GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; GLuint 
clip_mode:3; - GLuint pad1:27; + GLuint pad1:12; GLfloat offset_factor; GLfloat offset_units; diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index 048ca620fa..6b4da25644 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -29,13 +29,6 @@ * Keith Whitwell */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c index 8458f61c5a..b2cf7b2011 100644 --- a/src/gallium/drivers/i965/brw_clip_point.c +++ b/src/gallium/drivers/i965/brw_clip_point.c @@ -29,13 +29,6 @@ * Keith Whitwell */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 234b3744bf..72e27205e2 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -32,7 +32,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "main/macros.h" struct brw_clip_unit_key { unsigned int total_grf; @@ -66,8 +65,8 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) key->nr_urb_entries = brw->urb.nr_clip_entries; key->urb_size = brw->urb.vsize; - /* _NEW_TRANSOFORM */ - key->depth_clamp = ctx->Transform.DepthClamp; + /* */ + key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp; } static dri_bo * @@ -175,7 +174,7 @@ static void upload_clip_unit( struct brw_context *brw ) const struct brw_tracked_state brw_clip_unit = { .dirty = { - .mesa = _NEW_TRANSFORM, + .mesa = 0, .brw = 
(BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index 0efd77225e..d8feca6a87 100644 --- a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -29,13 +29,6 @@ * Keith Whitwell */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c index ad1bfa435f..4baff55806 100644 --- a/src/gallium/drivers/i965/brw_clip_unfilled.c +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -29,11 +29,6 @@ * Keith Whitwell */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - #include "intel_batchbuffer.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 5a73abdfee..7a6c46ce07 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -30,13 +30,6 @@ */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index c300c33adc..bf0ec89e13 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -52,122 +52,77 @@ #include "utils.h" -/*************************************** - * Mesa's Driver Functions - ***************************************/ - -static void brwUseProgram(GLcontext *ctx, GLuint program) -{ - _mesa_use_program(ctx, program); -} - -static void brwInitProgFuncs( struct dd_function_table 
*functions ) -{ - functions->UseProgram = brwUseProgram; -} -static void brwInitDriverFunctions( struct dd_function_table *functions ) -{ - intelInitDriverFunctions( functions ); - - brwInitFragProgFuncs( functions ); - brwInitProgFuncs( functions ); - brw_init_queryobj_functions(functions); - - functions->Viewport = intel_viewport; -} GLboolean brwCreateContext( const __GLcontextModes *mesaVis, __DRIcontextPrivate *driContextPriv, void *sharedContextPrivate) { - struct dd_function_table functions; struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); - struct intel_context *intel = &brw->intel; - GLcontext *ctx = &intel->ctx; if (!brw) { - _mesa_printf("%s: failed to alloc context\n", __FUNCTION__); - return GL_FALSE; - } - - brwInitVtbl( brw ); - brwInitDriverFunctions( &functions ); - - if (!intelInitContext( intel, mesaVis, driContextPriv, - sharedContextPrivate, &functions )) { - _mesa_printf("%s: failed to init intel context\n", __FUNCTION__); - FREE(brw); + debug_printf("%s: failed to alloc context\n", __FUNCTION__); return GL_FALSE; } - /* Initialize swrast, tnl driver tables: */ - intelInitSpanFuncs(ctx); - - TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; - - ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; - ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ - ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, - ctx->Const.MaxTextureImageUnits); - ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ - - /* Mesa limits textures to 4kx4k; it would be nice to fix that someday - */ - ctx->Const.MaxTextureLevels = 13; - ctx->Const.Max3DTextureLevels = 9; - ctx->Const.MaxCubeTextureLevels = 12; - ctx->Const.MaxTextureRectSize = (1<<12); - - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - - /* if conformance mode is set, swrast can handle any size AA point */ - ctx->Const.MaxPointSizeAA = 255.0; - /* We want the GLSL compiler to emit code that uses condition codes */ 
ctx->Shader.EmitCondCodes = GL_TRUE; ctx->Shader.EmitNVTempInitialization = GL_TRUE; - ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); - ctx->Const.VertexProgram.MaxAluInstructions = 0; - ctx->Const.VertexProgram.MaxTexInstructions = 0; - ctx->Const.VertexProgram.MaxTexIndirections = 0; - ctx->Const.VertexProgram.MaxNativeAluInstructions = 0; - ctx->Const.VertexProgram.MaxNativeTexInstructions = 0; - ctx->Const.VertexProgram.MaxNativeTexIndirections = 0; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; - ctx->Const.VertexProgram.MaxNativeTemps = 256; - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; - ctx->Const.VertexProgram.MaxNativeParameters = 1024; - ctx->Const.VertexProgram.MaxEnvParams = - MIN2(ctx->Const.VertexProgram.MaxNativeParameters, - ctx->Const.VertexProgram.MaxEnvParams); - - ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeAttribs = 12; - ctx->Const.FragmentProgram.MaxNativeTemps = 256; - ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; - ctx->Const.FragmentProgram.MaxNativeParameters = 1024; - ctx->Const.FragmentProgram.MaxEnvParams = - MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, - ctx->Const.FragmentProgram.MaxEnvParams); + brw_init_query( brw ); brw_init_state( brw ); + brw_draw_init( brw ); brw->state.dirty.mesa = ~0; brw->state.dirty.brw = ~0; brw->emit_state_always = 0; - ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - make_empty_list(&brw->query.active_head); - brw_draw_init( brw ); return GL_TRUE; } +/** + * called from intelDestroyContext() + */ +static void brw_destroy_context( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + int i; + + 
brw_destroy_state(brw); + brw_draw_destroy( brw ); + + _mesa_free(brw->wm.compile_data); + + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); + brw->state.nr_color_regions = 0; + intel_region_release(&brw->state.depth_region); + + dri_bo_unreference(brw->curbe.curbe_bo); + dri_bo_unreference(brw->vs.prog_bo); + dri_bo_unreference(brw->vs.state_bo); + dri_bo_unreference(brw->vs.bind_bo); + dri_bo_unreference(brw->gs.prog_bo); + dri_bo_unreference(brw->gs.state_bo); + dri_bo_unreference(brw->clip.prog_bo); + dri_bo_unreference(brw->clip.state_bo); + dri_bo_unreference(brw->clip.vp_bo); + dri_bo_unreference(brw->sf.prog_bo); + dri_bo_unreference(brw->sf.state_bo); + dri_bo_unreference(brw->sf.vp_bo); + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) + dri_bo_unreference(brw->wm.sdc_bo[i]); + dri_bo_unreference(brw->wm.bind_bo); + for (i = 0; i < BRW_WM_MAX_SURF; i++) + dri_bo_unreference(brw->wm.surf_bo[i]); + dri_bo_unreference(brw->wm.sampler_bo); + dri_bo_unreference(brw->wm.prog_bo); + dri_bo_unreference(brw->wm.state_bo); + dri_bo_unreference(brw->cc.prog_bo); + dri_bo_unreference(brw->cc.state_bo); + dri_bo_unreference(brw->cc.vp_bo); +} diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index fa3e32c7ff..009e28b227 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -115,7 +115,6 @@ * Handles blending and (presumably) depth and stencil testing. 
*/ -#define BRW_FALLBACK_TEXTURE 0x1 #define BRW_MAX_CURBE (32*16) struct brw_context; @@ -450,11 +449,9 @@ struct brw_query_object { */ struct brw_context { - struct intel_context intel; /**< base class, must be first field */ GLuint primitive; GLboolean emit_state_always; - GLboolean tmp_fallback; GLboolean no_batch_wrap; struct { @@ -692,7 +689,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /*====================================================================== * brw_queryobj.c */ -void brw_init_queryobj_functions(struct dd_function_table *functions); +void brw_init_query(struct brw_context *brw); void brw_prepare_query_begin(struct brw_context *brw); void brw_emit_query_begin(struct brw_context *brw); void brw_emit_query_end(struct brw_context *brw); @@ -730,7 +727,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst); * macros used previously: */ static INLINE struct brw_context * -brw_context( GLcontext *ctx ) +brw_context( struct pipe_context *ctx ) { return (struct brw_context *)ctx; } diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 4be6c77aa1..3e32c4983d 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -30,14 +30,6 @@ */ - -#include "main/glheader.h" -#include "main/context.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/prog_statevars.h" #include "intel_batchbuffer.h" #include "intel_regions.h" #include "brw_context.h" @@ -64,31 +56,17 @@ static void calculate_curbe_offsets( struct brw_context *brw ) GLuint nr_clip_regs = 0; GLuint total_regs; - /* _NEW_TRANSFORM */ - if (ctx->Transform.ClipPlanesEnabled) { - GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled); + /* PIPE_NEW_UCP */ + if (brw->nr_ucp) { + GLuint nr_planes = 6 + brw->nr_ucp; nr_clip_regs = (nr_planes * 4 + 15) / 16; } total_regs = nr_fp_regs + nr_vp_regs + 
nr_clip_regs; - /* This can happen - what to do? Probably rather than falling - * back, the best thing to do is emit programs which code the - * constants as immediate values. Could do this either as a static - * cap on WM and VS, or adaptively. - * - * Unfortunately, this is currently dependent on the results of the - * program generation process (in the case of wm), so this would - * introduce the need to re-generate programs in the event of a - * curbe allocation failure. - */ - /* Max size is 32 - just large enough to - * hold the 128 parameters allowed by - * the fragment and vertex program - * api's. It's not clear what happens - * when both VP and FP want to use 128 - * parameters, though. + /* When this is > 32, want to use a true constant buffer to hold + * the extra constants. */ assert(total_regs <= 32); @@ -113,8 +91,8 @@ static void calculate_curbe_offsets( struct brw_context *brw ) brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; brw->curbe.total_size = reg; - if (0) - _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", + if (BRW_DEBUG & DEBUG_CURBE) + debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", brw->curbe.wm_start, brw->curbe.wm_size, brw->curbe.clip_start, @@ -129,7 +107,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) const struct brw_tracked_state brw_curbe_offsets = { .dirty = { - .mesa = _NEW_TRANSFORM, + .mesa = PIPE_NEW_UCP, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = CACHE_NEW_WM_PROG }, @@ -204,11 +182,13 @@ static void prepare_constant_buffer(struct brw_context *brw) if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; - _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* map fs constant buffer */ /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) buf[offset + i] = *brw->wm.prog_data->param[i]; + + /* unmap fs constant buffer */ } @@ -228,18 +208,15 @@ static void prepare_constant_buffer(struct brw_context *brw) buf[offset + i * 4 + 3] = fixed_plane[i][3]; 
} - /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to - * clip-space: + /* Clip planes: */ - assert(MAX_CLIP_PLANES == 6); - for (j = 0; j < MAX_CLIP_PLANES; j++) { - if (ctx->Transform.ClipPlanesEnabled & (1<<j)) { - buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0]; - buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1]; - buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2]; - buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3]; - i++; - } + assert(brw->nr_ucp <= 6); + for (j = 0; j < brw->nr_ucp; j++) { + buf[offset + i * 4 + 0] = brw->ucp[j][0]; + buf[offset + i * 4 + 1] = brw->ucp[j][1]; + buf[offset + i * 4 + 2] = brw->ucp[j][2]; + buf[offset + i * 4 + 3] = brw->ucp[j][3]; + i++; } } @@ -248,13 +225,7 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - if (brw->vertex_program->IsNVProgram) - _mesa_load_tracked_matrices(ctx); - - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + /* map vs constant buffer */ /* XXX just use a memcpy here */ for (i = 0; i < nr; i++) { @@ -264,14 +235,16 @@ static void prepare_constant_buffer(struct brw_context *brw) buf[offset + i * 4 + 2] = value[2]; buf[offset + i * 4 + 3] = value[3]; } + + /* unmap vs constant buffer */ } if (0) { for (i = 0; i < sz*16; i+=4) - _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); - _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", + debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", brw->curbe.last_buf, buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? 
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); @@ -282,12 +255,12 @@ static void prepare_constant_buffer(struct brw_context *brw) bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { /* constants have not changed */ - _mesa_free(buf); + FREE(buf); } else { /* constants have changed */ if (brw->curbe.last_buf) - _mesa_free(brw->curbe.last_buf); + FREE(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; @@ -353,15 +326,11 @@ static void emit_constant_buffer(struct brw_context *brw) ADVANCE_BATCH(); } -/* This tracked state is unique in that the state it monitors varies - * dynamically depending on the parameters tracked by the fragment and - * vertex programs. This is the template used as a starting point, - * each context will maintain a copy of this internally and update as - * required. - */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, + .mesa = (PIPE_NEW_FS_CONSTANTS | + PIPE_NEW_VS_CONSTANTS | + PIPE_NEW_UCP), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index 78d457ad2b..282c5b18f4 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ b/src/gallium/drivers/i965/brw_defines.h @@ -840,8 +840,8 @@ #include "intel_chipset.h" -#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID)) -#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID)) +#define BRW_IS_G4X(brw) (IS_G4X((brw)->brw_screen->deviceID)) +#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->brw_screen->deviceID)) #define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) #define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) #define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 
CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 9fef230507..a84c581c03 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -27,8 +27,6 @@ #include <unistd.h> #include <stdarg.h> -#include "main/mtypes.h" - #include "brw_context.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 44bb7bd588..8cd117c24f 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -39,14 +39,13 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" -#include "brw_fallback.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" #define FILE_DEBUG_FLAG DEBUG_BATCH -static GLuint prim_to_hw_prim[GL_POLYGON+1] = { +static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = { _3DPRIM_POINTLIST, _3DPRIM_LINELIST, _3DPRIM_LINELOOP, @@ -60,19 +59,6 @@ static GLuint prim_to_hw_prim[GL_POLYGON+1] = { }; -static const GLenum reduced_prim[GL_POLYGON+1] = { - GL_POINTS, - GL_LINES, - GL_LINES, - GL_LINES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES -}; - /* When the primitive changes, set a state bit and re-validate. Not * the nicest and would rather deal with this by having all the @@ -196,102 +182,6 @@ static void brw_merge_inputs( struct brw_context *brw, brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; } -/* XXX: could split the primitive list to fallback only on the - * non-conformant primitives. - */ -static GLboolean check_fallbacks( struct brw_context *brw, - const struct _mesa_prim *prim, - GLuint nr_prims ) -{ - GLcontext *ctx = &brw->intel.ctx; - GLuint i; - - /* If we don't require strict OpenGL conformance, never - * use fallbacks. If we're forcing fallbacks, always - * use fallfacks. 
- */ - if (brw->intel.conformance_mode == 0) - return GL_FALSE; - - if (brw->intel.conformance_mode == 2) - return GL_TRUE; - - if (ctx->Polygon.SmoothFlag) { - for (i = 0; i < nr_prims; i++) - if (reduced_prim[prim[i].mode] == GL_TRIANGLES) - return GL_TRUE; - } - - /* BRW hardware will do AA lines, but they are non-conformant it - * seems. TBD whether we keep this fallback: - */ - if (ctx->Line.SmoothFlag) { - for (i = 0; i < nr_prims; i++) - if (reduced_prim[prim[i].mode] == GL_LINES) - return GL_TRUE; - } - - /* Stipple -- these fallbacks could be resolved with a little - * bit of work? - */ - if (ctx->Line.StippleFlag) { - for (i = 0; i < nr_prims; i++) { - /* GS doesn't get enough information to know when to reset - * the stipple counter?!? - */ - if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) - return GL_TRUE; - - if (prim[i].mode == GL_POLYGON && - (ctx->Polygon.FrontMode == GL_LINE || - ctx->Polygon.BackMode == GL_LINE)) - return GL_TRUE; - } - } - - if (ctx->Point.SmoothFlag) { - for (i = 0; i < nr_prims; i++) - if (prim[i].mode == GL_POINTS) - return GL_TRUE; - } - - /* BRW hardware doesn't handle GL_CLAMP texturing correctly; - * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP - * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and - * we want strict conformance, force the fallback. - * Right now, we only do this for 2D textures. 
- */ - { - int u; - for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; - if (texUnit->Enabled) { - if (texUnit->Enabled & TEXTURE_1D_BIT) { - if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { - return GL_TRUE; - } - } - if (texUnit->Enabled & TEXTURE_2D_BIT) { - if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { - return GL_TRUE; - } - } - if (texUnit->Enabled & TEXTURE_3D_BIT) { - if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { - return GL_TRUE; - } - } - } - } - } - - /* Nothing stopping us from the fast path now */ - return GL_FALSE; -} - /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. */ @@ -308,23 +198,12 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, GLboolean retval = GL_FALSE; GLboolean warn = GL_FALSE; GLboolean first_time = GL_TRUE; + uint32_t hw_prim; GLuint i; if (ctx->NewState) _mesa_update_state( ctx ); - /* We have to validate the textures *before* checking for fallbacks; - * otherwise, the software fallback won't be able to rely on the - * texture state, the firstLevel and lastLevel fields won't be - * set in the intel texture object (they'll both be 0), and the - * software fallback will segfault if it attempts to access any - * texture level other than level 0. - */ - brw_validate_textures( brw ); - - if (check_fallbacks(brw, prim, nr_prims)) - return GL_FALSE; - /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); @@ -336,90 +215,30 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, brw->vb.max_index = max_index; brw->state.dirty.brw |= BRW_NEW_VERTICES; - /* Have to validate state quite late. Will rebuild tnl_program, - * which depends on varying information. 
- * - * Note this is where brw->vs->prog_data.inputs_read is calculated, - * so can't access it earlier. - */ - - LOCK_HARDWARE(intel); - - if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) { - UNLOCK_HARDWARE(intel); - return GL_TRUE; - } - - for (i = 0; i < nr_prims; i++) { - uint32_t hw_prim; - - /* Flush the batch if it's approaching full, so that we don't wrap while - * we've got validated state that needs to be in the same batch as the - * primitives. This fraction is just a guess (minimal full state plus - * a primitive is around 512 bytes), and would be better if we had - * an upper bound of how much we might emit in a single - * brw_try_draw_prims(). - */ - intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, - LOOP_CLIPRECTS); - - hw_prim = brw_set_prim(brw, prim[i].mode); - - if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) { - first_time = GL_FALSE; - - brw_validate_state(brw); - - /* Various fallback checks: */ - if (brw->intel.Fallback) - goto out; - - /* Check that we can fit our state in with our existing batchbuffer, or - * flush otherwise. - */ - if (dri_bufmgr_check_aperture_space(brw->state.validated_bos, - brw->state.validated_bo_count)) { - static GLboolean warned; - intel_batchbuffer_flush(intel->batch); - - /* Validate the state after we flushed the batch (which would have - * changed the set of dirty state). If we still fail to - * check_aperture, warn of what's happening, but attempt to continue - * on since it may succeed anyway, and the user would probably rather - * see a failure and a warning than a fallback. 
- */ - brw_validate_state(brw); - if (!warned && - dri_bufmgr_check_aperture_space(brw->state.validated_bos, - brw->state.validated_bo_count)) { - warn = GL_TRUE; - warned = GL_TRUE; - } - } - - brw_upload_state(brw); - } + hw_prim = brw_set_prim(brw, prim[i].mode); - brw_emit_prim(brw, &prim[i], hw_prim); + brw_validate_state(brw); - retval = GL_TRUE; - } + /* Check that we can fit our state in with our existing batchbuffer, or + * flush otherwise. + */ + ret = dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count); + if (ret) + return ret; + + ret = brw_upload_state(brw); + if (ret) + return ret; + + ret = brw_emit_prim(brw, &prim[i], hw_prim); + if (ret) + return ret; if (intel->always_flush_batch) intel_batchbuffer_flush(intel->batch); - out: - UNLOCK_HARDWARE(intel); - - brw_state_cache_check_size(brw); - - if (warn) - fprintf(stderr, "i965: Single primitive emit potentially exceeded " - "available aperture space\n"); - if (!retval) - DBG("%s failed\n", __FUNCTION__); - - return retval; + return 0; } void brw_draw_prims( GLcontext *ctx, @@ -431,37 +250,26 @@ void brw_draw_prims( GLcontext *ctx, GLuint min_index, GLuint max_index ) { - GLboolean retval; + enum pipe_error ret; if (!vbo_all_varyings_in_vbos(arrays)) { if (!index_bounds_valid) vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); - - /* Decide if we want to rebase. If so we end up recursing once - * only into this function. - */ - if (min_index != 0) { - vbo_rebase_prims(ctx, arrays, - prim, nr_prims, - ib, min_index, max_index, - brw_draw_prims ); - return; - } } /* Make a first attempt at drawing: */ - retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); /* Otherwise, we really are out of memory. Pass the drawing * command to the software tnl module and which will in turn call * swrast to do the drawing. 
*/ - if (!retval) { - _swsetup_Wakeup(ctx); - _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + if (ret != 0) { + intel_batchbuffer_flush(intel->batch); + ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + assert(ret == 0); } - } void brw_draw_init( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index a3ff6c58d8..ad3ef6b7dd 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -25,13 +25,9 @@ * **************************************************************************/ +#include "pipe/p_context.h" -#include "main/glheader.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "main/state.h" -#include "main/api_validate.h" -#include "main/enums.h" +#include "util/u_upload_mgr.h" #include "brw_draw.h" #include "brw_defines.h" @@ -43,303 +39,157 @@ #include "intel_buffer_objects.h" #include "intel_tex.h" -static GLuint double_types[5] = { - 0, - BRW_SURFACEFORMAT_R64_FLOAT, - BRW_SURFACEFORMAT_R64G64_FLOAT, - BRW_SURFACEFORMAT_R64G64B64_FLOAT, - BRW_SURFACEFORMAT_R64G64B64A64_FLOAT -}; - -static GLuint float_types[5] = { - 0, - BRW_SURFACEFORMAT_R32_FLOAT, - BRW_SURFACEFORMAT_R32G32_FLOAT, - BRW_SURFACEFORMAT_R32G32B32_FLOAT, - BRW_SURFACEFORMAT_R32G32B32A32_FLOAT -}; - -static GLuint uint_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R32_UNORM, - BRW_SURFACEFORMAT_R32G32_UNORM, - BRW_SURFACEFORMAT_R32G32B32_UNORM, - BRW_SURFACEFORMAT_R32G32B32A32_UNORM -}; - -static GLuint uint_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R32_USCALED, - BRW_SURFACEFORMAT_R32G32_USCALED, - BRW_SURFACEFORMAT_R32G32B32_USCALED, - BRW_SURFACEFORMAT_R32G32B32A32_USCALED -}; - -static GLuint int_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R32_SNORM, - BRW_SURFACEFORMAT_R32G32_SNORM, - BRW_SURFACEFORMAT_R32G32B32_SNORM, - BRW_SURFACEFORMAT_R32G32B32A32_SNORM -}; - -static GLuint 
int_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R32_SSCALED, - BRW_SURFACEFORMAT_R32G32_SSCALED, - BRW_SURFACEFORMAT_R32G32B32_SSCALED, - BRW_SURFACEFORMAT_R32G32B32A32_SSCALED -}; - -static GLuint ushort_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R16_UNORM, - BRW_SURFACEFORMAT_R16G16_UNORM, - BRW_SURFACEFORMAT_R16G16B16_UNORM, - BRW_SURFACEFORMAT_R16G16B16A16_UNORM -}; - -static GLuint ushort_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R16_USCALED, - BRW_SURFACEFORMAT_R16G16_USCALED, - BRW_SURFACEFORMAT_R16G16B16_USCALED, - BRW_SURFACEFORMAT_R16G16B16A16_USCALED -}; - -static GLuint short_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R16_SNORM, - BRW_SURFACEFORMAT_R16G16_SNORM, - BRW_SURFACEFORMAT_R16G16B16_SNORM, - BRW_SURFACEFORMAT_R16G16B16A16_SNORM -}; - -static GLuint short_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R16_SSCALED, - BRW_SURFACEFORMAT_R16G16_SSCALED, - BRW_SURFACEFORMAT_R16G16B16_SSCALED, - BRW_SURFACEFORMAT_R16G16B16A16_SSCALED -}; -static GLuint ubyte_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R8_UNORM, - BRW_SURFACEFORMAT_R8G8_UNORM, - BRW_SURFACEFORMAT_R8G8B8_UNORM, - BRW_SURFACEFORMAT_R8G8B8A8_UNORM -}; -static GLuint ubyte_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R8_USCALED, - BRW_SURFACEFORMAT_R8G8_USCALED, - BRW_SURFACEFORMAT_R8G8B8_USCALED, - BRW_SURFACEFORMAT_R8G8B8A8_USCALED -}; - -static GLuint byte_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R8_SNORM, - BRW_SURFACEFORMAT_R8G8_SNORM, - BRW_SURFACEFORMAT_R8G8B8_SNORM, - BRW_SURFACEFORMAT_R8G8B8A8_SNORM -}; -static GLuint byte_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R8_SSCALED, - BRW_SURFACEFORMAT_R8G8_SSCALED, - BRW_SURFACEFORMAT_R8G8B8_SSCALED, - BRW_SURFACEFORMAT_R8G8B8A8_SSCALED -}; - - -/** - * Given vertex array type/size/format/normalized info, return - * the appopriate hardware surface type. - * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. 
- */ -static GLuint get_surface_type( GLenum type, GLuint size, - GLenum format, GLboolean normalized ) +unsigned brw_translate_surface_format( unsigned id ) { - if (INTEL_DEBUG & DEBUG_VERTS) - _mesa_printf("type %s size %d normalized %d\n", - _mesa_lookup_enum_by_nr(type), size, normalized); - - if (normalized) { - switch (type) { - case GL_DOUBLE: return double_types[size]; - case GL_FLOAT: return float_types[size]; - case GL_INT: return int_types_norm[size]; - case GL_SHORT: return short_types_norm[size]; - case GL_BYTE: return byte_types_norm[size]; - case GL_UNSIGNED_INT: return uint_types_norm[size]; - case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; - case GL_UNSIGNED_BYTE: - if (format == GL_BGRA) { - /* See GL_EXT_vertex_array_bgra */ - assert(size == 4); - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - } - else { - return ubyte_types_norm[size]; - } - default: assert(0); return 0; - } - } - else { - assert(format == GL_RGBA); /* sanity check */ - switch (type) { - case GL_DOUBLE: return double_types[size]; - case GL_FLOAT: return float_types[size]; - case GL_INT: return int_types_scale[size]; - case GL_SHORT: return short_types_scale[size]; - case GL_BYTE: return byte_types_scale[size]; - case GL_UNSIGNED_INT: return uint_types_scale[size]; - case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; - case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; - default: assert(0); return 0; - } + switch (id) { + case PIPE_FORMAT_R64_FLOAT: + return BRW_SURFACEFORMAT_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return BRW_SURFACEFORMAT_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return BRW_SURFACEFORMAT_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return BRW_SURFACEFORMAT_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32_FLOAT; + case 
PIPE_FORMAT_R32G32B32A32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return BRW_SURFACEFORMAT_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return BRW_SURFACEFORMAT_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return BRW_SURFACEFORMAT_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return BRW_SURFACEFORMAT_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return BRW_SURFACEFORMAT_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return BRW_SURFACEFORMAT_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return BRW_SURFACEFORMAT_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return BRW_SURFACEFORMAT_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return BRW_SURFACEFORMAT_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return BRW_SURFACEFORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return BRW_SURFACEFORMAT_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return BRW_SURFACEFORMAT_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; + + case 
PIPE_FORMAT_R16_SNORM: + return BRW_SURFACEFORMAT_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return BRW_SURFACEFORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return BRW_SURFACEFORMAT_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return BRW_SURFACEFORMAT_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return BRW_SURFACEFORMAT_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return BRW_SURFACEFORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return BRW_SURFACEFORMAT_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return BRW_SURFACEFORMAT_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return BRW_SURFACEFORMAT_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return BRW_SURFACEFORMAT_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return BRW_SURFACEFORMAT_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; + + default: + assert(0); + return 0; } } - -static GLuint get_size( GLenum type ) -{ - switch (type) { - case GL_DOUBLE: return sizeof(GLdouble); - case GL_FLOAT: return 
sizeof(GLfloat); - case GL_INT: return sizeof(GLint); - case GL_SHORT: return sizeof(GLshort); - case GL_BYTE: return sizeof(GLbyte); - case GL_UNSIGNED_INT: return sizeof(GLuint); - case GL_UNSIGNED_SHORT: return sizeof(GLushort); - case GL_UNSIGNED_BYTE: return sizeof(GLubyte); - default: return 0; - } -} - -static GLuint get_index_type(GLenum type) +static unsigned get_index_type(int type) { switch (type) { - case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE; - case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD; - case GL_UNSIGNED_INT: return BRW_INDEX_DWORD; + case 1: return BRW_INDEX_BYTE; + case 2: return BRW_INDEX_WORD; + case 4: return BRW_INDEX_DWORD; default: assert(0); return 0; } } -static void wrap_buffers( struct brw_context *brw, - GLuint size ) -{ - if (size < BRW_UPLOAD_INIT_SIZE) - size = BRW_UPLOAD_INIT_SIZE; - - brw->vb.upload.offset = 0; - - if (brw->vb.upload.bo != NULL) - dri_bo_unreference(brw->vb.upload.bo); - brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", - size, 1); - - /* Set the internal VBO\ to no-backing-store. We only use them as a - * temporary within a brw_try_draw_prims while the lock is held. 
- */ - /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH - FAKE TO PUSH THIS STUFF */ -// if (!brw->intel.ttm) -// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); -} - -static void get_space( struct brw_context *brw, - GLuint size, - dri_bo **bo_return, - GLuint *offset_return ) -{ - size = ALIGN(size, 64); - - if (brw->vb.upload.bo == NULL || - brw->vb.upload.offset + size > brw->vb.upload.bo->size) { - wrap_buffers(brw, size); - } - - assert(*bo_return == NULL); - dri_bo_reference(brw->vb.upload.bo); - *bo_return = brw->vb.upload.bo; - *offset_return = brw->vb.upload.offset; - brw->vb.upload.offset += size; -} - -static void -copy_array_to_vbo_array( struct brw_context *brw, - struct brw_vertex_element *element, - GLuint dst_stride) -{ - struct intel_context *intel = &brw->intel; - GLuint size = element->count * dst_stride; - - get_space(brw, size, &element->bo, &element->offset); - if (element->glarray->StrideB == 0) { - assert(element->count == 1); - element->stride = 0; - } else { - element->stride = dst_stride; - } - - if (dst_stride == element->glarray->StrideB) { - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - memcpy((char *)element->bo->virtual + element->offset, - element->glarray->Ptr, size); - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - dri_bo_subdata(element->bo, - element->offset, - size, - element->glarray->Ptr); - } - } else { - char *dest; - const unsigned char *src = element->glarray->Ptr; - int i; - - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - dest = element->bo->virtual; - dest += element->offset; - - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - void *data; - - data = _mesa_malloc(dst_stride * element->count); - dest = data; - for (i = 0; i < element->count; i++) 
{ - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - dri_bo_subdata(element->bo, - element->offset, - size, - data); - - _mesa_free(data); - } - } -} -static void brw_prepare_vertices(struct brw_context *brw) +static boolean brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); @@ -358,123 +208,38 @@ static void brw_prepare_vertices(struct brw_context *brw) if (0) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); - /* Accumulate the list of enabled arrays. */ - brw->vb.nr_enabled = 0; - while (vs_inputs) { - GLuint i = _mesa_ffsll(vs_inputs) - 1; - struct brw_vertex_element *input = &brw->vb.inputs[i]; - vs_inputs &= ~(1 << i); - brw->vb.enabled[brw->vb.nr_enabled++] = input; - } - - /* XXX: In the rare cases where this happens we fallback all - * the way to software rasterization, although a tnl fallback - * would be sufficient. I don't know of *any* real world - * cases with > 17 vertex attributes enabled, so it probably - * isn't an issue at this point. - */ - if (brw->vb.nr_enabled >= BRW_VEP_MAX) { - intel->Fallback = 1; - return; - } for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; - if (_mesa_is_bufferobj(input->glarray->BufferObj)) { - struct intel_buffer_object *intel_buffer = - intel_buffer_object(input->glarray->BufferObj); - - /* Named buffer object: Just reference its contents directly. */ - dri_bo_unreference(input->bo); - input->bo = intel_bufferobj_buffer(intel, intel_buffer, - INTEL_READ); - dri_bo_reference(input->bo); - input->offset = (unsigned long)input->glarray->Ptr; - input->stride = input->glarray->StrideB; - input->count = input->glarray->_MaxElement; - - /* This is a common place to reach if the user mistakenly supplies - * a pointer in place of a VBO offset. 
If we just let it go through, - * we may end up dereferencing a pointer beyond the bounds of the - * GTT. We would hope that the VBO's max_index would save us, but - * Mesa appears to hand us min/max values not clipped to the - * array object's _MaxElement, and _MaxElement frequently appears - * to be wrong anyway. - * - * The VBO spec allows application termination in this case, and it's - * probably a service to the poor programmer to do so rather than - * trying to just not render. - */ - assert(input->offset < input->bo->size); - } else { - input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; - if (input->bo != NULL) { - /* Already-uploaded vertex data is present from a previous - * prepare_vertices, but we had to re-validate state due to - * check_aperture failing and a new batch being produced. - */ - continue; - } - - /* Queue the buffer object up to be uploaded in the next pass, - * when we've decided if we're doing interleaved or not. - */ - if (input->attrib == VERT_ATTRIB_POS) { - /* Position array not properly enabled: - */ - if (input->glarray->StrideB == 0) { - intel->Fallback = 1; - return; - } - - interleave = input->glarray->StrideB; - ptr = input->glarray->Ptr; - } - else if (interleave != input->glarray->StrideB || - (const unsigned char *)input->glarray->Ptr - ptr < 0 || - (const unsigned char *)input->glarray->Ptr - ptr > interleave) - { - interleave = 0; - } - - upload[nr_uploads++] = input; - - /* We rebase drawing to start at element zero only when - * varyings are not in vbos, which means we can end up - * uploading non-varying arrays (stride != 0) when min_index - * is zero. This doesn't matter as the amount to upload is - * the same for these arrays whether the draw call is rebased - * or not - we just have to upload the one element. - */ - assert(min_index == 0 || input->glarray->StrideB == 0); - } - } - - /* Handle any arrays to be uploaded. 
*/ - if (nr_uploads > 1 && interleave && interleave <= 256) { - /* All uploads are interleaved, so upload the arrays together as - * interleaved. First, upload the contents and set up upload[0]. - */ - copy_array_to_vbo_array(brw, upload[0], interleave); - - for (i = 1; i < nr_uploads; i++) { - /* Then, just point upload[i] at upload[0]'s buffer. */ - upload[i]->stride = interleave; - upload[i]->offset = upload[0]->offset + - ((const unsigned char *)upload[i]->glarray->Ptr - ptr); - upload[i]->bo = upload[0]->bo; - dri_bo_reference(upload[i]->bo); + if (brw_is_user_buffer(vb)) { + u_upload_buffer( brw->upload, + min_index * vb->stride, + (max_index + 1 - min_index) * vb->stride, + &offset, + &buffer ); } - } - else { - /* Upload non-interleaved arrays */ - for (i = 0; i < nr_uploads; i++) { - copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); + else + { + offset = 0; + buffer = vb->buffer; + count = stride == 0 ? 1 : max_index + 1 - min_index; } + + /* Named buffer object: Just reference its contents directly. 
*/ + dri_bo_unreference(input->bo); + input->bo = intel_bufferobj_buffer(intel, intel_buffer, + INTEL_READ); + dri_bo_reference(input->bo); + + input->offset = (unsigned long)offset; + input->stride = vb->stride; + input->count = count; + + assert(input->offset < input->bo->size); } brw_prepare_query_begin(brw); @@ -632,13 +397,8 @@ static void brw_prepare_indices(struct brw_context *brw) /* Straight upload */ - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(bo); - memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); - drm_intel_gem_bo_unmap_gtt(bo); - } else { - dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); - } + brw_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } else { offset = (GLuint) (unsigned long) index_buffer->ptr; brw->ib.start_vertex_offset = 0; diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 48c2b9a41c..5ec0c585fe 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -58,7 +58,7 @@ static void compile_gs_prog( struct brw_context *brw, /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ - c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_attrs = util_count_bits(c.key.attrs); if (BRW_IS_IGDNG(brw)) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? 
*/ diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c new file mode 100644 index 0000000000..b351794dce --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -0,0 +1,41 @@ + + /* _NEW_COLOR */ + if (key->logic_op != GL_COPY) { + cc.cc2.logicop_enable = 1; + cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); + cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); + + cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); + + cc.cc3.blend_enable = 1; + cc.cc3.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + if (key->dither) { + cc.cc5.dither_enable = 1; + cc.cc6.y_dither_offset = 0; + cc.cc6.x_dither_offset = 0; + } + diff --git a/src/gallium/drivers/i965/brw_pipe_debug.c b/src/gallium/drivers/i965/brw_pipe_debug.c new file mode 100644 index 0000000000..34d6d4028a --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_debug.c @@ -0,0 +1,2 @@ + if (INTEL_DEBUG & DEBUG_STATS) + cc.cc5.statistics_enable = 1; diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c new file mode 100644 index 0000000000..da29bc8bcb --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_depth.c @@ -0,0 +1,52 @@ + /* _NEW_STENCIL */ + if (key->dsa.stencil[0].enable) { + cc.cc0.stencil_enable = 
1; + cc.cc0.stencil_func = + intel_translate_compare_func(key->stencil_func[0]); + cc.cc0.stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[0]); + cc.cc0.stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + cc.cc0.stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); + cc.cc1.stencil_ref = key->stencil_ref[0]; + cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; + cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; + + if (key->stencil_two_side) { + cc.cc0.bf_stencil_enable = 1; + cc.cc0.bf_stencil_func = + intel_translate_compare_func(key->stencil_func[1]); + cc.cc0.bf_stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); + cc.cc1.bf_stencil_ref = key->stencil_ref[1]; + cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; + cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; + } + + /* Not really sure about this: + */ + if (key->stencil_write_mask[0] || + (key->stencil_two_side && key->stencil_write_mask[1])) + cc.cc0.stencil_write_enable = 1; + } + + + if (key->alpha_enabled) { + cc.cc3.alpha_test = 1; + cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); + cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); + } + + /* _NEW_DEPTH */ + if (key->depth_test) { + cc.cc2.depth_test = 1; + cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); + cc.cc2.depth_write_enable = key->depth_write; + } diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c new file mode 100644 index 0000000000..d4ae332f46 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -0,0 +1,25 @@ 
+ +/** + * called from intelDrawBuffer() + */ +static void brw_set_draw_region( struct intel_context *intel, + struct intel_region *color_regions[], + struct intel_region *depth_region, + GLuint num_color_regions) +{ + struct brw_context *brw = brw_context(&intel->ctx); + GLuint i; + + /* release old color/depth regions */ + if (brw->state.depth_region != depth_region) + brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); + intel_region_release(&brw->state.depth_region); + + /* reference new color/depth regions */ + for (i = 0; i < num_color_regions; i++) + intel_region_reference(&brw->state.color_regions[i], color_regions[i]); + intel_region_reference(&brw->state.depth_region, depth_region); + brw->state.nr_color_regions = num_color_regions; +} diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c new file mode 100644 index 0000000000..008f623151 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -0,0 +1,64 @@ + +/** + * called from intel_batchbuffer_flush and children before sending a + * batchbuffer off. + */ +static void brw_finish_batch(struct intel_context *intel) +{ + struct brw_context *brw = brw_context(&intel->ctx); + brw_emit_query_end(brw); +} + + +/** + * called from intelFlushBatchLocked + */ +static void brw_new_batch( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + /* Check that we didn't just wrap our batchbuffer at a bad time. */ + assert(!brw->no_batch_wrap); + + brw->curbe.need_new_bo = GL_TRUE; + + /* Mark all context state as needing to be re-emitted. + * This is probably not as severe as on 915, since almost all of our state + * is just in referenced buffers. 
+ */ + brw->state.dirty.brw |= BRW_NEW_CONTEXT; + + brw->state.dirty.mesa |= ~0; + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; + + /* Move to the end of the current upload buffer so that we'll force choosing + * a new buffer next time. + */ + if (brw->vb.upload.bo != NULL) { + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = NULL; + brw->vb.upload.offset = 0; + } +} + + +static void brw_note_fence( struct intel_context *intel, GLuint fence ) +{ + brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; +} + +/* called from intelWaitForIdle() and intelFlush() + * + * For now, just flush everything. Could be smarter later. + */ +static GLuint brw_flush_cmd( void ) +{ + struct brw_mi_flush flush; + flush.opcode = CMD_MI_FLUSH; + flush.pad = 0; + flush.flags = BRW_FLUSH_STATE_CACHE; + return *(GLuint *)&flush; +} + + diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c new file mode 100644 index 0000000000..d199d0b81a --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -0,0 +1,27 @@ + /* _NEW_BUFFERS */ + if (IS_965(intel->intelScreen->deviceID) && + !IS_G4X(intel->intelScreen->deviceID)) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. + * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. 
+ * I would have done this, but there's also a nasty requirement that + * the depth and the color surfaces all be of the same LOD, which + * may be a worse requirement than this alignment. (Also, we may + * want to just demote the texture to untiled, instead). + */ + if (irb->region && + irb->region->tiling != I915_TILING_NONE && + (irb->region->draw_offset & 4095)) { + DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); + return GL_TRUE; + } + } diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index e1c2c7777b..90513245ee 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -59,9 +59,9 @@ static void compile_sf_prog( struct brw_context *brw, brw_init_compile(brw, &c.func); c.key = *key; - c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_attrs = util_count_bits(c.key.attrs); c.nr_attr_regs = (c.nr_attrs+1)/2; - c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); + c.nr_setup_attrs = util_count_bits(c.key.attrs & DO_SETUP_BITS); c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index ca8f97f9f9..4cc427a935 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -150,7 +150,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); - GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; if (!nr) @@ -188,7 +188,7 @@ static void do_flatshade_line( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); - GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; if (!nr) diff --git 
a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index b817b741e7..6801084616 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -270,7 +270,7 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) /*********************************************************************** * Emit all state: */ -void brw_validate_state( struct brw_context *brw ) +enum pipe_error brw_validate_state( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; @@ -278,10 +278,6 @@ void brw_validate_state( struct brw_context *brw ) GLuint i; brw_clear_validated_bos(brw); - - state->mesa |= brw->intel.NewGLState; - brw->intel.NewGLState = 0; - brw_add_validated_bo(brw, intel->batch->buf); if (brw->emit_state_always) { @@ -290,36 +286,23 @@ void brw_validate_state( struct brw_context *brw ) state->cache |= ~0; } - if (brw->fragment_program != ctx->FragmentProgram._Current) { - brw->fragment_program = ctx->FragmentProgram._Current; - brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - } - - if (brw->vertex_program != ctx->VertexProgram._Current) { - brw->vertex_program = ctx->VertexProgram._Current; - brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - } - if (state->mesa == 0 && state->cache == 0 && state->brw == 0) - return; + return 0; if (brw->state.dirty.brw & BRW_NEW_CONTEXT) brw_clear_batch_cache(brw); - brw->intel.Fallback = 0; - /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = atoms[i]; - if (brw->intel.Fallback) - break; - if (check_state(state, &atom->dirty)) { if (atom->prepare) { - atom->prepare(brw); + ret = atom->prepare(brw); + if (ret) + return ret; } } } @@ -329,17 +312,18 @@ void brw_validate_state( struct brw_context *brw ) * If this fails, we can experience GPU lock-ups. 
*/ { - const struct brw_fragment_program *fp; - fp = brw_fragment_program_const(brw->fragment_program); + const struct brw_fragment_program *fp = brw->fragment_program; if (fp) { - assert((fp->tex_units_used & ctx->Texture._EnabledUnits) - == fp->tex_units_used); + assert(fp->info.max_sampler <= brw->nr_samplers && + fp->info.max_texture <= brw->nr_textures); } } + + return 0; } -void brw_upload_state(struct brw_context *brw) +enum pipe_error brw_upload_state(struct brw_context *brw) { struct brw_state_flags *state = &brw->state.dirty; int i; @@ -356,7 +340,7 @@ void brw_upload_state(struct brw_context *brw) _mesa_memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; @@ -364,12 +348,11 @@ void brw_upload_state(struct brw_context *brw) atom->dirty.brw || atom->dirty.cache); - if (brw->intel.Fallback) - break; - if (check_state(state, &atom->dirty)) { if (atom->emit) { - atom->emit( brw ); + ret = atom->emit( brw ); + if (ret) + return ret; } } @@ -388,12 +371,11 @@ void brw_upload_state(struct brw_context *brw) for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = atoms[i]; - if (brw->intel.Fallback) - break; - if (check_state(state, &atom->dirty)) { if (atom->emit) { - atom->emit( brw ); + ret = atom->emit( brw ); + if (ret) + return ret; } } } @@ -407,10 +389,11 @@ void brw_upload_state(struct brw_context *brw) brw_print_dirty_count(mesa_bits, state->mesa); brw_print_dirty_count(brw_bits, state->brw); brw_print_dirty_count(cache_bits, state->cache); - fprintf(stderr, "\n"); + debug_printf("\n"); } } - - if (!brw->intel.Fallback) - memset(state, 0, sizeof(*state)); + + /* Clear dirty flags: + */ + memset(state, 0, sizeof(*state)); } diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c new file mode 100644 index 0000000000..6684f442d5 --- 
/dev/null +++ b/src/gallium/drivers/i965/brw_swtnl.c @@ -0,0 +1,114 @@ + +/* XXX: could split the primitive list to fallback only on the + * non-conformant primitives. + */ +static GLboolean check_fallbacks( struct brw_context *brw, + const struct _mesa_prim *prim, + GLuint nr_prims ) +{ + GLcontext *ctx = &brw->intel.ctx; + GLuint i; + + /* If we don't require strict OpenGL conformance, never + * use fallbacks. If we're forcing fallbacks, always + * use fallbacks. + */ + if (brw->intel.conformance_mode == 0) + return GL_FALSE; + + if (brw->intel.conformance_mode == 2) + return GL_TRUE; + + if (ctx->Polygon.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (reduced_prim[prim[i].mode] == GL_TRIANGLES) + return GL_TRUE; + } + + /* BRW hardware will do AA lines, but they are non-conformant it + * seems. TBD whether we keep this fallback: + */ + if (ctx->Line.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (reduced_prim[prim[i].mode] == GL_LINES) + return GL_TRUE; + } + + /* Stipple -- these fallbacks could be resolved with a little + * bit of work? + */ + if (ctx->Line.StippleFlag) { + for (i = 0; i < nr_prims; i++) { + /* GS doesn't get enough information to know when to reset + * the stipple counter?!? + */ + if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) + return GL_TRUE; + + if (prim[i].mode == GL_POLYGON && + (ctx->Polygon.FrontMode == GL_LINE || + ctx->Polygon.BackMode == GL_LINE)) + return GL_TRUE; + } + } + + if (ctx->Point.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (prim[i].mode == GL_POINTS) + return GL_TRUE; + } + + /* BRW hardware doesn't handle GL_CLAMP texturing correctly; + * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP + * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and + * we want strict conformance, force the fallback. + * Right now, we only do this for 2D textures. 
+ */ + { + int u; + for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; + if (texUnit->Enabled) { + if (texUnit->Enabled & TEXTURE_1D_BIT) { + if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_2D_BIT) { + if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_3D_BIT) { + if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { + return GL_TRUE; + } + } + } + } + } + + /* Exceeding hw limits on number of VS inputs? + */ + if (brw->nr_ve == 0 || + brw->nr_ve >= BRW_VEP_MAX) { + return TRUE; + } + + /* Position array with zero stride? + */ + if (brw->vs[brw->ve[0]]->stride == 0) + return TRUE; + + + + /* Nothing stopping us from the fast path now */ + return GL_FALSE; +} + + + + diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h new file mode 100644 index 0000000000..32b62848da --- /dev/null +++ b/src/gallium/drivers/i965/brw_types.h @@ -0,0 +1,11 @@ +#ifndef BRW_TYPES_H +#define BRW_TYPES_H + +typedef GLuint uint32_t; +typedef GLubyte uint8_t; +typedef GLushort uint16_t; +/* no GLenum, translate all away */ + +typedef GLboolean uint8_t; + +#endif diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c index ce21aa4869..17f671a8fa 100644 --- a/src/gallium/drivers/i965/brw_util.c +++ b/src/gallium/drivers/i965/brw_util.c @@ -35,14 +35,6 @@ #include "brw_util.h" #include "brw_defines.h" -GLuint brw_count_bits( GLuint val ) -{ - GLuint i; - for (i = 0; val ; val >>= 1) - if (val & 1) - i++; - return i; -} GLuint brw_translate_blend_equation( GLenum mode ) diff --git a/src/gallium/drivers/i965/brw_vs.c 
b/src/gallium/drivers/i965/brw_vs.c index f0c79efbd9..53a5560105 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -61,9 +61,7 @@ static void do_vs_prog( struct brw_context *brw, } if (0) - _mesa_print_program(&c.vp->program.Base); - - + tgsi_dump(&c.vp->tokens, 0); /* Emit GEN4 code. */ @@ -96,9 +94,9 @@ static void brw_upload_vs_prog(struct brw_context *brw) * the inputs it asks for, whether they are varying or not. */ key.program_string_id = vp->id; - key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); - key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); + key.nr_userclip = brw->nr_userclip; + key.copy_edgeflag = (brw->rast->fill_ccw != PIPE_POLYGON_MODE_FILL || + brw->rast->fill_cw != PIPE_POLYGON_MODE_FILL); /* Make an early check for the key. */ @@ -116,7 +114,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON, + .mesa = PIPE_NEW_UCP | PIPE_NEW_RAST, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 1638ef8111..7f20c4baca 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -33,7 +33,7 @@ #include "main/macros.h" #include "shader/program.h" #include "shader/prog_parameter.h" -#include "shader/prog_print.h" +#include "pipe/p_shader_tokens.h" #include "brw_context.h" #include "brw_vs.h" @@ -129,6 +129,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } } + /* If there are no inputs, we'll still be reading one attribute's worth * because it's required -- see urb_read_length setting. 
*/ @@ -226,6 +227,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * vertex urb, so is half the amount: */ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; + /* Setting this field to 0 leads to undefined behavior according to the * the VS_STATE docs. Our VUEs will always have at least one attribute * sitting in them, even if it's padding. @@ -960,9 +962,6 @@ static void emit_arl( struct brw_vs_compile *c, /** * Return the brw reg for the given instruction's src argument. - * Will return mangled results for SWZ op. The emit_swz() function - * ignores this result and recalculates taking extended swizzles into - * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, const struct prog_instruction *inst, @@ -1024,74 +1023,6 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, } -static void emit_swz( struct brw_vs_compile *c, - struct brw_reg dst, - const struct prog_instruction *inst) -{ - const GLuint argIndex = 0; - const struct prog_src_register src = inst->SrcReg[argIndex]; - struct brw_compile *p = &c->func; - GLuint zeros_mask = 0; - GLuint ones_mask = 0; - GLuint src_mask = 0; - GLubyte src_swz[4]; - GLboolean need_tmp = (src.Negate && - dst.file != BRW_GENERAL_REGISTER_FILE); - struct brw_reg tmp = dst; - GLuint i; - - if (need_tmp) - tmp = get_tmp(c); - - for (i = 0; i < 4; i++) { - if (dst.dw1.bits.writemask & (1<vp->program.Base.Instructions[insn]; - for (i = 0; i < 3; i++) { - struct prog_src_register *src = &inst->SrcReg[i]; - GLuint index = src->Index; - GLuint file = src->File; - if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS) - c->output_regs[index].used_in_src = GL_TRUE; - } - } - /* Static register allocation */ brw_vs_alloc_regs(c); @@ -1362,18 +1279,14 @@ void brw_vs_emit(struct brw_vs_compile *c ) _mesa_print_instruction(inst); #endif - /* Get argument regs. SWZ is special and does this itself. + /* Get argument regs. 
*/ - if (inst->Opcode != OPCODE_SWZ) - for (i = 0; i < 3; i++) { - const struct prog_src_register *src = &inst->SrcReg[i]; - index = src->Index; - file = src->File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - args[i] = c->output_regs[index].reg; - else - args[i] = get_arg(c, inst, i); - } + for (i = 0; i < 3; i++) { + const struct prog_src_register *src = &inst->SrcReg[i]; + index = src->Index; + file = src->File; + args[i] = get_arg(c, inst, i); + } /* Get dest regs. Note that it is possible for a reg to be both * dst and arg, given the static allocation of registers. So @@ -1381,10 +1294,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) */ index = inst->DstReg.Index; file = inst->DstReg.File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - dst = c->output_regs[index].reg; - else - dst = get_dst(c, inst->DstReg); + dst = get_dst(c, inst->DstReg); if (inst->SaturateMode != SATURATE_OFF) { _mesa_problem(NULL, "Unsupported saturate %d in vertex shader", @@ -1392,151 +1302,144 @@ void brw_vs_emit(struct brw_vs_compile *c ) } switch (inst->Opcode) { - case OPCODE_ABS: + case TGSI_OPCODE_ABS: brw_MOV(p, dst, brw_abs(args[0])); break; - case OPCODE_ADD: + case TGSI_OPCODE_ADD: brw_ADD(p, dst, args[0], args[1]); break; - case OPCODE_COS: + case TGSI_OPCODE_COS: emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_DP3: + case TGSI_OPCODE_DP3: brw_DP3(p, dst, args[0], args[1]); break; - case OPCODE_DP4: + case TGSI_OPCODE_DP4: brw_DP4(p, dst, args[0], args[1]); break; - case OPCODE_DPH: + case TGSI_OPCODE_DPH: brw_DPH(p, dst, args[0], args[1]); break; - case OPCODE_NRM3: + case TGSI_OPCODE_NRM3: emit_nrm(c, dst, args[0], 3); break; - case OPCODE_NRM4: + case TGSI_OPCODE_NRM4: emit_nrm(c, dst, args[0], 4); break; - case OPCODE_DST: + case TGSI_OPCODE_DST: unalias2(c, dst, args[0], args[1], emit_dst_noalias); break; - case OPCODE_EXP: + case TGSI_OPCODE_EXP: unalias1(c, dst, args[0], 
emit_exp_noalias); break; - case OPCODE_EX2: + case TGSI_OPCODE_EX2: emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_ARL: + case TGSI_OPCODE_ARL: emit_arl(c, dst, args[0]); break; - case OPCODE_FLR: + case TGSI_OPCODE_FLR: brw_RNDD(p, dst, args[0]); break; - case OPCODE_FRC: + case TGSI_OPCODE_FRC: brw_FRC(p, dst, args[0]); break; - case OPCODE_LOG: + case TGSI_OPCODE_LOG: unalias1(c, dst, args[0], emit_log_noalias); break; - case OPCODE_LG2: + case TGSI_OPCODE_LG2: emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_LIT: + case TGSI_OPCODE_LIT: unalias1(c, dst, args[0], emit_lit_noalias); break; - case OPCODE_LRP: + case TGSI_OPCODE_LRP: unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias); break; - case OPCODE_MAD: + case TGSI_OPCODE_MAD: brw_MOV(p, brw_acc_reg(), args[2]); brw_MAC(p, dst, args[0], args[1]); break; - case OPCODE_MAX: + case TGSI_OPCODE_MAX: emit_max(p, dst, args[0], args[1]); break; - case OPCODE_MIN: + case TGSI_OPCODE_MIN: emit_min(p, dst, args[0], args[1]); break; - case OPCODE_MOV: + case TGSI_OPCODE_MOV: brw_MOV(p, dst, args[0]); break; - case OPCODE_MUL: + case TGSI_OPCODE_MUL: brw_MUL(p, dst, args[0], args[1]); break; - case OPCODE_POW: + case TGSI_OPCODE_POW: emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); break; - case OPCODE_RCP: + case TGSI_OPCODE_RCP: emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_RSQ: + case TGSI_OPCODE_RSQ: emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); break; - - case OPCODE_SEQ: + case TGSI_OPCODE_SEQ: emit_seq(p, dst, args[0], args[1]); break; - case OPCODE_SIN: + case TGSI_OPCODE_SIN: emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_SNE: + case TGSI_OPCODE_SNE: emit_sne(p, dst, args[0], args[1]); break; - case OPCODE_SGE: + case 
TGSI_OPCODE_SGE: emit_sge(p, dst, args[0], args[1]); break; - case OPCODE_SGT: + case TGSI_OPCODE_SGT: emit_sgt(p, dst, args[0], args[1]); break; - case OPCODE_SLT: + case TGSI_OPCODE_SLT: emit_slt(p, dst, args[0], args[1]); break; - case OPCODE_SLE: + case TGSI_OPCODE_SLE: emit_sle(p, dst, args[0], args[1]); break; - case OPCODE_SUB: + case TGSI_OPCODE_SUB: brw_ADD(p, dst, args[0], negate(args[1])); break; - case OPCODE_SWZ: - /* The args[0] value can't be used here as it won't have - * correctly encoded the full swizzle: - */ - emit_swz(c, dst, inst); - break; - case OPCODE_TRUNC: + case TGSI_OPCODE_TRUNC: /* round toward zero */ brw_RNDZ(p, dst, args[0]); break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: emit_xpd(p, dst, args[0], args[1]); break; - case OPCODE_IF: + case TGSI_OPCODE_IF: assert(if_depth < MAX_IF_DEPTH); if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); /* Note that brw_IF smashes the predicate_control field. */ if_inst[if_depth]->header.predicate_control = get_predicate(inst); if_depth++; break; - case OPCODE_ELSE: + case TGSI_OPCODE_ELSE: if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; - case OPCODE_ENDIF: + case TGSI_OPCODE_ENDIF: assert(if_depth > 0); brw_ENDIF(p, if_inst[--if_depth]); break; - case OPCODE_BGNLOOP: + case TGSI_OPCODE_BGNLOOP: loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; - case OPCODE_BRK: + case TGSI_OPCODE_BRK: brw_set_predicate_control(p, get_predicate(inst)); brw_BREAK(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_CONT: + case TGSI_OPCODE_CONT: brw_set_predicate_control(p, get_predicate(inst)); brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDLOOP: { struct brw_instruction *inst0, *inst1; GLuint br = 1; @@ -1550,23 +1453,23 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* patch all the BREAK/CONT instructions from last BEGINLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; - if 
(inst0->header.opcode == BRW_OPCODE_BREAK) { + if (inst0->header.opcode == BRW_TGSI_OPCODE_BREAK) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + else if (inst0->header.opcode == BRW_TGSI_OPCODE_CONTINUE) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } } } break; - case OPCODE_BRA: + case TGSI_OPCODE_BRA: brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_CAL: + case TGSI_OPCODE_CAL: brw_set_access_mode(p, BRW_ALIGN_1); brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); brw_set_access_mode(p, BRW_ALIGN_16); @@ -1575,27 +1478,27 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_save_call(p, inst->Comment, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; - case OPCODE_RET: + case TGSI_OPCODE_RET: brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(-4)); brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); brw_set_access_mode(p, BRW_ALIGN_16); break; - case OPCODE_END: + case TGSI_OPCODE_END: end_offset = p->nr_insn; /* this instruction will get patched later to jump past subroutine * code, etc. 
*/ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; - case OPCODE_PRINT: + case TGSI_OPCODE_PRINT: /* no-op */ break; - case OPCODE_BGNSUB: + case TGSI_OPCODE_BGNSUB: brw_save_label(p, inst->Comment, p->nr_insn); break; - case OPCODE_ENDSUB: + case TGSI_OPCODE_ENDSUB: /* no-op */ break; default: @@ -1618,33 +1521,6 @@ void brw_vs_emit(struct brw_vs_compile *c ) hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; } - if ((inst->DstReg.File == PROGRAM_OUTPUT) - && (inst->DstReg.Index != VERT_RESULT_HPOS) - && c->output_regs[inst->DstReg.Index].used_in_src) { - brw_MOV(p, get_dst(c, inst->DstReg), dst); - } - - /* Result color clamping. - * - * When destination register is an output register and - * it's primary/secondary front/back color, we have to clamp - * the result to [0,1]. This is done by enabling the - * saturation bit for the last instruction. - * - * We don't use brw_set_saturate() as it modifies - * p->current->header.saturate, which affects all the subsequent - * instructions. Instead, we directly modify the header - * of the last (already stored) instruction. - */ - if (inst->DstReg.File == PROGRAM_OUTPUT) { - if ((inst->DstReg.Index == VERT_RESULT_COL0) - || (inst->DstReg.Index == VERT_RESULT_COL1) - || (inst->DstReg.Index == VERT_RESULT_BFC0) - || (inst->DstReg.Index == VERT_RESULT_BFC1)) { - p->store[p->nr_insn-1].header.saturate = 1; - } - } - release_tmps(c); } diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 2292de94c4..20d31880b4 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -269,61 +269,46 @@ static void brw_wm_populate_key( struct brw_context *brw, uses_depth, key); + /* Revisit this, figure out if it's really useful, and either push + * it into the state tracker so that everyone benefits (use to + * create fs varients with TEX rather than TXP), or discard. 
+ */ + key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/ - /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; - - /* _NEW_LIGHT */ - key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + /* PIPE_NEW_RAST */ + key->flat_shade = brw->rast.flat_shade; - /* _NEW_HINT */ - key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* This can be determined by looking at the INTERP mode each input decl. + */ + key->linear_color = 0; /* _NEW_TEXTURE */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; - - if (unit->_ReallyEnabled) { - const struct gl_texture_object *t = unit->_Current; - const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + if (i < brw->nr_textures) { + const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; + const struct gl_texture_object *t = unit->_Current; + const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + if (img->InternalFormat == GL_YCBCR_MESA) { key->yuvtex_mask |= 1 << i; if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) - key->yuvtex_swap_mask |= 1 << i; + key->yuvtex_swap_mask |= 1 << i; } - key->tex_swizzles[i] = t->_Swizzle; + key->tex_swizzles[i] = t->_Swizzle; + + if (0) + key->shadowtex_mask |= 1<tex_swizzles[i] = SWIZZLE_NOOP; } } - /* Shadow */ - key->shadowtex_mask = fp->program.Base.ShadowSamplers; - /* _NEW_BUFFERS */ - /* - * Include the draw buffer origin and height so that we can calculate - * fragment position values relative to the bottom left of the drawable, - * from the incoming screen origin relative position we get as part of our - * payload. 
- * - * We could avoid recompiling by including this as a constant referenced by - * our program, but if we were to do that it would also be nice to handle - * getting that constant updated at batchbuffer submit time (when we - * hold the lock and know where the buffer really is) rather than at emit - * time when we don't hold the lock and are just guessing. We could also - * just avoid using this as key data if the program doesn't use - * fragment.position. - * - * This pretty much becomes moot with DRI2 and redirected buffers anyway, - * as our origins will always be zero then. - */ + /* _NEW_FRAMEBUFFER */ if (brw->intel.driDrawable != NULL) { - key->origin_x = brw->intel.driDrawable->x; - key->origin_y = brw->intel.driDrawable->y; - key->drawable_height = brw->intel.driDrawable->h; + key->drawable_height = brw->fb.cbufs[0].height; } /* CACHE_NEW_VS_PROG */ diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 872b1f3ecf..756a680150 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -76,7 +76,6 @@ struct brw_wm_prog_key { GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; GLuint program_string_id:32; - GLuint origin_x, origin_y; GLuint drawable_height; GLuint vp_outputs_written; }; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index bf80a2942a..9c47c46a3d 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -125,23 +125,21 @@ static void emit_wpos_xy(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ if (mask & WRITEMASK_X) { - /* X' = X - origin */ - brw_ADD(p, + /* X' = X */ + brw_MOV(p, dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); + retype(arg0[0], BRW_REGISTER_TYPE_W)); } + /* XXX: is this needed any more, or is this a NOOP? 
+ */ if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + /* Y' = height - 1 - Y */ brw_ADD(p, dst[1], negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + brw_imm_d(c->key.drawable_height - 1)); } } @@ -1376,7 +1374,6 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_MOV: - case OPCODE_SWZ: emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 4e3edfbbff..5f47d86f71 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -30,25 +30,12 @@ */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" +#include "pipe/p_shader_constants.h" + #include "brw_context.h" #include "brw_wm.h" #include "brw_util.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/prog_statevars.h" - - -/** An invalid texture target */ -#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS - -/** An invalid texture unit */ -#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT - -#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS #define X 0 #define Y 1 @@ -68,11 +55,6 @@ static const char *wm_opcode_strings[] = { "FRONTFACING", }; -#if 0 -static const char *wm_file_strings[] = { - "PAYLOAD" -}; -#endif /*********************************************************************** @@ -165,13 +147,13 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c ) } c->fp_temp |= 1<<(bit-1); - return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); + return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1)); } static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) { - c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); + c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp)); } @@ -192,58 +174,29 @@ static struct prog_instruction *emit_insn(struct 
brw_wm_compile *c, return inst; } -static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, - GLuint op, - struct prog_dst_register dest, - GLuint saturate, - GLuint tex_src_unit, - GLuint tex_src_target, - GLuint tex_shadow, - struct prog_src_register src0, - struct prog_src_register src1, - struct prog_src_register src2 ) +static struct prog_instruction * emit_op(struct brw_wm_compile *c, + GLuint op, + struct prog_dst_register dest, + GLuint saturate, + struct prog_src_register src0, + struct prog_src_register src1, + struct prog_src_register src2 ) { struct prog_instruction *inst = get_fp_inst(c); - assert(tex_src_unit < BRW_MAX_TEX_UNIT || - tex_src_unit == TEX_UNIT_NONE); - assert(tex_src_target < NUM_TEXTURE_TARGETS || - tex_src_target == TEX_TARGET_NONE); - - /* update mask of which texture units are referenced by this program */ - if (tex_src_unit != TEX_UNIT_NONE) - c->fp->tex_units_used |= (1 << tex_src_unit); - memset(inst, 0, sizeof(*inst)); inst->Opcode = op; inst->DstReg = dest; inst->SaturateMode = saturate; - inst->TexSrcUnit = tex_src_unit; - inst->TexSrcTarget = tex_src_target; - inst->TexShadow = tex_shadow; inst->SrcReg[0] = src0; inst->SrcReg[1] = src1; inst->SrcReg[2] = src2; return inst; } - - -static struct prog_instruction * emit_op(struct brw_wm_compile *c, - GLuint op, - struct prog_dst_register dest, - GLuint saturate, - struct prog_src_register src0, - struct prog_src_register src1, - struct prog_src_register src2 ) -{ - return emit_tex_op(c, op, dest, saturate, - TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */ - src0, src1, src2); -} -/* Many Mesa opcodes produce the same value across all the result channels. +/* Many opcodes produce the same value across all the result channels. * We'd rather not have to support that splatting in the opcode implementations, * and brw_wm_pass*.c wants to optimize them out by shuffling references around * anyway. 
We can easily get both by emitting the opcode to one channel, and @@ -267,7 +220,7 @@ static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); if (other_channel_mask != 0) { inst = emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(inst0->DstReg, other_channel_mask), 0, src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), @@ -356,7 +309,9 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) } static void emit_interp( struct brw_wm_compile *c, - GLuint idx ) + GLuint semantic, + GLuint semantic_index, + GLuint interp_mode ) { struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); @@ -366,7 +321,7 @@ static void emit_interp( struct brw_wm_compile *c, * multiplied by 1/W in the SF program, and LINTERP on those * which have not: */ - switch (idx) { + switch (semantic) { case FRAG_ATTRIB_WPOS: /* Have to treat wpos.xy specially: */ @@ -390,8 +345,8 @@ static void emit_interp( struct brw_wm_compile *c, deltas, src_undef()); break; - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: + + case TGSI_SEMANTIC_COLOR: if (c->key.flat_shade) { emit_op(c, WM_CINTERP, @@ -402,25 +357,13 @@ static void emit_interp( struct brw_wm_compile *c, src_undef()); } else { - if (c->key.linear_color) { - emit_op(c, - WM_LINTERP, - dst, - 0, - interp, - deltas, - src_undef()); - } - else { - /* perspective-corrected color interpolation */ - emit_op(c, - WM_PINTERP, - dst, - 0, - interp, - deltas, - get_pixel_w(c)); - } + emit_op(c, + translate_interp_mode(interp_mode), + dst, + 0, + interp, + deltas, + src_undef()); } break; case FRAG_ATTRIB_FOGC: @@ -434,7 +377,7 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_YZW), 0, src_swizzle(interp, @@ -468,7 +411,7 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); 
emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_ZW), 0, src_swizzle(interp, @@ -482,7 +425,7 @@ static void emit_interp( struct brw_wm_compile *c, default: emit_op(c, - WM_PINTERP, + translate_interp_mode(interp_mode), dst, 0, interp, @@ -490,8 +433,6 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); break; } - - c->fp_interp_emitted |= 1<SaturateMode, src0, @@ -596,7 +537,7 @@ static void precalc_dst( struct brw_wm_compile *c, /* dst.xz = swz src0.1zzz */ swz = emit_op(c, - OPCODE_SWZ, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_XZ), inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), @@ -609,7 +550,7 @@ static void precalc_dst( struct brw_wm_compile *c, /* dst.w = mov src1.w */ emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_W), inst->SaturateMode, src1, @@ -631,7 +572,7 @@ static void precalc_lit( struct brw_wm_compile *c, /* dst.xw = swz src0.1111 */ swz = emit_op(c, - OPCODE_SWZ, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_XW), 0, src_swizzle1(src0, SWIZZLE_ONE), @@ -643,7 +584,7 @@ static void precalc_lit( struct brw_wm_compile *c, if (dst.WriteMask & WRITEMASK_YZ) { emit_op(c, - OPCODE_LIT, + TGSI_OPCODE_LIT, dst_mask(dst, WRITEMASK_YZ), inst->SaturateMode, src0, @@ -681,7 +622,7 @@ static void precalc_tex( struct brw_wm_compile *c, coord = src_reg_from_dst(tmpcoord); /* tmpcoord = src0 (i.e.: coord = src0) */ - out = emit_op(c, OPCODE_MOV, + out = emit_op(c, TGSI_OPCODE_MOV, tmpcoord, 0, src0, @@ -691,7 +632,7 @@ static void precalc_tex( struct brw_wm_compile *c, out->SrcReg[0].Abs = 1; /* tmp0 = MAX(coord.X, coord.Y) */ - emit_op(c, OPCODE_MAX, + emit_op(c, TGSI_OPCODE_MAX, tmp0, 0, src_swizzle1(coord, X), @@ -699,7 +640,7 @@ static void precalc_tex( struct brw_wm_compile *c, src_undef()); /* tmp1 = MAX(tmp0, coord.Z) */ - emit_op(c, OPCODE_MAX, + emit_op(c, TGSI_OPCODE_MAX, tmp1, 0, tmp0src, @@ -707,7 +648,7 @@ static void precalc_tex( struct brw_wm_compile *c, src_undef()); /* tmp0 = 1 / 
tmp1 */ - emit_op(c, OPCODE_RCP, + emit_op(c, TGSI_OPCODE_RCP, dst_mask(tmp0, WRITEMASK_X), 0, tmp1src, @@ -715,7 +656,7 @@ static void precalc_tex( struct brw_wm_compile *c, src_undef()); /* tmpCoord = src0 * tmp0 */ - emit_op(c, OPCODE_MUL, + emit_op(c, TGSI_OPCODE_MUL, tmpcoord, 0, src0, @@ -738,7 +679,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } */ emit_op(c, - OPCODE_MUL, + TGSI_OPCODE_MUL, tmpcoord, 0, inst->SrcReg[0], @@ -785,7 +726,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp = TEX ... */ emit_tex_op(c, - OPCODE_TEX, + TGSI_OPCODE_TEX, tmp, inst->SaturateMode, unit, @@ -798,7 +739,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp.xyz = ADD TMP, C0 */ emit_op(c, - OPCODE_ADD, + TGSI_OPCODE_ADD, dst_mask(tmp, WRITEMASK_XYZ), 0, tmpsrc, @@ -809,7 +750,7 @@ static void precalc_tex( struct brw_wm_compile *c, */ emit_op(c, - OPCODE_MUL, + TGSI_OPCODE_MUL, dst_mask(tmp, WRITEMASK_Y), 0, tmpsrc, @@ -824,7 +765,7 @@ static void precalc_tex( struct brw_wm_compile *c, */ emit_op(c, - OPCODE_MAD, + TGSI_OPCODE_MAD, dst_mask(dst, WRITEMASK_XYZ), 0, swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), @@ -834,7 +775,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* RGB.y = MAD YUV.z, C1.w, RGB.y */ emit_op(c, - OPCODE_MAD, + TGSI_OPCODE_MAD, dst_mask(dst, WRITEMASK_Y), 0, src_swizzle1(tmpsrc, Z), @@ -846,7 +787,7 @@ static void precalc_tex( struct brw_wm_compile *c, else { /* ordinary RGBA tex instruction */ emit_tex_op(c, - OPCODE_TEX, + TGSI_OPCODE_TEX, inst->DstReg, inst->SaturateMode, unit, @@ -861,7 +802,7 @@ static void precalc_tex( struct brw_wm_compile *c, if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) { /* swizzle the result of the TEX instruction */ struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); - emit_op(c, OPCODE_SWZ, + emit_op(c, TGSI_OPCODE_MOV, inst->DstReg, SATURATE_OFF, /* saturate already done above */ 
src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), @@ -884,7 +825,7 @@ static GLboolean projtex( struct brw_wm_compile *c, const struct prog_src_register src = inst->SrcReg[0]; GLboolean retVal; - assert(inst->Opcode == OPCODE_TXP); + assert(inst->Opcode == TGSI_OPCODE_TXP); /* Only try to detect the simplest cases. Could detect (later) * cases where we are trying to emit code like RCP {1.0}, MUL x, @@ -921,7 +862,7 @@ static void precalc_txp( struct brw_wm_compile *c, /* tmp0.w = RCP inst.arg[0][3] */ emit_op(c, - OPCODE_RCP, + TGSI_OPCODE_RCP, dst_mask(tmp, WRITEMASK_W), 0, src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), @@ -931,7 +872,7 @@ static void precalc_txp( struct brw_wm_compile *c, /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww */ emit_op(c, - OPCODE_MUL, + TGSI_OPCODE_MUL, dst_mask(tmp, WRITEMASK_XYZ), 0, src0, @@ -1015,6 +956,7 @@ static void validate_src_regs( struct brw_wm_compile *c, GLuint idx = inst->SrcReg[i].Index; if (!(c->fp_interp_emitted & (1<fp_interp_emitted |= 1<Opcode) { - case OPCODE_SWZ: + case TGSI_OPCODE_ABS: out = emit_insn(c, inst); - out->Opcode = OPCODE_MOV; - break; - - case OPCODE_ABS: - out = emit_insn(c, inst); - out->Opcode = OPCODE_MOV; + out->Opcode = TGSI_OPCODE_MOV; out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; break; - case OPCODE_SUB: + case TGSI_OPCODE_SUB: out = emit_insn(c, inst); - out->Opcode = OPCODE_ADD; + out->Opcode = TGSI_OPCODE_ADD; out->SrcReg[1].Negate ^= NEGATE_XYZW; break; - case OPCODE_SCS: + case TGSI_OPCODE_SCS: out = emit_insn(c, inst); /* This should probably be done in the parser. 
*/ out->DstReg.WriteMask &= WRITEMASK_XY; break; - case OPCODE_DST: + case TGSI_OPCODE_DST: precalc_dst(c, inst); break; - case OPCODE_LIT: + case TGSI_OPCODE_LIT: precalc_lit(c, inst); break; - case OPCODE_TEX: + case TGSI_OPCODE_TEX: precalc_tex(c, inst); break; - case OPCODE_TXP: + case TGSI_OPCODE_TXP: precalc_txp(c, inst); break; - case OPCODE_TXB: + case TGSI_OPCODE_TXB: out = emit_insn(c, inst); out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: out = emit_insn(c, inst); /* This should probably be done in the parser. */ out->DstReg.WriteMask &= WRITEMASK_XYZ; break; - case OPCODE_KIL: + case TGSI_OPCODE_KIL: out = emit_insn(c, inst); /* This should probably be done in the parser. */ out->DstReg.WriteMask = 0; break; - case OPCODE_END: + case TGSI_OPCODE_END: emit_fb_write(c); break; - case OPCODE_PRINT: - break; default: if (brw_wm_is_scalar_result(inst->Opcode)) emit_scalar_insn(c, inst); diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index c9fe1dd8ad..d836e2fb34 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -6,9 +6,6 @@ #include "brw_eu.h" #include "brw_wm.h" -enum _subroutine { - SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 -}; static struct brw_reg get_dst_reg(struct brw_wm_compile *c, const struct prog_instruction *inst, @@ -32,10 +29,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) case OPCODE_CAL: case OPCODE_BRK: case OPCODE_RET: - case OPCODE_NOISE1: - case OPCODE_NOISE2: - case OPCODE_NOISE3: - case OPCODE_NOISE4: case OPCODE_BGNLOOP: return GL_TRUE; default: @@ -1495,1036 +1488,7 @@ static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) 0, 16, 2 ); } -/* One-, two- and three-dimensional Perlin noise, similar to the description - in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. 
*/ -static void noise1_sub( struct brw_wm_compile *c ) { - struct brw_compile *p = &c->func; - struct brw_reg param, - x0, x1, /* gradients at each end */ - t, tmp[ 2 ], /* float temporaries */ - itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */ - int i; - int mark = mark_tmps( c ); - - x0 = alloc_tmp( c ); - x1 = alloc_tmp( c ); - t = alloc_tmp( c ); - tmp[ 0 ] = alloc_tmp( c ); - tmp[ 1 ] = alloc_tmp( c ); - itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD ); - itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD ); - itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD ); - itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD ); - itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD ); - - param = lookup_tmp( c, mark - 2 ); - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ - - /* Arrange the two end coordinates into scalars (itmp0/itmp1) to - be hashed. Also compute the remainder (offset within the unit - length), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param ); - brw_FRC( p, param, param ); - brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) ); - brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ - brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ - - /* We're now ready to perform the hashing. The two hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 32x16 - bit multiplication, and 16-bit swizzles (which we get for - free). We can't use immediate operands in the multiplies, - because immediates are permitted only in src1 and the 16-bit - factor is permitted only in src0. 
*/ - for( i = 0; i < 2; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] ); - for( i = 0; i < 2; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 2; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] ); - for( i = 0; i < 2; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 2; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); - for( i = 0; i < 2; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - - /* Now we want to initialise the two gradients based on the - hashes. Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 31 ), but - we correct for that right at the end. */ - brw_ADD( p, t, param, brw_imm_f( -1.0 ) ); - brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) ); - brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) ); - - brw_MUL( p, x0, x0, param ); - brw_MUL( p, x1, x1, t ); - - /* We interpolate between the gradients using the polynomial - 6t^5 - 15t^4 + 10t^3 (Perlin). 
*/ - brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); - brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the - pipeline */ - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); - brw_MUL( p, param, tmp[ 0 ], param ); - brw_MUL( p, x1, x1, param ); - brw_ADD( p, x0, x0, x1 ); - /* scale by pow( 2, -30 ), to compensate for the format conversion - above and an extra factor of 2 so that a single gradient covers - the [-1,1] range */ - brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise1( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src, param, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src = get_src_reg( c, inst, 0, 0 ); - - param = alloc_tmp( c ); - - brw_MOV( p, param, src ); - - invoke_subroutine( c, SUB_NOISE1, noise1_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} - -static void noise2_sub( struct brw_wm_compile *c ) { - - struct brw_compile *p = &c->func; - struct brw_reg param0, param1, - x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */ - t, tmp[ 4 ], /* float temporaries */ - itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */ - int i; - int mark = mark_tmps( c ); - - x0y0 = alloc_tmp( c ); - x0y1 = alloc_tmp( c ); - x1y0 = alloc_tmp( c ); - x1y1 = alloc_tmp( c ); - t = alloc_tmp( c ); - for( i = 0; i < 4; i++ ) { - tmp[ i ] = alloc_tmp( c ); - itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); - } - itmp[ 4 ] = retype( x0y0, 
BRW_REGISTER_TYPE_UD ); - itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD ); - itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD ); - - param0 = lookup_tmp( c, mark - 3 ); - param1 = lookup_tmp( c, mark - 2 ); - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to - be hashed. Also compute the remainders (offsets within the unit - square), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); - brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); - brw_FRC( p, param0, param0 ); - brw_FRC( p, param1, param1 ); - brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ - brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ), - low_words( itmp[ 1 ] ) ); - brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ - brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ - brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) ); - brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) ); - brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) ); - - /* We're now ready to perform the hashing. The four hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 32x16 - bit multiplication, and 16-bit swizzles (which we get for - free). We can't use immediate operands in the multiplies, - because immediates are permitted only in src1 and the 16-bit - factor is permitted only in src0. 
*/ - for( i = 0; i < 4; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - - /* Now we want to initialise the four gradients based on the - hashes. Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 15 ), but - we correct for that right at the end. */ - brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); - brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); - brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) ); - brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) ); - - brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param0 ); - brw_MUL( p, x0y1, x0y1, param0 ); - - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 ); - brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t ); - brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t ); - - brw_ADD( p, x0y0, x0y0, tmp[ 0 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 2 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 1 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 3 ] ); - - /* We interpolate between the gradients using the polynomial - 6t^5 - 15t^4 + 10t^3 (Perlin). 
*/ - brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) ); - brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the - pipeline */ - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the - pipeline */ - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); - brw_MUL( p, param0, tmp[ 0 ], param0 ); - brw_MUL( p, param1, tmp[ 1 ], param1 ); - - /* Here we interpolate in the y dimension... */ - brw_MUL( p, x0y1, x0y1, param1 ); - brw_MUL( p, x1y1, x1y1, param1 ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. There are horrible register dependencies here, - but we have nothing else to do. 
*/ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, param0 ); - brw_ADD( p, x0y0, x0y0, x1y0 ); - - /* scale by pow( 2, -15 ), as described above */ - brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise2( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, param0, param1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src0 = get_src_reg( c, inst, 0, 0 ); - src1 = get_src_reg( c, inst, 0, 1 ); - - param0 = alloc_tmp( c ); - param1 = alloc_tmp( c ); - - brw_MOV( p, param0, src0 ); - brw_MOV( p, param1, src1 ); - - invoke_subroutine( c, SUB_NOISE2, noise2_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} - -/** - * The three-dimensional case is much like the one- and two- versions above, - * but since the number of corners is rapidly growing we now pack 16 16-bit - * hashes into each register to extract more parallelism from the EUs. 
- */ -static void noise3_sub( struct brw_wm_compile *c ) { - - struct brw_compile *p = &c->func; - struct brw_reg param0, param1, param2, - x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ - xi, yi, zi, /* interpolation coefficients */ - t, tmp[ 8 ], /* float temporaries */ - itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ - wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ - int i; - int mark = mark_tmps( c ); - - x0y0 = alloc_tmp( c ); - x0y1 = alloc_tmp( c ); - x1y0 = alloc_tmp( c ); - x1y1 = alloc_tmp( c ); - xi = alloc_tmp( c ); - yi = alloc_tmp( c ); - zi = alloc_tmp( c ); - t = alloc_tmp( c ); - for( i = 0; i < 8; i++ ) { - tmp[ i ] = alloc_tmp( c ); - itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); - wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); - } - - param0 = lookup_tmp( c, mark - 4 ); - param1 = lookup_tmp( c, mark - 3 ); - param2 = lookup_tmp( c, mark - 2 ); - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to - be hashed. Also compute the remainders (offsets within the unit - cube), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); - brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); - brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 ); - brw_FRC( p, param0, param0 ); - brw_FRC( p, param1, param1 ); - brw_FRC( p, param2, param2 ); - /* Since we now have only 16 bits of precision in the hash, we must - be more careful about thorough mixing to maintain entropy as we - squash the input vector into a small scalar. 
*/ - brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) ); - brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) ); - brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ), - brw_imm_uw( 0x9B93 ) ); - brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), - brw_imm_uw( 0xBC8F ) ); - - /* Temporarily disable the execution mask while we work with ExecSize=16 - channels (the mask is set for ExecSize=8 and is probably incorrect). - Although this might cause execution of unwanted channels, the code - writes only to temporary registers and has no side effects, so - disabling the mask is harmless. */ - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); - brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); - brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); - - /* We're now ready to perform the hashing. The eight hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 16x16 - bit multiplication, and 8-bit swizzles (which we get for - free). */ - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - brw_pop_insn_state( p ); - - /* Now we want to initialise the four rear gradients based on the - hashes. Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 15 ), but - we correct for that right at the end. 
*/ - /* x component */ - brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); - brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param0 ); - brw_MUL( p, x0y1, x0y1, param0 ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] 
); - - /* We interpolate between the gradients using the polynomial - 6t^5 - 15t^4 + 10t^3 (Perlin). */ - brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) ); - brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) ); - brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) ); - brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) ); - brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) ); - brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) ); - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) ); - brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) ); - brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) ); - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */ - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */ - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - - /* Here we interpolate in the y dimension... */ - brw_MUL( p, x0y1, x0y1, yi ); - brw_MUL( p, x1y1, x1y1, yi ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, xi ); - brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); - - /* Now do the same thing for the front four gradients... 
*/ - /* x component */ - brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param0 ); - brw_MUL( p, x0y1, x0y1, param0 ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param2, brw_imm_f( -1.0 ) ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* The interpolation coefficients are still around from last 
time, so - again interpolate in the y dimension... */ - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); - brw_MUL( p, x0y1, x0y1, yi ); - brw_MUL( p, x1y1, x1y1, yi ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. The rear face is in tmp[ 0 ] (see above), so this - time put the front face in tmp[ 1 ] and we're nearly there... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, xi ); - brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); - - /* The final interpolation, in the z dimension: */ - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); - - /* scale by pow( 2, -15 ), as described above */ - brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise3( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, src2, param0, param1, param2, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src0 = get_src_reg( c, inst, 0, 0 ); - src1 = get_src_reg( c, inst, 0, 1 ); - src2 = get_src_reg( c, inst, 0, 2 ); - - param0 = alloc_tmp( c ); - param1 = alloc_tmp( c ); - param2 = alloc_tmp( c ); - - brw_MOV( p, param0, src0 ); - brw_MOV( p, param1, src1 ); - brw_MOV( p, param2, src2 ); - - invoke_subroutine( c, SUB_NOISE3, noise3_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} - -/** - * For the four-dimensional case, the little micro-optimisation benefits - * we obtain by unrolling all the loops aren't worth the massive bloat it - * now causes. 
Instead, we loop twice around performing a similar operation - * to noise3, once for the w=0 cube and once for the w=1, with a bit more - * code to glue it all together. - */ -static void noise4_sub( struct brw_wm_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg param[ 4 ], - x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ - w0, /* noise for the w=0 cube */ - floors[ 2 ], /* integer coordinates of base corner of hypercube */ - interp[ 4 ], /* interpolation coefficients */ - t, tmp[ 8 ], /* float temporaries */ - itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ - wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ - int i, j; - int mark = mark_tmps( c ); - GLuint loop, origin; - - x0y0 = alloc_tmp( c ); - x0y1 = alloc_tmp( c ); - x1y0 = alloc_tmp( c ); - x1y1 = alloc_tmp( c ); - t = alloc_tmp( c ); - w0 = alloc_tmp( c ); - floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); - floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); - - for( i = 0; i < 4; i++ ) { - param[ i ] = lookup_tmp( c, mark - 5 + i ); - interp[ i ] = alloc_tmp( c ); - } - - for( i = 0; i < 8; i++ ) { - tmp[ i ] = alloc_tmp( c ); - itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); - wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); - } - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - /* We only want 16 bits of precision from the integral part of each - co-ordinate, but unfortunately the RNDD semantics would saturate - at 16 bits if we performed the operation directly to a 16-bit - destination. Therefore, we round to 32-bit temporaries where - appropriate, and then store only the lower 16 bits. 
*/ - brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] ); - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] ); - brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] ); - brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] ); - brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) ); - brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) ); - - /* Modify the flag register here, because the side effect is useful - later (see below). We know for certain that all flags will be - cleared, since the FRC instruction cannot possibly generate - negative results. Even for exceptional inputs (infinities, denormals, - NaNs), the architecture guarantees that the L conditional is false. */ - brw_set_conditionalmod( p, BRW_CONDITIONAL_L ); - brw_FRC( p, param[ 0 ], param[ 0 ] ); - brw_set_predicate_control( p, BRW_PREDICATE_NONE ); - for( i = 1; i < 4; i++ ) - brw_FRC( p, param[ i ], param[ i ] ); - - /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first - of all. */ - for( i = 0; i < 4; i++ ) - brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) ); - for( i = 0; i < 4; i++ ) - brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); - for( i = 0; i < 4; i++ ) - brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) ); - for( j = 0; j < 3; j++ ) - for( i = 0; i < 4; i++ ) - brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); - - /* Mark the current address, as it will be a jump destination. The - following code will be executed twice: first, with the flag - register clear indicating the w=0 case, and second with flags - set for w=1. */ - loop = p->nr_insn; - - /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to - be hashed. 
Since we have only 16 bits of precision in the hash, we - must be careful about thorough mixing to maintain entropy as we - squash the input vector into a small scalar. */ - brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ), - brw_imm_uw( 0xBC8F ) ); - brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ), - brw_imm_uw( 0xD0BD ) ); - brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ), - brw_imm_uw( 0x9B93 ) ); - brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ), - brw_imm_uw( 0xA359 ) ); - brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), - brw_imm_uw( 0xBC8F ) ); - - /* Temporarily disable the execution mask while we work with ExecSize=16 - channels (the mask is set for ExecSize=8 and is probably incorrect). - Although this might cause execution of unwanted channels, the code - writes only to temporary registers and has no side effects, so - disabling the mask is harmless. */ - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); - brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); - brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); - - /* We're now ready to perform the hashing. The eight hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 16x16 - bit multiplication, and 8-bit swizzles (which we get for - free). */ - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - brw_pop_insn_state( p ); - - /* Now we want to initialise the four rear gradients based on the - hashes. 
Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 15 ), but - we correct for that right at the end. */ - /* x component */ - brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); - brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param[ 0 ] ); - brw_MUL( p, x0y1, x0y1, param[ 0 ] ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - /* prepare t for the w component (used below): w the first time through - the loop; w - 1 the second time) */ - brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); - brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); - p->current->header.predicate_inverse = 1; - brw_MOV( p, t, param[ 3 ] ); - p->current->header.predicate_inverse = 0; - brw_set_predicate_control( p, BRW_PREDICATE_NONE ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - 
brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* w component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* Here we interpolate in the y dimension... */ - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); - brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); - brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); - brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); - - /* Now do the same thing for the front four gradients... 
*/ - /* x component */ - brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param[ 0 ] ); - brw_MUL( p, x0y1, x0y1, param[ 0 ] ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 
7 ], tmp[ 7 ], t ); - /* prepare t for the w component (used below): w the first time through - the loop; w - 1 the second time) */ - brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); - brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); - p->current->header.predicate_inverse = 1; - brw_MOV( p, t, param[ 3 ] ); - p->current->header.predicate_inverse = 0; - brw_set_predicate_control( p, BRW_PREDICATE_NONE ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* w component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* Interpolate in the y dimension: */ - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); - brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); - brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. The rear face is in tmp[ 0 ] (see above), so this - time put the front face in tmp[ 1 ] and we're nearly there... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); - brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); - - /* Another interpolation, in the z dimension: */ - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); - - /* Exit the loop if we've computed both cubes... 
*/ - origin = p->nr_insn; - brw_push_insn_state( p ); - brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) ); - brw_pop_insn_state( p ); - - /* Save the result for the w=0 case, and increment the w coordinate: */ - brw_MOV( p, w0, tmp[ 0 ] ); - brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ), - brw_imm_uw( 1 ) ); - - /* Loop around for the other cube. Explicitly set the flag register - (unfortunately we must spend an extra instruction to do this: we - can't rely on a side effect of the previous MOV or ADD because - conditional modifiers which are normally true might be false in - exceptional circumstances, e.g. given a NaN input; the add to - brw_ip_reg() is not suitable because the IP is not an 8-vector). */ - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) ); - brw_ADD( p, brw_ip_reg(), brw_ip_reg(), - brw_imm_d( ( loop - p->nr_insn ) << 4 ) ); - brw_pop_insn_state( p ); - - /* Patch the previous conditional branch now that we know the - destination address. */ - brw_set_src1( p->store + origin, - brw_imm_d( ( p->nr_insn - origin ) << 4 ) ); - - /* The very last interpolation. 
*/ - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 ); - - /* scale by pow( 2, -15 ), as described above */ - brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise4( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src0 = get_src_reg( c, inst, 0, 0 ); - src1 = get_src_reg( c, inst, 0, 1 ); - src2 = get_src_reg( c, inst, 0, 2 ); - src3 = get_src_reg( c, inst, 0, 3 ); - - param0 = alloc_tmp( c ); - param1 = alloc_tmp( c ); - param2 = alloc_tmp( c ); - param3 = alloc_tmp( c ); - - brw_MOV( p, param0, src0 ); - brw_MOV( p, param1, src1 ); - brw_MOV( p, param2, src2 ); - brw_MOV( p, param3, src3 ); - - invoke_subroutine( c, SUB_NOISE4, noise4_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} static void emit_wpos_xy(struct brw_wm_compile *c, const struct prog_instruction *inst) @@ -2543,19 +1507,18 @@ static void emit_wpos_xy(struct brw_wm_compile *c, * X and Y channels. 
*/ if (mask & WRITEMASK_X) { - /* X' = X - origin_x */ - brw_ADD(p, + /* X' = X */ + brw_MOV(p, dst[0], - retype(src0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); + retype(src0[0], BRW_REGISTER_TYPE_W)); } if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + /* Y' = height - 1 - Y */ brw_ADD(p, dst[1], negate(retype(src0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + brw_imm_d(c->key.drawable_height - 1)); } } @@ -2827,7 +1790,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_trunc(c, inst); break; case OPCODE_MOV: - case OPCODE_SWZ: emit_mov(c, inst); break; case OPCODE_DP3: @@ -2903,18 +1865,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_MAD: emit_mad(c, inst); break; - case OPCODE_NOISE1: - emit_noise1(c, inst); - break; - case OPCODE_NOISE2: - emit_noise2(c, inst); - break; - case OPCODE_NOISE3: - emit_noise3(c, inst); - break; - case OPCODE_NOISE4: - emit_noise4(c, inst); - break; case OPCODE_TEX: emit_tex(c, inst); break; diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 6279258339..0c411b57f5 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -422,7 +422,6 @@ void brw_wm_pass0( struct brw_wm_compile *c ) */ switch (inst->Opcode) { case OPCODE_MOV: - case OPCODE_SWZ: if (!inst->SaturateMode) { pass0_precalc_mov(c, inst); } diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index b449394029..d940ec09a9 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -120,7 +120,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) GLuint writemask; GLuint read0, read1, read2; - if (inst->opcode == OPCODE_KIL) { + if (inst->opcode == TGSI_OPCODE_KIL) { track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args 
contribute to final */ continue; } @@ -154,76 +154,75 @@ void brw_wm_pass1( struct brw_wm_compile *c ) /* Mark all inputs which contribute to the marked outputs: */ switch (inst->opcode) { - case OPCODE_ABS: - case OPCODE_FLR: - case OPCODE_FRC: - case OPCODE_MOV: - case OPCODE_SWZ: - case OPCODE_TRUNC: + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_FRC: + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_TRUNC: read0 = writemask; break; - case OPCODE_SUB: - case OPCODE_SLT: - case OPCODE_SLE: - case OPCODE_SGE: - case OPCODE_SGT: - case OPCODE_SEQ: - case OPCODE_SNE: - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MUL: read0 = writemask; read1 = writemask; break; - case OPCODE_DDX: - case OPCODE_DDY: + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: read0 = writemask; break; - case OPCODE_MAD: - case OPCODE_CMP: - case OPCODE_LRP: + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_CMP: + case TGSI_OPCODE_LRP: read0 = writemask; read1 = writemask; read2 = writemask; break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ; if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ; if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY; read1 = read0; break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - case OPCODE_SCS: + case TGSI_OPCODE_COS: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_SCS: case WM_CINTERP: case WM_PIXELXY: read0 = WRITEMASK_X; break; - case OPCODE_POW: + case TGSI_OPCODE_POW: read0 = WRITEMASK_X; read1 = WRITEMASK_X; break; - case OPCODE_TEX: - case OPCODE_TXP: + case 
TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: read0 = get_texcoord_mask(inst->tex_idx); if (inst->tex_shadow) read0 |= WRITEMASK_Z; break; - case OPCODE_TXB: + case TGSI_OPCODE_TXB: /* Shadow ignored for txb. */ read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W; @@ -254,28 +253,28 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read2 = WRITEMASK_W; /* pixel w */ break; - case OPCODE_DP3: + case TGSI_OPCODE_DP3: read0 = WRITEMASK_XYZ; read1 = WRITEMASK_XYZ; break; - case OPCODE_DPH: + case TGSI_OPCODE_DPH: read0 = WRITEMASK_XYZ; read1 = WRITEMASK_XYZW; break; - case OPCODE_DP4: + case TGSI_OPCODE_DP4: read0 = WRITEMASK_XYZW; read1 = WRITEMASK_XYZW; break; - case OPCODE_LIT: + case TGSI_OPCODE_LIT: read0 = WRITEMASK_XYW; break; - case OPCODE_DST: + case TGSI_OPCODE_DST: case WM_FRONTFACING: - case OPCODE_KIL_NV: + case TGSI_OPCODE_KIL_NV: default: break; } diff --git a/src/gallium/drivers/i965/intel_chipset.h b/src/gallium/drivers/i965/intel_chipset.h index 3dc8653a73..3c38f1676c 100644 --- a/src/gallium/drivers/i965/intel_chipset.h +++ b/src/gallium/drivers/i965/intel_chipset.h @@ -66,7 +66,6 @@ #define PCI_CHIP_Q45_G 0x2E12 #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 -#define PCI_CHIP_B43_G 0x2E42 #define PCI_CHIP_ILD_G 0x0042 #define PCI_CHIP_ILM_G 0x0046 @@ -84,8 +83,7 @@ #define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ - devid == PCI_CHIP_G41_G || \ - devid == PCI_CHIP_B43_G) + devid == PCI_CHIP_G41_G) #define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) #define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) -- cgit v1.2.3 From 074606a806df755ecbb84e0a1182c66fd0b2a8dd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 24 Oct 2009 13:18:34 +0100 Subject: i965g: more files compiling --- src/gallium/drivers/i965/brw_batchbuffer.h | 124 ++++++++++++ src/gallium/drivers/i965/brw_cc.c | 16 +- src/gallium/drivers/i965/brw_clip.c | 80 +++----- src/gallium/drivers/i965/brw_clip.h | 7 +- 
src/gallium/drivers/i965/brw_clip_unfilled.c | 2 +- src/gallium/drivers/i965/brw_clip_util.c | 2 +- src/gallium/drivers/i965/brw_context.c | 2 +- src/gallium/drivers/i965/brw_context.h | 89 ++++----- src/gallium/drivers/i965/brw_curbe.c | 10 +- src/gallium/drivers/i965/brw_defines.h | 4 +- src/gallium/drivers/i965/brw_draw.c | 12 +- src/gallium/drivers/i965/brw_draw_upload.c | 2 +- src/gallium/drivers/i965/brw_eu.h | 32 +++- src/gallium/drivers/i965/brw_eu_emit.c | 4 +- src/gallium/drivers/i965/brw_gs.c | 2 +- src/gallium/drivers/i965/brw_gs_emit.c | 2 +- src/gallium/drivers/i965/brw_misc_state.c | 2 +- src/gallium/drivers/i965/brw_pipe_flush.c | 2 +- src/gallium/drivers/i965/brw_pipe_query.c | 4 +- src/gallium/drivers/i965/brw_pipe_rast.c | 46 +++++ src/gallium/drivers/i965/brw_pipe_rast.h | 14 ++ src/gallium/drivers/i965/brw_pipe_shader.c | 159 ++++++++++++++++ src/gallium/drivers/i965/brw_reg.h | 79 ++++++++ src/gallium/drivers/i965/brw_screen.h | 78 ++++++++ src/gallium/drivers/i965/brw_screen_surface.c | 4 +- src/gallium/drivers/i965/brw_sf.c | 2 +- src/gallium/drivers/i965/brw_sf.h | 1 - src/gallium/drivers/i965/brw_sf_emit.c | 2 +- src/gallium/drivers/i965/brw_state.h | 2 +- src/gallium/drivers/i965/brw_state_batch.c | 6 +- src/gallium/drivers/i965/brw_state_cache.c | 2 +- src/gallium/drivers/i965/brw_state_upload.c | 2 +- src/gallium/drivers/i965/brw_tex_layout.c | 2 +- src/gallium/drivers/i965/brw_urb.c | 2 +- src/gallium/drivers/i965/brw_util.h | 5 +- src/gallium/drivers/i965/brw_vs.c | 3 +- src/gallium/drivers/i965/brw_vs.h | 1 - src/gallium/drivers/i965/brw_vs_emit.c | 82 ++++---- src/gallium/drivers/i965/brw_winsys.h | 243 ++++++++++++++++++++++++ src/gallium/drivers/i965/brw_wm.h | 1 - src/gallium/drivers/i965/brw_wm_debug.c | 2 +- src/gallium/drivers/i965/brw_wm_emit.c | 84 ++++---- src/gallium/drivers/i965/brw_wm_fp.c | 60 +++--- src/gallium/drivers/i965/brw_wm_pass0.c | 1 - src/gallium/drivers/i965/brw_wm_pass1.c | 68 +++---- 
src/gallium/drivers/i965/brw_wm_surface_state.c | 2 +- src/gallium/drivers/i965/intel_batchbuffer.h | 168 ---------------- 47 files changed, 1027 insertions(+), 492 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_batchbuffer.h create mode 100644 src/gallium/drivers/i965/brw_pipe_rast.c create mode 100644 src/gallium/drivers/i965/brw_pipe_rast.h create mode 100644 src/gallium/drivers/i965/brw_pipe_shader.c create mode 100644 src/gallium/drivers/i965/brw_reg.h create mode 100644 src/gallium/drivers/i965/brw_screen.h create mode 100644 src/gallium/drivers/i965/brw_winsys.h delete mode 100644 src/gallium/drivers/i965/intel_batchbuffer.h (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h new file mode 100644 index 0000000000..76b3c1bf69 --- /dev/null +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -0,0 +1,124 @@ +#ifndef BRW_BATCHBUFFER_H +#define BRW_BATCHBUFFER_H + +#include "brw_types.h" +#include "brw_winsys.h" +#include "brw_reg.h" + +#define BATCH_SZ 16384 +#define BATCH_RESERVED 16 + +/* All ignored: + */ +enum cliprect_mode { + IGNORE_CLIPRECTS, + LOOP_CLIPRECTS, + NO_LOOP_CLIPRECTS, + REFERENCES_CLIPRECTS +}; + +void brw_batchbuffer_free(struct brw_batchbuffer *batch); + +void _brw_batchbuffer_flush(struct brw_batchbuffer *batch, + const char *file, int line); + +#define brw_batchbuffer_flush(batch) \ + _brw_batchbuffer_flush(batch, __FILE__, __LINE__) + +void brw_batchbuffer_reset(struct brw_batchbuffer *batch); + + +/* Unlike bmBufferData, this currently requires the buffer be mapped. + * Consider it a convenience function wrapping multple + * intel_buffer_dword() calls. 
+ */ +void brw_batchbuffer_data(struct brw_batchbuffer *batch, + const void *data, GLuint bytes, + enum cliprect_mode cliprect_mode); + +void brw_batchbuffer_release_space(struct brw_batchbuffer *batch, + GLuint bytes); + +GLboolean brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, + struct brw_winsys_buffer *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); + +/* Inline functions - might actually be better off with these + * non-inlined. Certainly better off switching all command packets to + * be passed as structs rather than dwords, but that's a little bit of + * work... + */ +static INLINE GLint +brw_batchbuffer_space(struct brw_batchbuffer *batch) +{ + return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); +} + + +static INLINE void +brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword) +{ + assert(batch->map); + assert(brw_batchbuffer_space(batch) >= 4); + *(GLuint *) (batch->ptr) = dword; + batch->ptr += 4; +} + +static INLINE boolean +brw_batchbuffer_require_space(struct brw_batchbuffer *batch, + GLuint sz, + enum cliprect_mode cliprect_mode) +{ + assert(sz < batch->size - 8); + if (brw_batchbuffer_space(batch) < sz) { + assert(0); + return FALSE; + } + + /* All commands should be executed once regardless of cliprect + * mode. 
+ */ + (void)cliprect_mode; +} + +/* Here are the crusty old macros, to be removed: + */ +#define BATCH_LOCALS + +#define BEGIN_BATCH(n, cliprect_mode) do { \ + brw_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \ + assert(intel->batch->emit.start_ptr == NULL); \ + intel->batch->emit.total = (n) * 4; \ + intel->batch->emit.start_ptr = intel->batch->ptr; \ +} while (0) + +#define OUT_BATCH(d) brw_batchbuffer_emit_dword(intel->batch, d) + +#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ + assert((unsigned) (delta) < buf->size); \ + brw_batchbuffer_emit_reloc(intel->batch, buf, \ + read_domains, write_domain, delta); \ +} while (0) + +#define ADVANCE_BATCH() do { \ + unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ + assert(intel->batch->emit.start_ptr != NULL); \ + if (_n != intel->batch->emit.total) { \ + fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \ + _n, intel->batch->emit.total); \ + abort(); \ + } \ + intel->batch->emit.start_ptr = NULL; \ +} while(0) + + +static INLINE void +brw_batchbuffer_emit_mi_flush(struct brw_batchbuffer *batch) +{ + brw_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS); + brw_batchbuffer_emit_dword(batch, MI_FLUSH); +} + +#endif diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index bf2743ebbe..c8e7851d75 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -65,7 +65,7 @@ static void prepare_cc_vp( struct brw_context *brw ) memset(&ccv, 0, sizeof(ccv)); /* PIPE_NEW_VIEWPORT */ - calc_sane_viewport( &brw->vp, &svp ); + calc_sane_viewport( &brw->curr.vp, &svp ); ccv.min_depth = svp.near; ccv.max_depth = svp.far; @@ -109,13 +109,13 @@ static void cc_unit_populate_key(const struct brw_context *brw, struct brw_cc_unit_key *key) { - key->cc0 = brw->dsa->cc0; - key->cc1 = brw->dsa->cc1; - key->cc2 = brw->dsa->cc2; - key->cc3 = combine_cc3( brw->dsa->cc3, brw->blend->cc3 ); - key->cc5 = brw->blend->cc5; 
- key->cc6 = brw->blend->cc6; - key->cc7 = brw->blend->cc7; + key->cc0 = brw->curr.dsa->cc0; + key->cc1 = brw->curr.dsa->cc1; + key->cc2 = brw->curr.dsa->cc2; + key->cc3 = combine_cc3( brw->curr.dsa->cc3, brw->curr.blend->cc3 ); + key->cc5 = brw->curr.blend->cc5; + key->cc6 = brw->curr.blend->cc6; + key->cc7 = brw->curr.blend->cc7; } /** diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index d82ebeb9a9..591e904705 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -33,13 +33,14 @@ #include "util/u_math.h" -#include "intel_batchbuffer.h" - +#include "brw_screen.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" #include "brw_util.h" #include "brw_state.h" +#include "brw_pipe_rast.h" #include "brw_clip.h" @@ -77,13 +78,16 @@ static void compile_clip_prog( struct brw_context *brw, else delta = REG_SIZE; - for (i = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<curr.rast->clip_key, sizeof key); + /* BRW_NEW_REDUCED_PRIMITIVE */ key.primitive = brw->reduced_primitive; - /* CACHE_NEW_VS_PROG */ - key.attrs = brw->vs.prog_data->outputs_written; - /* PIPE_NEW_RAST */ - key.do_flat_shading = brw->rast.base.flatshade; - /* PIPE_NEW_UCP */ - key.nr_userclip = brw->nr_ucp; - if (BRW_IS_IGDNG(brw)) - key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; - else - key.clip_mode = BRW_CLIPMODE_NORMAL; + /* PIPE_NEW_VS */ + key.nr_attrs = brw->curr.vs->info.file_max[TGSI_FILE_OUTPUT] + 1; - /* PIPE_NEW_RAST */ - if (key.primitive == PIPE_PRIM_TRIANGLES) { - if (brw->rast->cull_mode = PIPE_WINDING_BOTH) - key.clip_mode = BRW_CLIPMODE_REJECT_ALL; - else { - key.fill_ccw = CLIP_CULL; - key.fill_cw = CLIP_CULL; - - if (!(brw->rast->cull_mode & PIPE_WINDING_CCW)) { - key.fill_ccw = translate_fill(brw->rast.fill_ccw); - } - - if (!(brw->rast->cull_mode & PIPE_WINDING_CW)) { - key.fill_cw = translate_fill(brw->rast.fill_cw); - } - - if (key.fill_cw != CLIP_FILL || 
- key.fill_ccw != CLIP_FILL) { - key.do_unfilled = 1; - key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; - } - - key.offset_ccw = brw->rast.offset_ccw; - key.offset_cw = brw->rast.offset_cw; - - if (brw->rast.light_twoside && - key.fill_cw != CLIP_CULL) - key.copy_bfc_cw = 1; - - if (brw->rast.light_twoside && - key.fill_ccw != CLIP_CULL) - key.copy_bfc_ccw = 1; - } - } - } + /* PIPE_NEW_CLIP */ + key.nr_userclip = brw->curr.ucp.nr; brw->sws->bo_unreference(brw->clip.prog_bo); brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, @@ -212,7 +178,7 @@ static void upload_clip_prog(struct brw_context *brw) const struct brw_tracked_state brw_clip_prog = { .dirty = { .mesa = (PIPE_NEW_RAST | - PIPE_NEW_UCP), + PIPE_NEW_CLIP), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index d80ec819b9..cfe51bf292 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -42,8 +42,7 @@ * up polygon offset and flatshading at this point: */ struct brw_clip_prog_key { - GLuint attrs:32; - + GLuint nr_attrs:5; GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; @@ -55,7 +54,7 @@ struct brw_clip_prog_key { GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; GLuint clip_mode:3; - GLuint pad1:12; + GLuint pad1:7; GLfloat offset_factor; GLfloat offset_units; @@ -117,7 +116,7 @@ struct brw_clip_compile { GLuint last_mrf; GLuint header_position_offset; - GLuint offset[VERT_ATTRIB_MAX]; + GLuint offset[PIPE_MAX_SHADER_OUTPUTS]; GLboolean need_ff_sync; }; diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c index 4baff55806..8501599aef 100644 --- a/src/gallium/drivers/i965/brw_clip_unfilled.c +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -29,7 +29,7 @@ * Keith Whitwell */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include 
"brw_context.h" diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 7a6c46ce07..60bfd3538e 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -93,7 +93,7 @@ void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) /* value.xyz *= value.rhw */ brw_set_access_mode(p, BRW_ALIGN_16); - brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); + brw_MUL(p, brw_writemask(pos, BRW_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); brw_set_access_mode(p, BRW_ALIGN_1); } diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index 063ada5772..07a5420d6e 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -38,7 +38,7 @@ #include "brw_state.h" #include "brw_vs.h" #include "brw_screen_tex.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 6699d3bdb6..3a2fece45c 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -36,6 +36,8 @@ #include "brw_structs.h" #include "brw_winsys.h" #include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "tgsi/tgsi_scan.h" /* Glossary: @@ -143,6 +145,27 @@ struct brw_blend_state { }; +struct brw_rasterizer_state; + + +struct brw_vertex_shader { + const struct tgsi_token *tokens; + struct tgsi_shader_info info; + + struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; +}; + + +struct brw_fragment_shader { + const struct tgsi_token *tokens; + struct tgsi_shader_info info; + + GLboolean isGLSL; + + struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; +}; @@ -157,6 +180,7 @@ struct brw_blend_state { #define PIPE_NEW_VERTEX_SHADER 0x2 #define 
PIPE_NEW_FRAGMENT_CONSTS 0x2 #define PIPE_NEW_VERTEX_CONSTS 0x2 +#define PIPE_NEW_CLIP 0x2 #define BRW_NEW_URB_FENCE 0x1 @@ -196,25 +220,6 @@ struct brw_state_flags { }; -struct brw_vertex_program { - const struct tgsi_token *tokens; - GLuint id; - struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ - GLboolean use_const_buffer; -}; - - -/** Subclass of Mesa fragment program */ -struct brw_fragment_program { - const struct tgsi_token *tokens; - - GLuint id; /**< serial no. to identify frag progs, never re-used */ - GLboolean isGLSL; /**< any IF/LOOP/CONT/BREAK instructions */ - - struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ - GLboolean use_const_buffer; -}; - /* Data about a particular attempt to compile a program. Note that * there can be many of these, each in a different GL state @@ -452,24 +457,29 @@ struct brw_query_object { */ struct brw_context { - struct pipe_context *pipe; - struct pipe_screen *screen; - + struct pipe_context pipe; + + struct brw_screen *brw_screen; struct brw_winsys_screen *sws; GLuint primitive; + GLuint reduced_primitive; GLboolean emit_state_always; GLboolean no_batch_wrap; /* Active vertex program: */ - const struct gl_vertex_program *vertex_program; - const struct gl_fragment_program *fragment_program; - struct pipe_framebuffer_state fb; - struct brw_depth_stencil_alpha_state *dsa; - struct brw_blend_state *blend; - struct pipe_viewport_state vp; + struct { + const struct brw_vertex_shader *vs; + const struct brw_fragment_shader *fs; + const struct brw_blend_state *blend; + const struct brw_rasterizer_state *rast; + const struct brw_depth_stencil_alpha_state *dsa; + struct pipe_framebuffer_state fb; + struct pipe_viewport_state vp; + struct pipe_clip_state ucp; + } curr; struct { struct brw_state_flags dirty; @@ -719,29 +729,6 @@ brw_context( struct pipe_context *ctx ) return (struct brw_context *)ctx; } -static INLINE struct brw_vertex_program * -brw_vertex_program(struct 
gl_vertex_program *p) -{ - return (struct brw_vertex_program *) p; -} - -static INLINE const struct brw_vertex_program * -brw_vertex_program_const(const struct gl_vertex_program *p) -{ - return (const struct brw_vertex_program *) p; -} - -static INLINE struct brw_fragment_program * -brw_fragment_program(struct gl_fragment_program *p) -{ - return (struct brw_fragment_program *) p; -} - -static INLINE const struct brw_fragment_program * -brw_fragment_program_const(const struct gl_fragment_program *p) -{ - return (const struct brw_fragment_program *) p; -} diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 33ea9a00f7..f2524d75e2 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -30,7 +30,7 @@ */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_regions.h" #include "brw_context.h" #include "brw_defines.h" @@ -55,8 +55,8 @@ static void calculate_curbe_offsets( struct brw_context *brw ) GLuint nr_clip_regs = 0; GLuint total_regs; - /* PIPE_NEW_UCP */ - if (brw->nr_ucp) { + /* PIPE_NEW_CLIP */ + if (brw->curr.ucp.nr) { GLuint nr_planes = 6 + brw->nr_ucp; nr_clip_regs = (nr_planes * 4 + 15) / 16; } @@ -106,7 +106,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) const struct brw_tracked_state brw_curbe_offsets = { .dirty = { - .mesa = PIPE_NEW_UCP, + .mesa = PIPE_NEW_CLIP, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = CACHE_NEW_WM_PROG }, @@ -327,7 +327,7 @@ const struct brw_tracked_state brw_constant_buffer = { .dirty = { .mesa = (PIPE_NEW_FS_CONSTANTS | PIPE_NEW_VS_CONSTANTS | - PIPE_NEW_UCP), + PIPE_NEW_CLIP), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index 282c5b18f4..1dc64ddc8f 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ 
b/src/gallium/drivers/i965/brw_defines.h @@ -840,8 +840,8 @@ #include "intel_chipset.h" -#define BRW_IS_G4X(brw) (IS_G4X((brw)->brw_screen->deviceID)) -#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->brw_screen->deviceID)) +#define BRW_IS_G4X(brw) (IS_G4X((brw)->brw_screen->pci_id)) +#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->brw_screen->pci_id)) #define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) #define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) #define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 856999f3ef..741537309a 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -31,7 +31,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_buffer_objects.h" #define FILE_DEBUG_FLAG DEBUG_BATCH @@ -133,7 +133,7 @@ static void brw_emit_prim(struct brw_context *brw, ADVANCE_BATCH(); } if (prim_packet.verts_per_instance) { - intel_batchbuffer_data( brw->intel.batch, &prim_packet, + brw_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } if (intel->always_flush_cache) { @@ -224,7 +224,7 @@ static GLboolean brw_try_draw_prims( struct brw_context *brw, return ret; if (intel->always_flush_batch) - intel_batchbuffer_flush(intel->batch); + brw_batchbuffer_flush(intel->batch); return 0; } @@ -249,12 +249,10 @@ void brw_draw_prims( struct brw_context *brw, */ ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - /* Otherwise, we really are out of memory. Pass the drawing - * command to the software tnl module and which will in turn call - * swrast to do the drawing. 
+ /* Otherwise, flush and retry: */ if (ret != 0) { - intel_batchbuffer_flush(intel->batch); + brw_batchbuffer_flush(intel->batch); ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); assert(ret == 0); } diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index dce015d79f..1ab65d60c4 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -35,7 +35,7 @@ #include "brw_state.h" #include "brw_fallback.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_buffer_objects.h" #include "intel_tex.h" diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h index 30603bdd0e..46d52a473b 100644 --- a/src/gallium/drivers/i965/brw_eu.h +++ b/src/gallium/drivers/i965/brw_eu.h @@ -35,7 +35,6 @@ #include "brw_structs.h" #include "brw_defines.h" -#include "shader/prog_instruction.h" #define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) #define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) @@ -45,6 +44,23 @@ #define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) +#define BRW_WRITEMASK_NONE 0x00 +#define BRW_WRITEMASK_X 0x01 +#define BRW_WRITEMASK_Y 0x02 +#define BRW_WRITEMASK_XY 0x03 +#define BRW_WRITEMASK_Z 0x04 +#define BRW_WRITEMASK_XZ 0x05 +#define BRW_WRITEMASK_YZ 0x06 +#define BRW_WRITEMASK_XYZ 0x07 +#define BRW_WRITEMASK_W 0x08 +#define BRW_WRITEMASK_XW 0x09 +#define BRW_WRITEMASK_YW 0x0A +#define BRW_WRITEMASK_XYW 0x0B +#define BRW_WRITEMASK_ZW 0x0C +#define BRW_WRITEMASK_XZW 0x0D +#define BRW_WRITEMASK_YZW 0x0E +#define BRW_WRITEMASK_XYZW 0x0F + #define REG_SIZE (8*4) @@ -157,7 +173,7 @@ static INLINE int type_sz( GLuint type ) * \param width one of BRW_WIDTH_x * \param hstride one of BRW_HORIZONTAL_STRIDE_x * \param swizzle one of BRW_SWIZZLE_x - * \param writemask WRITEMASK_X/Y/Z/W bitfield + * \param writemask 
BRW_WRITEMASK_X/Y/Z/W bitfield */ static INLINE struct brw_reg brw_reg( GLuint file, GLuint nr, @@ -215,7 +231,7 @@ static INLINE struct brw_reg brw_vec16_reg( GLuint file, BRW_WIDTH_16, BRW_HORIZONTAL_STRIDE_1, BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); + BRW_WRITEMASK_XYZW); } /** Construct float[8] register */ @@ -231,7 +247,7 @@ static INLINE struct brw_reg brw_vec8_reg( GLuint file, BRW_WIDTH_8, BRW_HORIZONTAL_STRIDE_1, BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); + BRW_WRITEMASK_XYZW); } /** Construct float[4] register */ @@ -247,7 +263,7 @@ static INLINE struct brw_reg brw_vec4_reg( GLuint file, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_1, BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); + BRW_WRITEMASK_XYZW); } /** Construct float[2] register */ @@ -263,7 +279,7 @@ static INLINE struct brw_reg brw_vec2_reg( GLuint file, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_1, BRW_SWIZZLE_XYXY, - WRITEMASK_XY); + BRW_WRITEMASK_XY); } /** Construct float[1] register */ @@ -279,7 +295,7 @@ static INLINE struct brw_reg brw_vec1_reg( GLuint file, BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0, BRW_SWIZZLE_XXXX, - WRITEMASK_X); + BRW_WRITEMASK_X); } @@ -510,7 +526,7 @@ static INLINE struct brw_reg brw_ip_reg( void ) BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0, BRW_SWIZZLE_XYZW, /* NOTE! */ - WRITEMASK_XYZW); /* NOTE! */ + BRW_WRITEMASK_XYZW); /* NOTE! */ } static INLINE struct brw_reg brw_acc_reg( void ) diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 241cdc33f8..f6b8843e01 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -1276,7 +1276,7 @@ void brw_SAMPLE(struct brw_compile *p, * instruction, so that is a guide for whether a workaround is * needed. 
*/ - if (writemask != WRITEMASK_XYZW) { + if (writemask != BRW_WRITEMASK_XYZW) { GLuint dst_offset = 0; GLuint i, newmask = 0, len = 0; @@ -1299,7 +1299,7 @@ void brw_SAMPLE(struct brw_compile *p, else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); - newmask = ~newmask & WRITEMASK_XYZW; + newmask = ~newmask & BRW_WRITEMASK_XYZW; brw_push_insn_state(p); diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 58930e7964..692ce46679 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -29,7 +29,7 @@ * Keith Whitwell */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c index 9ec206d7e8..fd8e2acced 100644 --- a/src/gallium/drivers/i965/brw_gs_emit.c +++ b/src/gallium/drivers/i965/brw_gs_emit.c @@ -30,7 +30,7 @@ */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index d33bf40a01..eb39be8545 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -31,7 +31,7 @@ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_regions.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index d5b7bd3b83..e85a1a9c1b 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -1,6 +1,6 @@ /** - * called from intel_batchbuffer_flush and children before sending a + * called from brw_batchbuffer_flush and children before sending a * batchbuffer off. 
*/ static void brw_finish_batch(struct intel_context *intel) diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c index 0b9ba0c0ed..55242ac6ad 100644 --- a/src/gallium/drivers/i965/brw_pipe_query.c +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -42,7 +42,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_reg.h" /** Waits on the query object's BO and totals the results for this query */ @@ -122,7 +122,7 @@ brw_end_query(struct pipe_context *pipe, struct pipe_query *q) */ if (query->bo) { brw_emit_query_end(brw); - intel_batchbuffer_flush(brw->batch); + brw_batchbuffer_flush(brw->batch); brw->sws->bo_unreference(brw->query.bo); brw->query.bo = NULL; diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c new file mode 100644 index 0000000000..ff64dbd48d --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -0,0 +1,46 @@ + +static void +calculate_clip_key_rast() +{ + if (BRW_IS_IGDNG(brw)) + key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; + else + key.clip_mode = BRW_CLIPMODE_NORMAL; + + key.do_flat_shading = brw->rast->templ.flatshade; + + if (key.primitive == PIPE_PRIM_TRIANGLES) { + if (brw->rast->templ.cull_mode = PIPE_WINDING_BOTH) + key.clip_mode = BRW_CLIPMODE_REJECT_ALL; + else { + key.fill_ccw = CLIP_CULL; + key.fill_cw = CLIP_CULL; + + if (!(brw->rast->templ.cull_mode & PIPE_WINDING_CCW)) { + key.fill_ccw = translate_fill(brw->rast.fill_ccw); + } + + if (!(brw->rast->templ.cull_mode & PIPE_WINDING_CW)) { + key.fill_cw = translate_fill(brw->rast.fill_cw); + } + + if (key.fill_cw != CLIP_FILL || + key.fill_ccw != CLIP_FILL) { + key.do_unfilled = 1; + key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + } + + key.offset_ccw = brw->rast.templ.offset_ccw; + key.offset_cw = brw->rast.templ.offset_cw; + + if (brw->rast.templ.light_twoside && + key.fill_cw != CLIP_CULL) + key.copy_bfc_cw = 1; + + 
if (brw->rast.templ.light_twoside && + key.fill_ccw != CLIP_CULL) + key.copy_bfc_ccw = 1; + } + } + } +} diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h new file mode 100644 index 0000000000..6ceaa1fb09 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_rast.h @@ -0,0 +1,14 @@ +#ifndef BRW_PIPE_RAST_H +#define BRW_PIPE_RAST_H + +#include "brw_clip.h" + +struct brw_rasterizer_state { + struct pipe_rasterizer_state templ; /* for draw module */ + + /* Precalculated hardware state: + */ + struct brw_clip_prog_key clip_key; +}; + +#endif diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c new file mode 100644 index 0000000000..fbb772d18c --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -0,0 +1,159 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_context.h" +#include "brw_util.h" +#include "brw_wm.h" + +static void brwBindProgram( struct brw_context *brw, + GLenum target, + struct gl_program *prog ) +{ + struct brw_context *brw = brw_context(ctx); + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + break; + case GL_FRAGMENT_PROGRAM_ARB: + brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; + break; + } +} + +static struct gl_program *brwNewProgram( structg brw_context *brw, + GLenum target, + GLuint id ) +{ + struct brw_context *brw = brw_context(ctx); + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: { + struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program); + if (prog) { + prog->id = brw->program_id++; + + return _mesa_init_vertex_program( ctx, &prog->program, + target, id ); + } + else + return NULL; + } + + case GL_FRAGMENT_PROGRAM_ARB: { + struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program); + if (prog) { + prog->id = brw->program_id++; + + return _mesa_init_fragment_program( ctx, &prog->program, + target, id ); + } + else + return NULL; + } + + default: + return _mesa_new_program(ctx, target, id); + } +} + +static void brwDeleteProgram( struct brw_context *brw, + struct gl_program *prog ) +{ + if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; + struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); + brw->sws->bo_unreference(brw_fprog->const_buffer); + } + + _mesa_delete_program( ctx, prog ); +} + + +static GLboolean 
brwIsProgramNative( struct brw_context *brw, + GLenum target, + struct gl_program *prog ) +{ + return GL_TRUE; +} + +static void brwProgramStringNotify( struct brw_context *brw, + GLenum target, + struct gl_program *prog ) +{ + struct brw_context *brw = brw_context(ctx); + + if (target == GL_FRAGMENT_PROGRAM_ARB) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; + struct brw_fragment_program *newFP = brw_fragment_program(fprog); + const struct brw_fragment_program *curFP = + brw_fragment_program_const(brw->fragment_program); + + if (fprog->FogOption) { + _mesa_append_fog_code(ctx, fprog); + fprog->FogOption = GL_NONE; + } + + if (newFP == curFP) + brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; + newFP->id = brw->program_id++; + newFP->isGLSL = brw_wm_is_glsl(fprog); + } + else if (target == GL_VERTEX_PROGRAM_ARB) { + struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; + struct brw_vertex_program *newVP = brw_vertex_program(vprog); + const struct brw_vertex_program *curVP = + brw_vertex_program_const(brw->vertex_program); + + if (newVP == curVP) + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + if (newVP->program.IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, &newVP->program); + } + newVP->id = brw->program_id++; + + /* Also tell tnl about it: + */ + _tnl_program_string(ctx, target, prog); + } +} + +void brwInitFragProgFuncs( struct dd_function_table *functions ) +{ + assert(functions->ProgramStringNotify == _tnl_program_string); + + functions->BindProgram = brwBindProgram; + functions->NewProgram = brwNewProgram; + functions->DeleteProgram = brwDeleteProgram; + functions->IsProgramNative = brwIsProgramNative; + functions->ProgramStringNotify = brwProgramStringNotify; +} + diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h new file mode 100644 index 0000000000..a640104d71 --- /dev/null +++ b/src/gallium/drivers/i965/brw_reg.h @@ -0,0 +1,79 @@ 
+/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_REG_H +#define BRW_REG_H + +#define CMD_MI (0x0 << 29) +#define CMD_2D (0x2 << 29) +#define CMD_3D (0x3 << 29) + +#define MI_NOOP (CMD_MI | 0) +#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) +#define MI_FLUSH (CMD_MI | (4 << 23)) + +#define _3DSTATE_DRAWRECT_INFO_I965 (CMD_3D | (3 << 27) | (1 << 24) | 0x2) + +/** @{ + * + * PIPE_CONTROL operation, a combination MI_FLUSH and register write with + * additional flushing control. 
+ */ +#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | 2) +#define PIPE_CONTROL_NO_WRITE (0 << 14) +#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14) +#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14) +#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14) +#define PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define PIPE_CONTROL_WRITE_FLUSH (1 << 12) +#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11) +#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) +#define PIPE_CONTROL_PPGTT_WRITE (0 << 2) +#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) + +/** @} */ + +#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) +#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) +#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) + +/* BR00 */ +#define XY_BLT_WRITE_ALPHA (1 << 21) +#define XY_BLT_WRITE_RGB (1 << 20) +#define XY_SRC_TILED (1 << 15) +#define XY_DST_TILED (1 << 11) + +/* BR13 */ +#define BR13_565 (0x1 << 24) +#define BR13_8888 (0x3 << 24) + +#define FENCE_LINEAR 0 +#define FENCE_XMAJOR 1 +#define FENCE_YMAJOR 2 + + +#endif diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h new file mode 100644 index 0000000000..716b55c52b --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen.h @@ -0,0 +1,78 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef BRW_SCREEN_H +#define BRW_SCREEN_H + +#include "pipe/p_state.h" +#include "pipe/p_screen.h" + + +struct brw_winsys_screen; + + +/** + * Subclass of pipe_screen + */ +struct brw_screen +{ + struct pipe_screen base; + + struct brw_winsys_screen *sws; + + boolean is_i945; + uint pci_id; +}; + +/** + * Subclass of pipe_transfer + */ +struct brw_transfer +{ + struct pipe_transfer base; + + unsigned offset; +}; + + +/* + * Cast wrappers + */ +static INLINE struct brw_screen * +brw_screen(struct pipe_screen *pscreen) +{ + return (struct brw_screen *) pscreen; +} + +static INLINE struct brw_transfer * +brw_transfer(struct pipe_transfer *transfer) +{ + return (struct brw_transfer *)transfer; +} + + +#endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c index d199d0b81a..544be6a089 100644 --- a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -1,6 +1,6 @@ /* _NEW_BUFFERS */ - if (IS_965(intel->intelScreen->deviceID) && - !IS_G4X(intel->intelScreen->deviceID)) { + if (IS_965(brw->brw_screen->pci_id) && + !IS_G4X(brw->brw_screen->pci_id)) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; struct intel_renderbuffer *irb = intel_renderbuffer(rb); diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 0115f77c08..54202cbd12 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -30,7 +30,7 @@ */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h index 26c2e8891a..c99116b8b1 100644 --- a/src/gallium/drivers/i965/brw_sf.h +++ b/src/gallium/drivers/i965/brw_sf.h @@ -34,7 
+34,6 @@ #define BRW_SF_H -#include "shader/program.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index c98d7ec13a..4acb2b7d72 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -30,7 +30,7 @@ */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index b716097bfc..02657eaba7 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -157,7 +157,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) +#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) GLboolean brw_cached_batch_struct( struct brw_context *brw, diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c index 9568794625..b285837070 100644 --- a/src/gallium/drivers/i965/brw_state_batch.c +++ b/src/gallium/drivers/i965/brw_state_batch.c @@ -32,7 +32,7 @@ #include "brw_state.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" @@ -47,7 +47,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct header *newheader = (struct header *)data; if (brw->emit_state_always) { - intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } @@ -74,7 +74,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, emit: memcpy(item->header, 
newheader, sz); - intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index 91d0f80297..1b5f27cc16 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -57,7 +57,7 @@ */ #include "brw_state.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index b68b6cb21a..842380e38f 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -33,7 +33,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" /* This is used to initialize brw->state.atoms[]. 
We could use this * list directly except for a single atom, brw_constant_buffer, which diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c index 75cdc18912..813cd31f49 100644 --- a/src/gallium/drivers/i965/brw_tex_layout.c +++ b/src/gallium/drivers/i965/brw_tex_layout.c @@ -47,7 +47,7 @@ GLboolean brw_miptree_layout(struct brw_context *brw, switch (mt->target) { case GL_TEXTURE_CUBE_MAP: - if (IS_IGDNG(intel->intelScreen->deviceID)) { + if (IS_IGDNG(brw->brw_screen->pci_id)) { GLuint align_h = 2, align_w = 4; GLuint level; GLuint x = 0; diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c index 8c6f4355a6..18d79c5ebb 100644 --- a/src/gallium/drivers/i965/brw_urb.c +++ b/src/gallium/drivers/i965/brw_urb.c @@ -31,7 +31,7 @@ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_util.h b/src/gallium/drivers/i965/brw_util.h index 37c3acbc11..b5f9a36e7b 100644 --- a/src/gallium/drivers/i965/brw_util.h +++ b/src/gallium/drivers/i965/brw_util.h @@ -36,9 +36,8 @@ #include "brw_types.h" extern GLuint brw_count_bits( GLuint val ); -extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); -extern GLuint brw_translate_blend_factor( GLenum factor ); -extern GLuint brw_translate_blend_equation( GLenum mode ); +extern GLuint brw_translate_blend_factor( unsigned factor ); +extern GLuint brw_translate_blend_equation( unsigned mode ); diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 97e523c3ee..dcd687ac34 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -34,7 +34,6 @@ #include "brw_vs.h" #include "brw_util.h" #include "brw_state.h" -#include "shader/prog_print.h" @@ -113,7 +112,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state 
brw_vs_prog = { .dirty = { - .mesa = PIPE_NEW_UCP | PIPE_NEW_RAST, + .mesa = PIPE_NEW_CLIP | PIPE_NEW_RAST, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index 4a591365c9..54f7d7d7c4 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -36,7 +36,6 @@ #include "brw_context.h" #include "brw_eu.h" -#include "shader/program.h" struct brw_vs_prog_key { diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 6adb743017..e946944295 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -192,7 +192,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) BRW_WIDTH_8, BRW_HORIZONTAL_STRIDE_1, BRW_SWIZZLE_XXXX, - WRITEMASK_X); + BRW_WRITEMASK_X); reg++; } @@ -487,7 +487,7 @@ static void emit_exp_noalias( struct brw_vs_compile *c, struct brw_compile *p = &c->func; - if (dst.dw1.bits.writemask & WRITEMASK_X) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_X) { struct brw_reg tmp = get_tmp(c); struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); @@ -499,23 +499,23 @@ static void emit_exp_noalias( struct brw_vs_compile *c, /* Adjust exponent for floating point: * exp += 127 */ - brw_ADD(p, brw_writemask(tmp_d, WRITEMASK_X), tmp_d, brw_imm_d(127)); + brw_ADD(p, brw_writemask(tmp_d, BRW_WRITEMASK_X), tmp_d, brw_imm_d(127)); /* Install exponent and sign. 
* Excess drops off the edge: */ - brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), WRITEMASK_X), + brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), BRW_WRITEMASK_X), tmp_d, brw_imm_d(23)); release_tmp(c, tmp); } - if (dst.dw1.bits.writemask & WRITEMASK_Y) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y) { /* result[1] = arg0.x - floor(arg0.x) */ - brw_FRC(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0, 0)); + brw_FRC(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0, 0)); } - if (dst.dw1.bits.writemask & WRITEMASK_Z) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) { /* As with the LOG instruction, we might be better off just * doing a taylor expansion here, seeing as we have to do all * the prep work. @@ -525,14 +525,14 @@ static void emit_exp_noalias( struct brw_vs_compile *c, */ emit_math1(c, BRW_MATH_FUNCTION_EXP, - brw_writemask(dst, WRITEMASK_Z), + brw_writemask(dst, BRW_WRITEMASK_Z), brw_swizzle1(arg0, 0), BRW_MATH_PRECISION_FULL); } - if (dst.dw1.bits.writemask & WRITEMASK_W) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) { /* result[3] = 1.0; */ - brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1)); + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), brw_imm_f(1)); } } @@ -562,36 +562,36 @@ static void emit_log_noalias( struct brw_vs_compile *c, * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127 * result[1].i = (x.i & ((1<<23)-1) + (127<<23) */ - if (dst.dw1.bits.writemask & WRITEMASK_XZ) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_XZ) { brw_AND(p, - brw_writemask(tmp_ud, WRITEMASK_X), + brw_writemask(tmp_ud, BRW_WRITEMASK_X), brw_swizzle1(arg0_ud, 0), brw_imm_ud((1U<<31)-1)); brw_SHR(p, - brw_writemask(tmp_ud, WRITEMASK_X), + brw_writemask(tmp_ud, BRW_WRITEMASK_X), tmp_ud, brw_imm_ud(23)); brw_ADD(p, - brw_writemask(tmp, WRITEMASK_X), + brw_writemask(tmp, BRW_WRITEMASK_X), retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? 
*/ brw_imm_d(-127)); } - if (dst.dw1.bits.writemask & WRITEMASK_YZ) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_YZ) { brw_AND(p, - brw_writemask(tmp_ud, WRITEMASK_Y), + brw_writemask(tmp_ud, BRW_WRITEMASK_Y), brw_swizzle1(arg0_ud, 0), brw_imm_ud((1<<23)-1)); brw_OR(p, - brw_writemask(tmp_ud, WRITEMASK_Y), + brw_writemask(tmp_ud, BRW_WRITEMASK_Y), tmp_ud, brw_imm_ud(127<<23)); } - if (dst.dw1.bits.writemask & WRITEMASK_Z) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) { /* result[2] = result[0] + LOG2(result[1]); */ /* Why bother? The above is just a hint how to do this with a @@ -606,19 +606,19 @@ static void emit_log_noalias( struct brw_vs_compile *c, */ emit_math1(c, BRW_MATH_FUNCTION_LOG, - brw_writemask(tmp, WRITEMASK_Z), + brw_writemask(tmp, BRW_WRITEMASK_Z), brw_swizzle1(tmp, 1), BRW_MATH_PRECISION_FULL); brw_ADD(p, - brw_writemask(tmp, WRITEMASK_Z), + brw_writemask(tmp, BRW_WRITEMASK_Z), brw_swizzle1(tmp, 2), brw_swizzle1(tmp, 0)); } - if (dst.dw1.bits.writemask & WRITEMASK_W) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) { /* result[3] = 1.0; */ - brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1)); + brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_W), brw_imm_f(1)); } if (need_tmp) { @@ -639,14 +639,14 @@ static void emit_dst_noalias( struct brw_vs_compile *c, /* There must be a better way to do this: */ - if (dst.dw1.bits.writemask & WRITEMASK_X) - brw_MOV(p, brw_writemask(dst, WRITEMASK_X), brw_imm_f(1.0)); - if (dst.dw1.bits.writemask & WRITEMASK_Y) - brw_MUL(p, brw_writemask(dst, WRITEMASK_Y), arg0, arg1); - if (dst.dw1.bits.writemask & WRITEMASK_Z) - brw_MOV(p, brw_writemask(dst, WRITEMASK_Z), arg0); - if (dst.dw1.bits.writemask & WRITEMASK_W) - brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1); + if (dst.dw1.bits.writemask & BRW_WRITEMASK_X) + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_X), brw_imm_f(1.0)); + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y) + brw_MUL(p, brw_writemask(dst, BRW_WRITEMASK_Y), arg0, arg1); + if 
(dst.dw1.bits.writemask & BRW_WRITEMASK_Z) + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Z), arg0); + if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), arg1); } @@ -672,8 +672,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c, if (need_tmp) tmp = get_tmp(c); - brw_MOV(p, brw_writemask(dst, WRITEMASK_YZ), brw_imm_f(0)); - brw_MOV(p, brw_writemask(dst, WRITEMASK_XW), brw_imm_f(1)); + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_YZ), brw_imm_f(0)); + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_XW), brw_imm_f(1)); /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order * to get all channels active inside the IF. In the clipping code @@ -683,15 +683,15 @@ static void emit_lit_noalias( struct brw_vs_compile *c, brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); if_insn = brw_IF(p, BRW_EXECUTE_8); { - brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0)); + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0,0)); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); - brw_MOV(p, brw_writemask(tmp, WRITEMASK_Z), brw_swizzle1(arg0,1)); + brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_Z), brw_swizzle1(arg0,1)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); emit_math2(c, BRW_MATH_FUNCTION_POW, - brw_writemask(dst, WRITEMASK_Z), + brw_writemask(dst, BRW_WRITEMASK_Z), brw_swizzle1(tmp, 2), brw_swizzle1(arg0, 3), BRW_MATH_PRECISION_PARTIAL); @@ -1045,7 +1045,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* ndc = 1.0 / pos.w */ emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); /* ndc.xyz = pos * ndc */ - brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + brw_MUL(p, brw_writemask(ndc, BRW_WRITEMASK_XYZ), pos, ndc); /* Update the header for point size, user clipping flags, and -ve rhw * workaround. 
@@ -1062,14 +1062,14 @@ static void emit_vertex_write( struct brw_vs_compile *c) if (c->prog_data.outputs_written & (1<regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; - brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); - brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); + brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); } for (i = 0; i < c->key.nr_userclip; i++) { brw_set_conditionalmod(p, BRW_CONDITIONAL_L); brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); - brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<first_overflow_output > 0) { /* Not all of the vertex outputs/results fit into the MRF. * Move the overflowed attributes from the GRF to the MRF and diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h new file mode 100644 index 0000000000..2142db5a4d --- /dev/null +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -0,0 +1,243 @@ +/************************************************************************** + * + * Copyright © 2009 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_WINSYS_H +#define BRW_WINSYS_H + +#include "pipe/p_compiler.h" + +struct brw_winsys; +struct pipe_fence_handle; + +/* This currently just wraps dri_bo: + */ +struct brw_winsys_buffer { + struct brw_winsys_screen *sws; + void *bo; + unsigned offset; +}; + +enum brw_buffer_usage { + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_SAMPLER, + I915_GEM_DOMAIN_VERTEX, + I915_GEM_DOMAIN_INSTRUCTION, + + + /* XXX: migrate from domains to explicit usage cases, eg below: + */ + + /* use on textures */ + BRW_USAGE_RENDER = 0x01, + BRW_USAGE_SAMPLER = 0x02, + BRW_USAGE_2D_TARGET = 0x04, + BRW_USAGE_2D_SOURCE = 0x08, + /* use on vertex */ + BRW_USAGE_VERTEX = 0x10, +}; + +enum brw_buffer_type +{ + BRW_BUFFER_TYPE_TEXTURE, + BRW_BUFFER_TYPE_SCANOUT, /**< a texture used for scanning out from */ + BRW_BUFFER_TYPE_VERTEX, +}; + + +/* AKA winsys context: + */ +struct brw_batchbuffer { + + struct brw_winsys *iws; + struct brw_winsys_buffer *buf; + + /** + * Values exported to speed up writing the batchbuffer, + * instead of having to go through an accessor function for + * each dword written. + */ + /*@{*/ + uint8_t *map; + uint8_t *ptr; + size_t size; + + size_t relocs; + size_t max_relocs; + /*@}*/ +}; + +struct brw_winsys_screen { + + /** + * Batchbuffer functions. + */ + /*@{*/ + /** + * Create a new batchbuffer. 
+ */ + struct brw_batchbuffer *(*batchbuffer_create)(struct brw_winsys_screen *iws); + + /** + * Emit a relocation to a buffer. + * Target position in batchbuffer is the same as ptr. + */ + int (*batchbuffer_reloc)(struct brw_batchbuffer *batch, + unsigned offset, + struct brw_winsys_buffer *reloc, + unsigned pre_add, + enum brw_buffer_usage usage); + + /** + * Flush a batchbuffer. + */ + void (*batchbuffer_flush)(struct brw_batchbuffer *batch, + struct pipe_fence_handle **fence); + + /** + * Destroy a batchbuffer. + */ + void (*batchbuffer_destroy)(struct brw_batchbuffer *batch); + /*@}*/ + + + /** + * Buffer functions. + */ + /*@{*/ + /** + * Create a buffer. + */ + struct brw_winsys_buffer *(*buffer_create)(struct brw_winsys *iws, + unsigned size, + unsigned alignment, + enum brw_buffer_type type); + + + /* Reference and unreference buffers: + */ + void (*bo_reference)( struct brw_winsys_buffer *buffer ); + void (*bo_unreference)( struct brw_winsys_buffer *buffer ); + void (*bo_emit_reloc)( struct brw_winsys_buffer *buffer, + unsigned domain, + unsigned a, + unsigned b, + unsigned offset, + struct brw_winsys_buffer *b2); + + /** + * Map a buffer. + */ + void *(*buffer_map)(struct brw_winsys *iws, + struct brw_winsys_buffer *buffer, + boolean write); + + /** + * Unmap a buffer. + */ + void (*buffer_unmap)(struct brw_winsys *iws, + struct brw_winsys_buffer *buffer); + + /** + * Write to a buffer. + * + * Arguments follow pipe_buffer_write. + */ + int (*buffer_write)(struct brw_winsys *iws, + struct brw_winsys_buffer *dst, + size_t offset, + size_t size, + const void *data); + + void (*buffer_destroy)(struct brw_winsys *iws, + struct brw_winsys_buffer *buffer); + /*@}*/ + + + /** + * Fence functions. + */ + /*@{*/ + /** + * Reference fence and set ptr to fence. + */ + void (*fence_reference)(struct brw_winsys *iws, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence); + + /** + * Check if a fence has finished. 
+ */ + int (*fence_signalled)(struct brw_winsys *iws, + struct pipe_fence_handle *fence); + + /** + * Wait on a fence to finish. + */ + int (*fence_finish)(struct brw_winsys *iws, + struct pipe_fence_handle *fence); + /*@}*/ + + + /** + * Destroy the winsys. + */ + void (*destroy)(struct brw_winsys *iws); +}; + + +/** + * Create i915 pipe_screen. + */ +struct pipe_screen *i915_create_screen(struct brw_winsys *iws, unsigned pci_id); + +/** + * Create a i915 pipe_context. + */ +struct pipe_context *i915_create_context(struct pipe_screen *screen); + +/** + * Get the brw_winsys buffer backing the texture. + * + * TODO UGLY + */ +struct pipe_texture; +boolean i915_get_texture_buffer_brw(struct pipe_texture *texture, + struct brw_winsys_buffer **buffer, + unsigned *stride); + +/** + * Wrap a brw_winsys buffer with a texture blanket. + * + * TODO UGLY + */ +struct pipe_texture * i915_texture_blanket_brw(struct pipe_screen *screen, + struct pipe_texture *tmplt, + unsigned pitch, + struct brw_winsys_buffer *buffer); + + + + +#endif diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 756a680150..18775830f9 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -34,7 +34,6 @@ #define BRW_WM_H -#include "shader/prog_instruction.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c index 220821087c..c6659646f2 100644 --- a/src/gallium/drivers/i965/brw_wm_debug.c +++ b/src/gallium/drivers/i965/brw_wm_debug.c @@ -98,7 +98,7 @@ void brw_wm_print_insn( struct brw_wm_compile *c, } _mesa_printf("]"); - if (inst->writemask != WRITEMASK_XYZW) + if (inst->writemask != BRW_WRITEMASK_XYZW) _mesa_printf(".%s%s%s%s", GET_BIT(inst->writemask, 0) ? "x" : "", GET_BIT(inst->writemask, 1) ? 
"y" : "", diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index fec33f74eb..7df9b79d7a 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -72,14 +72,14 @@ static void emit_pixel_xy(struct brw_compile *p, /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ - if (mask & WRITEMASK_X) { + if (mask & BRW_WRITEMASK_X) { brw_ADD(p, vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } - if (mask & WRITEMASK_Y) { + if (mask & BRW_WRITEMASK_Y) { brw_ADD(p, vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw,5), 2, 4, 0), @@ -101,14 +101,14 @@ static void emit_delta_xy(struct brw_compile *p, /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers. */ - if (mask & WRITEMASK_X) { + if (mask & BRW_WRITEMASK_X) { brw_ADD(p, dst[0], retype(arg0[0], BRW_REGISTER_TYPE_UW), negate(r1)); } - if (mask & WRITEMASK_Y) { + if (mask & BRW_WRITEMASK_Y) { brw_ADD(p, dst[1], retype(arg0[1], BRW_REGISTER_TYPE_UW), @@ -124,7 +124,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; - if (mask & WRITEMASK_X) { + if (mask & BRW_WRITEMASK_X) { /* X' = X */ brw_MOV(p, dst[0], @@ -133,7 +133,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c, /* XXX: is this needed any more, or is this a NOOP? */ - if (mask & WRITEMASK_Y) { + if (mask & BRW_WRITEMASK_Y) { /* Y' = height - 1 - Y */ brw_ADD(p, dst[1], @@ -152,7 +152,7 @@ static void emit_pixel_w( struct brw_compile *p, /* Don't need this if all you are doing is interpolating color, for * instance. 
*/ - if (mask & WRITEMASK_W) { + if (mask & BRW_WRITEMASK_W) { struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the @@ -255,7 +255,7 @@ static void emit_frontfacing( struct brw_compile *p, struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); GLuint i; - if (!(mask & WRITEMASK_XYZW)) + if (!(mask & BRW_WRITEMASK_XYZW)) return; for (i = 0; i < 4; i++) { @@ -321,26 +321,26 @@ void emit_ddxy(struct brw_compile *p, BRW_VERTICAL_STRIDE_2, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); src1 = brw_reg(arg0[i].file, arg0[i].nr, 0, BRW_REGISTER_TYPE_F, BRW_VERTICAL_STRIDE_2, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); } else { src0 = brw_reg(arg0[i].file, arg0[i].nr, 0, BRW_REGISTER_TYPE_F, BRW_VERTICAL_STRIDE_4, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); src1 = brw_reg(arg0[i].file, arg0[i].nr, 2, BRW_REGISTER_TYPE_F, BRW_VERTICAL_STRIDE_4, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); } brw_ADD(p, dst[i], src0, negate(src1)); } @@ -611,12 +611,12 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; - if (!(mask & WRITEMASK_XYZW)) + if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & WRITEMASK_XYZW)); + assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -633,12 +633,12 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - int dst_chan = _mesa_ffs(mask 
& WRITEMASK_XYZW) - 1; + int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; - if (!(mask & WRITEMASK_XYZW)) + if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & WRITEMASK_XYZW)); + assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -656,12 +656,12 @@ static void emit_dph( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + const int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; - if (!(mask & WRITEMASK_XYZW)) + if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & WRITEMASK_XYZW)); + assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -681,7 +681,7 @@ static void emit_xpd( struct brw_compile *p, { GLuint i; - assert(!(mask & WRITEMASK_W) == WRITEMASK_X); + assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_X); for (i = 0 ; i < 3; i++) { if (mask & (1<tex_idx) { case TEXTURE_1D_INDEX: - emit = WRITEMASK_X; + emit = BRW_WRITEMASK_X; nr = 1; break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: - emit = WRITEMASK_XY; + emit = BRW_WRITEMASK_XY; nr = 2; break; case TEXTURE_3D_INDEX: case TEXTURE_CUBE_INDEX: - emit = WRITEMASK_XYZ; + emit = BRW_WRITEMASK_XYZ; nr = 3; break; default: @@ -815,7 +815,7 @@ static void emit_tex( struct brw_wm_compile *c, if (inst->tex_shadow) { nr = 4; - emit |= WRITEMASK_W; + emit |= BRW_WRITEMASK_W; } msgLength = 1; @@ -922,18 +922,18 @@ static void emit_lit( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { - assert((mask & WRITEMASK_XW) == 0); + assert((mask & BRW_WRITEMASK_XW) == 0); - if (mask & WRITEMASK_Y) { + if (mask & BRW_WRITEMASK_Y) { brw_set_saturate(p, (mask & SATURATE) ? 
1 : 0); brw_MOV(p, dst[1], arg0[0]); brw_set_saturate(p, 0); } - if (mask & WRITEMASK_Z) { + if (mask & BRW_WRITEMASK_Z) { emit_math2(p, BRW_MATH_FUNCTION_POW, &dst[2], - WRITEMASK_X | (mask & SATURATE), + BRW_WRITEMASK_X | (mask & SATURATE), &arg0[1], &arg0[3]); } @@ -944,10 +944,10 @@ static void emit_lit( struct brw_compile *p, */ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0)); { - if (mask & WRITEMASK_Y) + if (mask & BRW_WRITEMASK_Y) brw_MOV(p, dst[1], brw_imm_f(0)); - if (mask & WRITEMASK_Z) + if (mask & BRW_WRITEMASK_Z) brw_MOV(p, dst[2], brw_imm_f(0)); } brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -1414,10 +1414,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* There is an scs math function, but it would need some * fixup for 16-element execution. */ - if (dst_flags & WRITEMASK_X) - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); - if (dst_flags & WRITEMASK_Y) - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + if (dst_flags & BRW_WRITEMASK_X) + emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]); + if (dst_flags & BRW_WRITEMASK_Y) + emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]); break; case OPCODE_POW: diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 5f47d86f71..be240031c7 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -115,7 +115,7 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx) struct prog_dst_register reg; reg.File = file; reg.Index = idx; - reg.WriteMask = WRITEMASK_XYZW; + reg.WriteMask = BRW_WRITEMASK_XYZW; reg.RelAddr = 0; reg.CondMask = COND_TR; reg.CondSwizzle = 0; @@ -249,7 +249,7 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) */ emit_op(c, WM_PIXELXY, - dst_mask(pixel_xy, WRITEMASK_XY), + dst_mask(pixel_xy, 
BRW_WRITEMASK_XY), 0, payload_r0_depth, src_undef(), @@ -272,7 +272,7 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) */ emit_op(c, WM_DELTAXY, - dst_mask(delta_xy, WRITEMASK_XY), + dst_mask(delta_xy, BRW_WRITEMASK_XY), 0, pixel_xy, payload_r0_depth, @@ -295,7 +295,7 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) */ emit_op(c, WM_PIXELW, - dst_mask(pixel_w, WRITEMASK_W), + dst_mask(pixel_w, BRW_WRITEMASK_W), 0, interp_wpos, deltas, @@ -327,13 +327,13 @@ static void emit_interp( struct brw_wm_compile *c, */ emit_op(c, WM_WPOSXY, - dst_mask(dst, WRITEMASK_XY), + dst_mask(dst, BRW_WRITEMASK_XY), 0, get_pixel_xy(c), src_undef(), src_undef()); - dst = dst_mask(dst, WRITEMASK_ZW); + dst = dst_mask(dst, BRW_WRITEMASK_ZW); /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw */ @@ -370,7 +370,7 @@ static void emit_interp( struct brw_wm_compile *c, /* Interpolate the fog coordinate */ emit_op(c, WM_PINTERP, - dst_mask(dst, WRITEMASK_X), + dst_mask(dst, BRW_WRITEMASK_X), 0, interp, deltas, @@ -378,7 +378,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_MOV, - dst_mask(dst, WRITEMASK_YZW), + dst_mask(dst, BRW_WRITEMASK_YZW), 0, src_swizzle(interp, SWIZZLE_ZERO, @@ -393,7 +393,7 @@ static void emit_interp( struct brw_wm_compile *c, /* XXX review/test this case */ emit_op(c, WM_FRONTFACING, - dst_mask(dst, WRITEMASK_X), + dst_mask(dst, BRW_WRITEMASK_X), 0, src_undef(), src_undef(), @@ -404,7 +404,7 @@ static void emit_interp( struct brw_wm_compile *c, /* XXX review/test this case */ emit_op(c, WM_PINTERP, - dst_mask(dst, WRITEMASK_XY), + dst_mask(dst, BRW_WRITEMASK_XY), 0, interp, deltas, @@ -412,7 +412,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_MOV, - dst_mask(dst, WRITEMASK_ZW), + dst_mask(dst, BRW_WRITEMASK_ZW), 0, src_swizzle(interp, SWIZZLE_ZERO, @@ -518,19 +518,19 @@ static void precalc_dst( struct brw_wm_compile *c, struct 
prog_src_register src1 = inst->SrcReg[1]; struct prog_dst_register dst = inst->DstReg; - if (dst.WriteMask & WRITEMASK_Y) { + if (dst.WriteMask & BRW_WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ emit_op(c, TGSI_OPCODE_MUL, - dst_mask(dst, WRITEMASK_Y), + dst_mask(dst, BRW_WRITEMASK_Y), inst->SaturateMode, src0, src1, src_undef()); } - if (dst.WriteMask & WRITEMASK_XZ) { + if (dst.WriteMask & BRW_WRITEMASK_XZ) { struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); @@ -538,7 +538,7 @@ static void precalc_dst( struct brw_wm_compile *c, */ swz = emit_op(c, TGSI_OPCODE_MOV, - dst_mask(dst, WRITEMASK_XZ), + dst_mask(dst, BRW_WRITEMASK_XZ), inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), src_undef(), @@ -546,12 +546,12 @@ static void precalc_dst( struct brw_wm_compile *c, /* Avoid letting negation flag of src0 affect our 1 constant. */ swz->SrcReg[0].Negate &= ~NEGATE_X; } - if (dst.WriteMask & WRITEMASK_W) { + if (dst.WriteMask & BRW_WRITEMASK_W) { /* dst.w = mov src1.w */ emit_op(c, TGSI_OPCODE_MOV, - dst_mask(dst, WRITEMASK_W), + dst_mask(dst, BRW_WRITEMASK_W), inst->SaturateMode, src1, src_undef(), @@ -566,14 +566,14 @@ static void precalc_lit( struct brw_wm_compile *c, struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register dst = inst->DstReg; - if (dst.WriteMask & WRITEMASK_XW) { + if (dst.WriteMask & BRW_WRITEMASK_XW) { struct prog_instruction *swz; /* dst.xw = swz src0.1111 */ swz = emit_op(c, TGSI_OPCODE_MOV, - dst_mask(dst, WRITEMASK_XW), + dst_mask(dst, BRW_WRITEMASK_XW), 0, src_swizzle1(src0, SWIZZLE_ONE), src_undef(), @@ -582,10 +582,10 @@ static void precalc_lit( struct brw_wm_compile *c, swz->SrcReg[0].Negate = NEGATE_NONE; } - if (dst.WriteMask & WRITEMASK_YZ) { + if (dst.WriteMask & BRW_WRITEMASK_YZ) { emit_op(c, TGSI_OPCODE_LIT, - dst_mask(dst, WRITEMASK_YZ), + dst_mask(dst, BRW_WRITEMASK_YZ), inst->SaturateMode, src0, src_undef(), @@ -649,7 +649,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* 
tmp0 = 1 / tmp1 */ emit_op(c, TGSI_OPCODE_RCP, - dst_mask(tmp0, WRITEMASK_X), + dst_mask(tmp0, BRW_WRITEMASK_X), 0, tmp1src, src_undef(), @@ -740,7 +740,7 @@ static void precalc_tex( struct brw_wm_compile *c, */ emit_op(c, TGSI_OPCODE_ADD, - dst_mask(tmp, WRITEMASK_XYZ), + dst_mask(tmp, BRW_WRITEMASK_XYZ), 0, tmpsrc, C0, @@ -751,7 +751,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_MUL, - dst_mask(tmp, WRITEMASK_Y), + dst_mask(tmp, BRW_WRITEMASK_Y), 0, tmpsrc, src_swizzle1(C0, W), @@ -766,7 +766,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_MAD, - dst_mask(dst, WRITEMASK_XYZ), + dst_mask(dst, BRW_WRITEMASK_XYZ), 0, swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), C1, @@ -776,7 +776,7 @@ static void precalc_tex( struct brw_wm_compile *c, */ emit_op(c, TGSI_OPCODE_MAD, - dst_mask(dst, WRITEMASK_Y), + dst_mask(dst, BRW_WRITEMASK_Y), 0, src_swizzle1(tmpsrc, Z), src_swizzle1(C1, W), @@ -863,7 +863,7 @@ static void precalc_txp( struct brw_wm_compile *c, */ emit_op(c, TGSI_OPCODE_RCP, - dst_mask(tmp, WRITEMASK_W), + dst_mask(tmp, BRW_WRITEMASK_W), 0, src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), src_undef(), @@ -873,7 +873,7 @@ static void precalc_txp( struct brw_wm_compile *c, */ emit_op(c, TGSI_OPCODE_MUL, - dst_mask(tmp, WRITEMASK_XYZ), + dst_mask(tmp, BRW_WRITEMASK_XYZ), 0, src0, src_swizzle1(src_reg_from_dst(tmp), W), @@ -1053,7 +1053,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) out = emit_insn(c, inst); /* This should probably be done in the parser. */ - out->DstReg.WriteMask &= WRITEMASK_XY; + out->DstReg.WriteMask &= BRW_WRITEMASK_XY; break; case TGSI_OPCODE_DST: @@ -1082,7 +1082,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) out = emit_insn(c, inst); /* This should probably be done in the parser. 
*/ - out->DstReg.WriteMask &= WRITEMASK_XYZ; + out->DstReg.WriteMask &= BRW_WRITEMASK_XYZ; break; case TGSI_OPCODE_KIL: diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 0c411b57f5..de5f5fe821 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -32,7 +32,6 @@ #include "brw_context.h" #include "brw_wm.h" -#include "shader/prog_parameter.h" diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index d940ec09a9..f2ae3a958f 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -91,15 +91,15 @@ static GLuint get_texcoord_mask( GLuint tex_idx ) { switch (tex_idx) { case TEXTURE_1D_INDEX: - return WRITEMASK_X; + return BRW_WRITEMASK_X; case TEXTURE_2D_INDEX: - return WRITEMASK_XY; + return BRW_WRITEMASK_XY; case TEXTURE_3D_INDEX: - return WRITEMASK_XYZ; + return BRW_WRITEMASK_XYZ; case TEXTURE_CUBE_INDEX: - return WRITEMASK_XYZ; + return BRW_WRITEMASK_XYZ; case TEXTURE_RECT_INDEX: - return WRITEMASK_XY; + return BRW_WRITEMASK_XY; default: return 0; } } @@ -121,16 +121,16 @@ void brw_wm_pass1( struct brw_wm_compile *c ) GLuint read0, read1, read2; if (inst->opcode == TGSI_OPCODE_KIL) { - track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */ + track_arg(c, inst, 0, BRW_WRITEMASK_XYZW); /* All args contribute to final */ continue; } if (inst->opcode == WM_FB_WRITE) { - track_arg(c, inst, 0, WRITEMASK_XYZW); - track_arg(c, inst, 1, WRITEMASK_XYZW); + track_arg(c, inst, 0, BRW_WRITEMASK_XYZW); + track_arg(c, inst, 1, BRW_WRITEMASK_XYZW); if (c->key.source_depth_to_render_target && c->key.computes_depth) - track_arg(c, inst, 2, WRITEMASK_Z); + track_arg(c, inst, 2, BRW_WRITEMASK_Z); else track_arg(c, inst, 2, 0); continue; @@ -191,9 +191,9 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case TGSI_OPCODE_XPD: - if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ; - 
if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ; - if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY; + if (writemask & BRW_WRITEMASK_X) read0 |= BRW_WRITEMASK_YZ; + if (writemask & BRW_WRITEMASK_Y) read0 |= BRW_WRITEMASK_XZ; + if (writemask & BRW_WRITEMASK_Z) read0 |= BRW_WRITEMASK_XY; read1 = read0; break; @@ -206,12 +206,12 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case TGSI_OPCODE_SCS: case WM_CINTERP: case WM_PIXELXY: - read0 = WRITEMASK_X; + read0 = BRW_WRITEMASK_X; break; case TGSI_OPCODE_POW: - read0 = WRITEMASK_X; - read1 = WRITEMASK_X; + read0 = BRW_WRITEMASK_X; + read1 = BRW_WRITEMASK_X; break; case TGSI_OPCODE_TEX: @@ -219,57 +219,57 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read0 = get_texcoord_mask(inst->tex_idx); if (inst->tex_shadow) - read0 |= WRITEMASK_Z; + read0 |= BRW_WRITEMASK_Z; break; case TGSI_OPCODE_TXB: /* Shadow ignored for txb. */ - read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W; + read0 = get_texcoord_mask(inst->tex_idx) | BRW_WRITEMASK_W; break; case WM_WPOSXY: - read0 = writemask & WRITEMASK_XY; + read0 = writemask & BRW_WRITEMASK_XY; break; case WM_DELTAXY: - read0 = writemask & WRITEMASK_XY; - read1 = WRITEMASK_X; + read0 = writemask & BRW_WRITEMASK_XY; + read1 = BRW_WRITEMASK_X; break; case WM_PIXELW: - read0 = WRITEMASK_X; - read1 = WRITEMASK_XY; + read0 = BRW_WRITEMASK_X; + read1 = BRW_WRITEMASK_XY; break; case WM_LINTERP: - read0 = WRITEMASK_X; - read1 = WRITEMASK_XY; + read0 = BRW_WRITEMASK_X; + read1 = BRW_WRITEMASK_XY; break; case WM_PINTERP: - read0 = WRITEMASK_X; /* interpolant */ - read1 = WRITEMASK_XY; /* deltas */ - read2 = WRITEMASK_W; /* pixel w */ + read0 = BRW_WRITEMASK_X; /* interpolant */ + read1 = BRW_WRITEMASK_XY; /* deltas */ + read2 = BRW_WRITEMASK_W; /* pixel w */ break; case TGSI_OPCODE_DP3: - read0 = WRITEMASK_XYZ; - read1 = WRITEMASK_XYZ; + read0 = BRW_WRITEMASK_XYZ; + read1 = BRW_WRITEMASK_XYZ; break; case TGSI_OPCODE_DPH: - read0 = WRITEMASK_XYZ; - read1 = WRITEMASK_XYZW; + read0 
= BRW_WRITEMASK_XYZ; + read1 = BRW_WRITEMASK_XYZW; break; case TGSI_OPCODE_DP4: - read0 = WRITEMASK_XYZW; - read1 = WRITEMASK_XYZW; + read0 = BRW_WRITEMASK_XYZW; + read1 = BRW_WRITEMASK_XYZW; break; case TGSI_OPCODE_LIT: - read0 = WRITEMASK_XYW; + read0 = BRW_WRITEMASK_XYW; break; case TGSI_OPCODE_DST: diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index 86dcb74b5b..5045c9b4a6 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -31,7 +31,7 @@ #include "intel_mipmap_tree.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_tex.h" #include "intel_fbo.h" diff --git a/src/gallium/drivers/i965/intel_batchbuffer.h b/src/gallium/drivers/i965/intel_batchbuffer.h deleted file mode 100644 index be04656aec..0000000000 --- a/src/gallium/drivers/i965/intel_batchbuffer.h +++ /dev/null @@ -1,168 +0,0 @@ -#ifndef INTEL_BATCHBUFFER_H -#define INTEL_BATCHBUFFER_H - -#include "intel_bufmgr.h" -#include "intel_reg.h" - -#define BATCH_SZ 16384 -#define BATCH_RESERVED 16 - -enum cliprect_mode { - /** - * Batchbuffer contents may be looped over per cliprect, but do not - * require it. - */ - IGNORE_CLIPRECTS, - /** - * Batchbuffer contents require looping over per cliprect at batch submit - * time. - * - * This will be upgraded to NO_LOOP_CLIPRECTS when there's a single - * constant cliprect, as in DRI2 or FBO rendering. - */ - LOOP_CLIPRECTS, - /** - * Batchbuffer contents contain drawing that should not be executed multiple - * times. - */ - NO_LOOP_CLIPRECTS, - /** - * Batchbuffer contents contain drawing that already handles cliprects, such - * as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE. - * - * Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch - * outside of LOCK/UNLOCK. 
This is upgraded to just NO_LOOP_CLIPRECTS when - * there's a constant cliprect, as in DRI2 or FBO rendering. - */ - REFERENCES_CLIPRECTS -}; - -struct intel_batchbuffer -{ - struct intel_context *intel; - - struct brw_winsys_buffer *buf; - - GLubyte *buffer; - - GLubyte *map; - GLubyte *ptr; - - GLuint size; - - /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ - struct { - GLuint total; - GLubyte *start_ptr; - } emit; - - GLuint dirty_state; -}; - -struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context - *intel); - -void intel_batchbuffer_free(struct intel_batchbuffer *batch); - - -void _intel_batchbuffer_flush(struct intel_batchbuffer *batch, - const char *file, int line); - -#define intel_batchbuffer_flush(batch) \ - _intel_batchbuffer_flush(batch, __FILE__, __LINE__) - -void intel_batchbuffer_reset(struct intel_batchbuffer *batch); - - -/* Unlike bmBufferData, this currently requires the buffer be mapped. - * Consider it a convenience function wrapping multple - * intel_buffer_dword() calls. - */ -void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes, - enum cliprect_mode cliprect_mode); - -void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, - GLuint bytes); - -GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, - struct brw_winsys_buffer *buffer, - uint32_t read_domains, - uint32_t write_domain, - uint32_t offset); - -/* Inline functions - might actually be better off with these - * non-inlined. Certainly better off switching all command packets to - * be passed as structs rather than dwords, but that's a little bit of - * work... 
- */ -static INLINE GLint -intel_batchbuffer_space(struct intel_batchbuffer *batch) -{ - return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); -} - - -static INLINE void -intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) -{ - assert(batch->map); - assert(intel_batchbuffer_space(batch) >= 4); - *(GLuint *) (batch->ptr) = dword; - batch->ptr += 4; -} - -static INLINE void -intel_batchbuffer_require_space(struct intel_batchbuffer *batch, - GLuint sz, - enum cliprect_mode cliprect_mode) -{ - assert(sz < batch->size - 8); - if (intel_batchbuffer_space(batch) < sz) - intel_batchbuffer_flush(batch); - - /* All commands should be executed once regardless of cliprect - * mode. - */ - (void)cliprect_mode; -} - -/* Here are the crusty old macros, to be removed: - */ -#define BATCH_LOCALS - -#define BEGIN_BATCH(n, cliprect_mode) do { \ - intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \ - assert(intel->batch->emit.start_ptr == NULL); \ - intel->batch->emit.total = (n) * 4; \ - intel->batch->emit.start_ptr = intel->batch->ptr; \ -} while (0) - -#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) - -#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ - assert((unsigned) (delta) < buf->size); \ - intel_batchbuffer_emit_reloc(intel->batch, buf, \ - read_domains, write_domain, delta); \ -} while (0) - -#define ADVANCE_BATCH() do { \ - unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ - assert(intel->batch->emit.start_ptr != NULL); \ - if (_n != intel->batch->emit.total) { \ - fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \ - _n, intel->batch->emit.total); \ - abort(); \ - } \ - intel->batch->emit.start_ptr = NULL; \ -} while(0) - - -static INLINE void -intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) -{ - intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS); - intel_batchbuffer_emit_dword(batch, MI_FLUSH); -} - -#endif -- cgit v1.2.3 From 
4dd2f6640b70e2313f8771f7588aa49a861153aa Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 Oct 2009 00:02:16 +0100 Subject: i965g: more work on compiling, particularly the brw_draw files --- src/gallium/auxiliary/util/u_debug.c | 27 +++ src/gallium/auxiliary/util/u_prim.h | 2 + src/gallium/auxiliary/util/u_upload_mgr.h | 2 + src/gallium/drivers/i965/Makefile | 2 +- src/gallium/drivers/i965/brw_batchbuffer.c | 198 +++++++++++++++ src/gallium/drivers/i965/brw_batchbuffer.h | 14 +- src/gallium/drivers/i965/brw_cc.c | 8 +- src/gallium/drivers/i965/brw_clip.c | 4 +- src/gallium/drivers/i965/brw_clip_state.c | 4 +- src/gallium/drivers/i965/brw_context.c | 2 +- src/gallium/drivers/i965/brw_context.h | 68 ++++-- src/gallium/drivers/i965/brw_curbe.c | 13 +- src/gallium/drivers/i965/brw_draw.c | 165 +++++++------ src/gallium/drivers/i965/brw_draw.h | 3 +- src/gallium/drivers/i965/brw_draw_upload.c | 372 +++++++++++++++++------------ src/gallium/drivers/i965/brw_eu.c | 5 +- src/gallium/drivers/i965/brw_eu_debug.c | 13 +- src/gallium/drivers/i965/brw_misc_state.c | 18 +- src/gallium/drivers/i965/brw_pipe_flush.c | 3 + src/gallium/drivers/i965/brw_pipe_shader.c | 19 ++ src/gallium/drivers/i965/brw_pipe_vertex.c | 25 +- src/gallium/drivers/i965/brw_screen.h | 22 ++ src/gallium/drivers/i965/brw_sf.c | 2 +- src/gallium/drivers/i965/brw_sf_state.c | 39 +-- src/gallium/drivers/i965/brw_state.h | 6 +- src/gallium/drivers/i965/brw_state_batch.c | 4 +- src/gallium/drivers/i965/brw_swtnl.c | 6 +- src/gallium/drivers/i965/brw_winsys.h | 7 + src/gallium/drivers/i965/brw_wm.c | 2 +- src/gallium/drivers/i965/brw_wm.h | 8 +- src/gallium/drivers/i965/brw_wm_glsl.c | 28 --- src/gallium/drivers/i965/brw_wm_pass0.c | 32 +-- src/mesa/state_tracker/st_draw.c | 3 +- 33 files changed, 722 insertions(+), 404 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_batchbuffer.c (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/auxiliary/util/u_debug.c 
b/src/gallium/auxiliary/util/u_debug.c index 96d400c839..321ac59a7d 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -69,6 +69,7 @@ #include "util/u_stream.h" #include "util/u_math.h" #include "util/u_tile.h" +#include "util/u_prim.h" #ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY @@ -600,6 +601,32 @@ const char *pf_name( enum pipe_format format ) } + +static const struct debug_named_value pipe_prim_names[] = { +#ifdef DEBUG + DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINES), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_LOOP), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLES), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_FAN), + DEBUG_NAMED_VALUE(PIPE_PRIM_QUADS), + DEBUG_NAMED_VALUE(PIPE_PRIM_QUAD_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_POLYGON), +#endif + DEBUG_NAMED_VALUE_END +}; + + +const char *u_prim_name( unsigned prim ) +{ + return debug_dump_enum(pipe_prim_names, prim); +} + + + + #ifdef DEBUG void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index a9b533eea7..7434329962 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -135,4 +135,6 @@ static INLINE unsigned u_reduced_prim( unsigned pipe_prim ) } } +const char *u_prim_name( unsigned pipe_prim ); + #endif diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h index 745b5834af..d414a1f2f6 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.h +++ b/src/gallium/auxiliary/util/u_upload_mgr.h @@ -32,6 +32,8 @@ #ifndef U_UPLOAD_MGR_H #define U_UPLOAD_MGR_H +#include "pipe/p_error.h" + struct pipe_screen; struct pipe_buffer; struct u_upload_mgr; diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 40c8364824..40e8aa8786 100644 --- 
a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -61,7 +61,7 @@ C_SOURCES = \ brw_wm_state.c \ brw_wm_surface_state.c \ brw_bo.c \ - intel_batchbuffer.c \ + brw_batchbuffer.c \ intel_tex_layout.c include ../../Makefile.template diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c new file mode 100644 index 0000000000..8bcac76ede --- /dev/null +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -0,0 +1,198 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "brw_batchbuffer.h" +#include "brw_decode.h" +#include "brw_reg.h" +#include "brw_winsys.h" + + +void +brw_batchbuffer_reset(struct brw_batchbuffer *batch) +{ + struct intel_context *intel = batch->intel; + + if (batch->buf != NULL) { + brw->sws->bo_unreference(batch->buf); + batch->buf = NULL; + } + + if (!batch->buffer && intel->ttm == GL_TRUE) + batch->buffer = malloc (intel->maxBatchSize); + + batch->buf = batch->sws->bo_alloc(batch->sws, + BRW_BUFFER_TYPE_BATCH, + intel->maxBatchSize, 4096); + if (batch->buffer) + batch->map = batch->buffer; + else { + batch->sws->bo_map(batch->buf, GL_TRUE); + batch->map = batch->buf->virtual; + } + batch->size = intel->maxBatchSize; + batch->ptr = batch->map; + batch->dirty_state = ~0; + batch->cliprect_mode = IGNORE_CLIPRECTS; +} + +struct brw_batchbuffer * +brw_batchbuffer_alloc(struct brw_winsys_screen *sws) +{ + struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer); + + batch->sws = sws; + brw_batchbuffer_reset(batch); + + return batch; +} + +void +brw_batchbuffer_free(struct brw_batchbuffer *batch) +{ + if (batch->map) { + dri_bo_unmap(batch->buf); + batch->map = NULL; + } + + brw->sws->bo_unreference(batch->buf); + batch->buf = NULL; + FREE(batch); +} + + +void +_brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, + int line) +{ + struct intel_context *intel = batch->intel; + GLuint used = batch->ptr - batch->map; + + if (used == 0) + return; + + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch->buf; + batch->sws->bo_reference(intel->first_post_swapbuffers_batch); + } + + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch->buf; + batch->sws->bo_reference(intel->first_post_swapbuffers_batch); + } + + + if (INTEL_DEBUG & DEBUG_BATCH) + fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", 
file, line, + used); + + /* Emit a flush if the bufmgr doesn't do it for us. */ + if (intel->always_flush_cache || !intel->ttm) { + *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); + batch->ptr += 4; + used = batch->ptr - batch->map; + } + + /* Round batchbuffer usage to 2 DWORDs. */ + + if ((used & 4) == 0) { + *(GLuint *) (batch->ptr) = 0; /* noop */ + batch->ptr += 4; + used = batch->ptr - batch->map; + } + + /* Mark the end of the buffer. */ + *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */ + batch->ptr += 4; + used = batch->ptr - batch->map; + + batch->sws->bo_unmap(batch->buf); + + batch->map = NULL; + batch->ptr = NULL; + + batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 ); + + if (INTEL_DEBUG & DEBUG_BATCH) { + dri_bo_map(batch->buf, GL_FALSE); + intel_decode(batch->buf->virtual, used / 4, batch->buf->offset, + brw->brw_screen->pci_id); + dri_bo_unmap(batch->buf); + } + + if (INTEL_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "waiting for idle\n"); + dri_bo_map(batch->buf, GL_TRUE); + dri_bo_unmap(batch->buf); + } + + /* Reset the buffer: + */ + brw_batchbuffer_reset(batch); +} + + +/* This is the only way buffers get added to the validate list. 
+ */ +GLboolean +brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, + struct brw_winsys_buffer *buffer, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta) +{ + int ret; + + if (batch->ptr - batch->map > batch->buf->size) + _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + + ret = batch->sws->bo_emit_reloc(batch->buf, + read_domains, + write_domain, + delta, + batch->ptr - batch->map, + buffer); + + /* + * Using the old buffer offset, write in what the right data would be, in case + * the buffer doesn't move and we can short-circuit the relocation processing + * in the kernel + */ + brw_batchbuffer_emit_dword (batch, buffer->offset + delta); + + return GL_TRUE; +} + +void +brw_batchbuffer_data(struct brw_batchbuffer *batch, + const void *data, GLuint bytes, + enum cliprect_mode cliprect_mode) +{ + assert((bytes & 3) == 0); + brw_batchbuffer_require_space(batch, bytes); + __memcpy(batch->ptr, data, bytes); + batch->ptr += bytes; +} diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index b8492882e1..25bb9cefca 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -33,18 +33,16 @@ void brw_batchbuffer_reset(struct brw_batchbuffer *batch); * Consider it a convenience function wrapping multple * intel_buffer_dword() calls. 
*/ -void brw_batchbuffer_data(struct brw_batchbuffer *batch, +int brw_batchbuffer_data(struct brw_batchbuffer *batch, const void *data, GLuint bytes, enum cliprect_mode cliprect_mode); -void brw_batchbuffer_release_space(struct brw_batchbuffer *batch, - GLuint bytes); -GLboolean brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, - struct brw_winsys_buffer *buffer, - uint32_t read_domains, - uint32_t write_domain, - uint32_t offset); +int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, + struct brw_winsys_buffer *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); /* Inline functions - might actually be better off with these * non-inlined. Certainly better off switching all command packets to diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index 76759304eb..ca10bc73f6 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -57,7 +57,7 @@ static void calc_sane_viewport( const struct pipe_viewport_state *vp, svp->far = 1; } -static void prepare_cc_vp( struct brw_context *brw ) +static int prepare_cc_vp( struct brw_context *brw ) { struct brw_cc_viewport ccv; struct sane_viewport svp; @@ -72,6 +72,8 @@ static void prepare_cc_vp( struct brw_context *brw ) brw->sws->bo_unreference(brw->cc.vp_bo); brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); + + return 0; } const struct brw_tracked_state brw_cc_vp = { @@ -158,7 +160,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) return bo; } -static void prepare_cc_unit( struct brw_context *brw ) +static int prepare_cc_unit( struct brw_context *brw ) { struct brw_cc_unit_key key; @@ -172,6 +174,8 @@ static void prepare_cc_unit( struct brw_context *brw ) if (brw->cc.state_bo == NULL) brw->cc.state_bo = cc_unit_create_from_key(brw, &key); + + return 0; } const struct brw_tracked_state brw_cc_unit = { diff --git a/src/gallium/drivers/i965/brw_clip.c 
b/src/gallium/drivers/i965/brw_clip.c index 622d9dba96..1a52fa771b 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -146,7 +146,7 @@ static void compile_clip_prog( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static void upload_clip_prog(struct brw_context *brw) +static int upload_clip_prog(struct brw_context *brw) { struct brw_clip_prog_key key; @@ -173,6 +173,8 @@ static void upload_clip_prog(struct brw_context *brw) &brw->clip.prog_data); if (brw->clip.prog_bo == NULL) compile_clip_prog( brw, &key ); + + return 0; } diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 25b8c6372f..bf4e6f5103 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -159,7 +159,7 @@ clip_unit_create_from_key(struct brw_context *brw, return bo; } -static void upload_clip_unit( struct brw_context *brw ) +static int upload_clip_unit( struct brw_context *brw ) { struct brw_clip_unit_key key; @@ -173,6 +173,8 @@ static void upload_clip_unit( struct brw_context *brw ) if (brw->clip.state_bo == NULL) { brw->clip.state_bo = clip_unit_create_from_key(brw, &key); } + + return 0; } const struct brw_tracked_state brw_clip_unit = { diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index e9605bafe6..e10b7d8bf5 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -105,7 +105,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen) brw->state.dirty.mesa = ~0; brw->state.dirty.brw = ~0; - brw->emit_state_always = 0; + brw->flags.always_emit_state = 0; make_empty_list(&brw->query.active_head); diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index dd782fdba9..7ead641811 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ 
b/src/gallium/drivers/i965/brw_context.h @@ -182,6 +182,8 @@ struct brw_fragment_shader { #define PIPE_NEW_FRAGMENT_CONSTANTS 0x2 #define PIPE_NEW_VERTEX_CONSTANTS 0x2 #define PIPE_NEW_CLIP 0x2 +#define PIPE_NEW_INDEX_BUFFER 0x2 +#define PIPE_NEW_INDEX_RANGE 0x2 #define BRW_NEW_URB_FENCE 0x1 @@ -387,8 +389,8 @@ struct brw_cache { */ struct brw_tracked_state { struct brw_state_flags dirty; - void (*prepare)( struct brw_context *brw ); - void (*emit)( struct brw_context *brw ); + int (*prepare)( struct brw_context *brw ); + int (*emit)( struct brw_context *brw ); }; /* Flags for brw->state.cache. @@ -465,9 +467,7 @@ struct brw_context GLuint primitive; GLuint reduced_primitive; - GLboolean emit_state_always; - - /* Active vertex program: + /* Active state from the state tracker: */ struct { const struct brw_vertex_shader *vertex_shader; @@ -475,11 +475,31 @@ struct brw_context const struct brw_blend_state *blend; const struct brw_rasterizer_state *rast; const struct brw_depth_stencil_alpha_state *zstencil; + + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; + struct pipe_framebuffer_state fb; struct pipe_viewport_state vp; struct pipe_clip_state ucp; struct pipe_buffer *vertex_constants; struct pipe_buffer *fragment_constants; + + /** + * Index buffer for this draw_prims call. + * + * Updates are signaled by PIPE_NEW_INDEX_BUFFER. 
+ */ + struct pipe_buffer *index_buffer; + unsigned index_size; + + /* Updates are signalled by PIPE_NEW_INDEX_RANGE: + */ + unsigned min_index; + unsigned max_index; + } curr; struct { @@ -504,30 +524,26 @@ struct brw_context struct brw_cached_batch_item *cached_batch_items; struct { - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned num_vertex_element; - unsigned num_vertex_buffer; - struct u_upload_mgr *upload_vertex; struct u_upload_mgr *upload_index; - - /* Summary of size and varying of active arrays, so we can check - * for changes to this state: + /* Information on uploaded vertex buffers: */ - struct brw_vertex_info info; - unsigned int min_index, max_index; + struct { + unsigned stride; /* in bytes between successive vertices */ + unsigned offset; /* in bytes, of first vertex in bo */ + unsigned vertex_count; /* count of valid vertices which may be accessed */ + struct brw_winsys_buffer *bo; + } vb[PIPE_MAX_ATTRIBS]; + + struct { + } ve[PIPE_MAX_ATTRIBS]; + + unsigned nr_vb; /* currently the same as curr.num_vertex_buffers */ + unsigned nr_ve; /* currently the same as curr.num_vertex_elements */ } vb; struct { - /** - * Index buffer for this draw_prims call. - * - * Updates are signaled by BRW_NEW_INDICES. - */ - const struct _mesa_index_buffer *ib; - /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. 
*/ struct brw_winsys_buffer *bo; unsigned int offset; @@ -668,6 +684,14 @@ struct brw_context int index; GLboolean active; } query; + + struct { + unsigned always_emit_state:1; + unsigned always_flush_batch:1; + unsigned force_swtnl:1; + unsigned no_swtnl:1; + } flags; + /* Used to give every program string a unique id */ GLuint program_id; diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index edc39ff223..278ffa4ca2 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -48,7 +48,7 @@ * constants. That greatly reduces the demand for space in the CURBE. * Some of the comments within are dated... */ -static void calculate_curbe_offsets( struct brw_context *brw ) +static int calculate_curbe_offsets( struct brw_context *brw ) { /* CACHE_NEW_WM_PROG */ const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; @@ -104,6 +104,8 @@ static void calculate_curbe_offsets( struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; } + + return 0; } @@ -157,7 +159,7 @@ static GLfloat fixed_plane[6][4] = { * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ -static void prepare_constant_buffer(struct brw_context *brw) +static int prepare_constant_buffer(struct brw_context *brw) { const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); @@ -170,7 +172,7 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } - return; + return 0; } buf = (GLfloat *) CALLOC(bufsz, 1); @@ -305,9 +307,11 @@ static void prepare_constant_buffer(struct brw_context *brw) * flushes as necessary when doublebuffering of CURBEs isn't * possible. 
*/ + + return 0; } -static void emit_constant_buffer(struct brw_context *brw) +static int emit_constant_buffer(struct brw_context *brw) { GLuint sz = brw->curbe.total_size; @@ -322,6 +326,7 @@ static void emit_constant_buffer(struct brw_context *brw) (sz - 1) + brw->curbe.curbe_offset); } ADVANCE_BATCH(); + return 0; } const struct brw_tracked_state brw_constant_buffer = { diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 7af490bc5a..b5fe7c9601 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -26,15 +26,18 @@ **************************************************************************/ +#include "util/u_prim.h" +#include "util/u_upload_mgr.h" + #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" #include "brw_debug.h" +#include "brw_screen.h" #include "brw_batchbuffer.h" -#define FILE_DEBUG_FLAG DEBUG_BATCH static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = { _3DPRIM_POINTLIST, @@ -56,18 +59,21 @@ static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = { * programs be immune to the active primitive (ie. cope with all * possibilities). That may not be realistic however. 
*/ -static GLuint brw_set_prim(struct brw_context *brw, unsigned prim) +static int brw_set_prim(struct brw_context *brw, unsigned prim ) { if (BRW_DEBUG & DEBUG_PRIMS) debug_printf("PRIM: %s\n", u_prim_name(prim)); if (prim != brw->primitive) { + unsigned reduced_prim; + brw->primitive = prim; brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; - if (reduced_prim[prim] != brw->reduced_primitive) { - brw->reduced_primitive = reduced_prim[prim]; + reduced_prim = u_reduced_prim(prim); + if (reduced_prim != brw->reduced_primitive) { + brw->reduced_primitive = reduced_prim; brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; } } @@ -77,17 +83,14 @@ static GLuint brw_set_prim(struct brw_context *brw, unsigned prim) -static enum pipe_error brw_emit_prim(struct brw_context *brw, - unsigned prim, - unsigned start, - unsigned count, - boolean indexed, - uint32_t hw_prim) +static int brw_emit_prim(struct brw_context *brw, + unsigned start, + unsigned count, + boolean indexed, + uint32_t hw_prim) { struct brw_3d_primitive prim_packet; - - if (INTEL_DEBUG & DEBUG_PRIMS) - debug_printf("PRIM: %s %d %d\n", u_prim_name(prim), start, count); + int ret; prim_packet.header.opcode = CMD_3D_PRIM; prim_packet.header.length = sizeof(prim_packet)/4 - 2; @@ -101,7 +104,7 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw, prim_packet.start_vert_location += brw->ib.start_vertex_offset; prim_packet.instance_count = 1; prim_packet.start_instance_location = 0; - prim_packet.base_vert_location = prim->basevertex; + prim_packet.base_vert_location = 0; // prim->basevertex; XXX: add this to gallium /* If we're set to always flush, do it before and after the primitive emit. @@ -109,20 +112,20 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw, * and missed flushes of the render cache as it heads to other parts of * the besides the draw code. 
*/ - if (intel->always_flush_cache) { - BEGIN_BATCH(1, IGNORE_CLIPRECTS) - OUT_BATCH(intel->vtbl.flush_cmd()); + if (0) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); ADVANCE_BATCH(); } if (prim_packet.verts_per_instance) { - ret = brw_batchbuffer_data( brw->intel.batch, &prim_packet, + ret = brw_batchbuffer_data( brw->batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); if (ret) return ret; } - if (intel->always_flush_cache) { + if (0) { BEGIN_BATCH(1, IGNORE_CLIPRECTS); - OUT_BATCH(intel->vtbl.flush_cmd()); + OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); ADVANCE_BATCH(); } @@ -133,44 +136,24 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw, /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. */ -static GLboolean brw_try_draw_prims( struct brw_context *brw, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint min_index, - GLuint max_index ) +static int +try_draw_range_elements(struct brw_context *brw, + struct pipe_buffer *index_buffer, + unsigned hw_prim, + unsigned start, unsigned count) { - struct brw_context *brw = brw_context(ctx); - GLboolean retval = GL_FALSE; - GLboolean warn = GL_FALSE; - GLboolean first_time = GL_TRUE; - uint32_t hw_prim; - GLuint i; - - if (ctx->NewState) - _mesa_update_state( ctx ); - - /* Bind all inputs, derive varying and size information: - */ - brw_merge_inputs( brw, arrays ); - - brw->ib.ib = ib; - brw->state.dirty.brw |= BRW_NEW_INDICES; - - brw->vb.min_index = min_index; - brw->vb.max_index = max_index; - brw->state.dirty.brw |= BRW_NEW_VERTICES; - - hw_prim = brw_set_prim(brw, prim[i].mode); + int ret; - brw_validate_state(brw); + ret = brw_validate_state(brw); + if (ret) + return ret; /* Check that we can fit our state in with our existing batchbuffer, or * flush otherwise. 
*/ - ret = dri_bufmgr_check_aperture_space(brw->state.validated_bos, - brw->state.validated_bo_count); + ret = brw->sws->check_aperture_space(brw->sws, + brw->state.validated_bos, + brw->state.validated_bo_count); if (ret) return ret; @@ -178,12 +161,12 @@ static GLboolean brw_try_draw_prims( struct brw_context *brw, if (ret) return ret; - ret = brw_emit_prim(brw, &prim[i], hw_prim); + ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim); if (ret) return ret; - if (intel->always_flush_batch) - brw_batchbuffer_flush(intel->batch); + if (brw->flags.always_flush_batch) + brw_batchbuffer_flush(brw->batch); return 0; } @@ -197,22 +180,45 @@ brw_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count) { - enum pipe_error ret; + struct brw_context *brw = brw_context(pipe); + int ret; + uint32_t hw_prim; + + hw_prim = brw_set_prim(brw, mode); - if (!vbo_all_varyings_in_vbos(arrays)) { - if (!index_bounds_valid) - vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + if (BRW_DEBUG & DEBUG_PRIMS) + debug_printf("PRIM: %s %d %d\n", u_prim_name(mode), start, count); + + /* Potentially trigger upload of new index buffer. + * + * XXX: do we need to go through state validation to achieve this? + * Could just call upload code directly. + */ + if (brw->curr.index_buffer != index_buffer) { + pipe_buffer_reference( &brw->curr.index_buffer, index_buffer ); + brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER; + } + + /* XXX: do we really care? 
+ */ + if (brw->curr.min_index != min_index || + brw->curr.max_index != max_index) + { + brw->curr.min_index = min_index; + brw->curr.max_index = max_index; + brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE; } + /* Make a first attempt at drawing: */ - ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); /* Otherwise, flush and retry: */ if (ret != 0) { - brw_batchbuffer_flush(intel->batch); - ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + brw_batchbuffer_flush(brw->batch); + ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); assert(ret == 0); } @@ -242,28 +248,37 @@ brw_draw_arrays(struct pipe_context *pipe, unsigned mode, -void brw_draw_init( struct brw_context *brw ) +boolean brw_draw_init( struct brw_context *brw ) { /* Register our drawing function: */ brw->base.draw_arrays = brw_draw_arrays; brw->base.draw_elements = brw_draw_elements; brw->base.draw_range_elements = brw_draw_range_elements; -} -void brw_draw_destroy( struct brw_context *brw ) -{ - int i; + /* Create helpers for uploading data in user buffers: + */ + brw->vb.upload_vertex = u_upload_create( &brw->brw_screen->base, + 128 * 1024, + 64, + PIPE_BUFFER_USAGE_VERTEX ); + if (brw->vb.upload_vertex == NULL) + return FALSE; + + brw->vb.upload_index = u_upload_create( &brw->brw_screen->base, + 128 * 1024, + 64, + PIPE_BUFFER_USAGE_INDEX ); + if (brw->vb.upload_index == NULL) + return FALSE; - if (brw->vb.upload.bo != NULL) { - brw->sws->bo_unreference(brw->vb.upload.bo); - brw->vb.upload.bo = NULL; - } + return TRUE; +} - for (i = 0; i < VERT_ATTRIB_MAX; i++) { - brw->sws->bo_unreference(brw->vb.inputs[i].bo); - brw->vb.inputs[i].bo = NULL; - } +void brw_draw_cleanup( struct brw_context *brw ) +{ + u_upload_destroy( brw->vb.upload_vertex ); + u_upload_destroy( brw->vb.upload_index ); brw->sws->bo_unreference(brw->ib.bo); brw->ib.bo = NULL; diff 
--git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h index 13f0443a81..8dc5dbce62 100644 --- a/src/gallium/drivers/i965/brw_draw.h +++ b/src/gallium/drivers/i965/brw_draw.h @@ -32,8 +32,7 @@ struct brw_context; - -void brw_draw_init( struct brw_context *brw ); +boolean brw_draw_init( struct brw_context *brw ); void brw_draw_cleanup( struct brw_context *brw ); diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index 7b0860d04c..040d8ca93a 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -26,21 +26,23 @@ **************************************************************************/ #include "pipe/p_context.h" +#include "pipe/p_error.h" #include "util/u_upload_mgr.h" +#include "util/u_math.h" #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" -#include "brw_fallback.h" - +#include "brw_screen.h" #include "brw_batchbuffer.h" +#include "brw_debug.h" -unsigned brw_translate_surface_format( unsigned id ) +static unsigned brw_translate_surface_format( unsigned id ) { switch (id) { case PIPE_FORMAT_R64_FLOAT: @@ -186,70 +188,136 @@ static unsigned get_index_type(int type) } - -static boolean brw_prepare_vertices(struct brw_context *brw) +static int brw_prepare_vertices(struct brw_context *brw) { - GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; + unsigned int min_index = brw->curr.min_index; + unsigned int max_index = brw->curr.max_index; GLuint i; - const unsigned char *ptr = NULL; - GLuint interleave = 0; - unsigned int min_index = brw->vb.min_index; - unsigned int max_index = brw->vb.max_index; - - struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; - GLuint nr_uploads = 0; - - /* First build an array of pointers to ve's in vb.inputs_read - */ - if (0) - _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); - + int ret; + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s 
%d..%d\n", __FUNCTION__, min_index, max_index); - for (i = 0; i < brw->vb.num_vertex_buffer; i++) { - struct brw_vertex_buffer *vb = brw->vb.vertex_buffer[i]; - unsigned size = (vb->stride == 0 ? - vb->size : - vb->stride * (max_index + 1 - min_index)); - - if (brw_is_user_buffer(vb)) { - u_upload_buffer( brw->upload_vertex, - min_index * vb->stride, - size, - &offset, - &buffer ); + for (i = 0; i < brw->curr.num_vertex_buffers; i++) { + struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i]; + struct brw_winsys_buffer *bo; + struct pipe_buffer *upload_buf; + unsigned offset; + + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s vb[%d] user:%d offset:0x%x sz:0x%x stride:0x%x\n", + __FUNCTION__, i, + brw_buffer_is_user_buffer(vb->buffer), + vb->buffer_offset, + vb->buffer->size, + vb->stride); + + if (brw_buffer_is_user_buffer(vb->buffer)) { + + /* XXX: simplify this. Stop the state trackers from generating + * zero-stride buffers & have them use additional constants (or + * add support for >1 constant buffer) instead. + */ + unsigned size = (vb->stride == 0 ? 
+ vb->buffer->size - vb->buffer_offset : + MAX2(vb->buffer->size - vb->buffer_offset, + vb->stride * (max_index + 1 - min_index))); + + ret = u_upload_buffer( brw->vb.upload_vertex, + vb->buffer_offset + min_index * vb->stride, + size, + vb->buffer, + &offset, + &upload_buf ); + if (ret) + return ret; + + bo = brw_buffer(upload_buf)->bo; + + assert(offset + size <= bo->size); } else { - offset = 0; - buffer = vb->buffer; + offset = vb->buffer_offset; + bo = brw_buffer(vb->buffer)->bo; } + + assert(offset < bo->size); /* Set up post-upload info about this vertex buffer: */ - input->offset = (unsigned long)offset; - input->stride = vb->stride; - input->count = count; - brw->sws->bo_unreference(input->bo); - input->bo = intel_bufferobj_buffer(intel, intel_buffer, - INTEL_READ); - brw->sws->bo_reference(input->bo); - - assert(input->offset < input->bo->size); - assert(input->offset + size <= input->bo->size); + brw->vb.vb[i].offset = offset; + brw->vb.vb[i].stride = vb->stride; + brw->vb.vb[i].vertex_count = (vb->stride == 0 ? + 1 : + (bo->size - offset) / vb->stride); + brw->sws->bo_unreference(brw->vb.vb[i].bo); + brw->vb.vb[i].bo = bo; + brw->sws->bo_reference(brw->vb.vb[i].bo); + + /* Don't need to retain this reference. We have a reference on + * the underlying winsys buffer: + */ + pipe_buffer_reference( &upload_buf, NULL ); } + brw->vb.nr_vb = i; brw_prepare_query_begin(brw); - for (i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; + for (i = 0; i < brw->vb.nr_vb; i++) { + brw_add_validated_bo(brw, brw->vb.vb[i].bo); + } + + return 0; +} + +static int brw_emit_vertex_buffers( struct brw_context *brw ) +{ + int i; + + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), just bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. 
+ */ + if (brw->vb.nr_vb == 0) { + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s: no active vertex buffers\n", __FUNCTION__); - brw_add_validated_bo(brw, input->bo); + return 0; + } + + /* Emit VB state packets. + */ + BEGIN_BATCH(1 + brw->vb.nr_vb * 4, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_BUFFER << 16) | + ((1 + brw->vb.nr_vb * 4) - 2)); + + for (i = 0; i < brw->vb.nr_vb; i++) { + OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | + BRW_VB0_ACCESS_VERTEXDATA | + (brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT)); + OUT_RELOC(brw->vb.vb[i].bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->vb.vb[i].offset); + if (BRW_IS_IGDNG(brw)) { + OUT_RELOC(brw->vb.vb[i].bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->vb.vb[i].bo->size - 1); + } else + OUT_BATCH(brw->vb.vb[i].stride ? brw->vb.vb[i].vertex_count : 0); + OUT_BATCH(0); /* Instance data step rate */ } + ADVANCE_BATCH(); + return 0; } -static void brw_emit_vertices(struct brw_context *brw) + + + +static int brw_emit_vertex_elements(struct brw_context *brw) { GLuint i; @@ -262,7 +330,7 @@ static void brw_emit_vertices(struct brw_context *brw) * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ - if (brw->vb.nr_enabled == 0) { + if (brw->vb.nr_ve == 0) { BEGIN_BATCH(3, IGNORE_CLIPRECTS); OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | @@ -274,59 +342,23 @@ static void brw_emit_vertices(struct brw_context *brw) (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); ADVANCE_BATCH(); - return; + return 0; } - /* Now emit VB and VEP state packets. + /* Now emit vertex element (VEP) state packets. * - * This still defines a hardware VB for each input, even if they - * are interleaved or from the same VBO. TBD if this makes a - * performance difference. 
*/ - BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_BUFFER << 16) | - ((1 + brw->vb.nr_enabled * 4) - 2)); - - for (i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - - OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | - BRW_VB0_ACCESS_VERTEXDATA | - (input->stride << BRW_VB0_PITCH_SHIFT)); - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->offset); - if (BRW_IS_IGDNG(brw)) { - if (input->stride) { - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->offset + input->stride * input->count - 1); - } else { - assert(input->count == 1); - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->offset + input->element_size - 1); - } - } else - OUT_BATCH(input->stride ? input->count : 0); - OUT_BATCH(0); /* Instance data step rate */ - } - ADVANCE_BATCH(); - - BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); - for (i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - uint32_t format = get_surface_type(input->glarray->Type, - input->glarray->Size, - input->glarray->Format, - input->glarray->Normalized); + BEGIN_BATCH(1 + brw->curr.num_vertex_elements * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_ve * 2) - 2)); + for (i = 0; i < brw->vb.nr_ve; i++) { + const struct pipe_vertex_element *input = &brw->curr.vertex_element[i]; + uint32_t format = brw_translate_surface_format( input->src_format ); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; - switch (input->glarray->Size) { + switch (input->nr_components) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; @@ -352,11 +384,29 @@ static void 
brw_emit_vertices(struct brw_context *brw) ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } ADVANCE_BATCH(); + return 0; +} + + +static int brw_emit_vertices( struct brw_context *brw ) +{ + int ret; + + ret = brw_emit_vertex_buffers( brw ); + if (ret) + return ret; + + ret = brw_emit_vertex_elements( brw ); + if (ret) + return ret; + + return 0; } + const struct brw_tracked_state brw_vertices = { .dirty = { - .mesa = 0, + .mesa = PIPE_NEW_INDEX_RANGE, .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES, .cache = 0, }, @@ -364,104 +414,106 @@ const struct brw_tracked_state brw_vertices = { .emit = brw_emit_vertices, }; -static void brw_prepare_indices(struct brw_context *brw) + +static int brw_prepare_indices(struct brw_context *brw) { - const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - GLuint ib_size; + struct pipe_buffer *index_buffer = brw->curr.index_buffer; struct brw_winsys_buffer *bo = NULL; - struct gl_buffer_object *bufferobj; GLuint offset; - GLuint ib_type_size; + GLuint index_size; + GLuint ib_size; + int ret; if (index_buffer == NULL) - return; + return 0; - ib_type_size = get_size(index_buffer->type); - ib_size = ib_type_size * index_buffer->count; - bufferobj = index_buffer->obj;; + if (DEBUG & DEBUG_VERTS) + debug_printf("%s: index_size:%d index_buffer->size:%d\n", + __FUNCTION__, + brw->curr.index_size, + brw->curr.index_buffer->size); - /* Turn into a proper VBO: - */ - if (!_mesa_is_bufferobj(bufferobj)) { - brw->ib.start_vertex_offset = 0; + ib_size = index_buffer->size; + index_size = brw->curr.index_size; - /* Get new bufferobj, offset: - */ - get_space(brw, ib_size, &bo, &offset); - - /* Straight upload + /* Turn userbuffer into a proper hardware buffer? 
+ */ + if (brw_buffer_is_user_buffer(index_buffer)) { + struct pipe_buffer *upload_buf; + + ret = u_upload_buffer( brw->vb.upload_index, + 0, + ib_size, + index_buffer, + &offset, + &upload_buf ); + if (ret) + return ret; + + bo = brw_buffer(upload_buf)->bo; + brw->sws->bo_reference(bo); + pipe_buffer_reference( &upload_buf, NULL ); + + /* XXX: annotate the userbuffer with the upload information so + * that successive calls don't get re-uploaded. */ - brw_bo_subdata(bo, offset, ib_size, index_buffer->ptr); - - } else { - offset = (GLuint) (unsigned long) index_buffer->ptr; - brw->ib.start_vertex_offset = 0; + } + else { + bo = brw_buffer(index_buffer)->bo; + brw->sws->bo_reference(bo); + + ib_size = bo->size; + offset = 0; + } - /* If the index buffer isn't aligned to its element size, we have to - * rebase it into a temporary. - */ - if ((get_size(index_buffer->type) - 1) & offset) { - GLubyte *map = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_DYNAMIC_DRAW_ARB, - bufferobj); - map += offset; - - get_space(brw, ib_size, &bo, &offset); - - dri_bo_subdata(bo, offset, ib_size, map); - - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); - } else { - bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), - INTEL_READ); - brw->sws->bo_reference(bo); - - /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading - * the index buffer state when we're just moving the start index - * of our drawing. - */ - brw->ib.start_vertex_offset = offset / ib_type_size; - offset = 0; - ib_size = bo->size; - } + /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the + * index buffer state when we're just moving the start index of our + * drawing. + * + * In gallium this will happen in the case where successive draw + * calls are made with (distinct?) userbuffers, but the upload_mgr + * places the data into a single winsys buffer. 
+ * + * This statechange doesn't raise any state flags and is always + * just merged into the final draw packet: + */ + if (1) { + assert((offset & (index_size - 1)) == 0); + brw->ib.start_vertex_offset = offset / index_size; } + /* These statechanges trigger a new CMD_INDEX_BUFFER packet: + */ if (brw->ib.bo != bo || - brw->ib.offset != offset || brw->ib.size != ib_size) { - drm_intel_bo_unreference(brw->ib.bo); + brw->sws->bo_unreference(brw->ib.bo); brw->ib.bo = bo; - brw->ib.offset = offset; brw->ib.size = ib_size; - brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; - } else { - drm_intel_bo_unreference(bo); + } + else { + brw->sws->bo_unreference(bo); } brw_add_validated_bo(brw, brw->ib.bo); + return 0; } const struct brw_tracked_state brw_indices = { .dirty = { - .mesa = 0, - .brw = BRW_NEW_INDICES, + .mesa = PIPE_NEW_INDEX_BUFFER, + .brw = 0, .cache = 0, }, .prepare = brw_prepare_indices, }; -static void brw_emit_index_buffer(struct brw_context *brw) +static int brw_emit_index_buffer(struct brw_context *brw) { - const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - - if (index_buffer == NULL) - return; - /* Emit the indexbuffer packet: */ + if (brw->ib.bo) { struct brw_indexbuffer ib; @@ -469,7 +521,7 @@ static void brw_emit_index_buffer(struct brw_context *brw) ib.header.bits.opcode = CMD_INDEX_BUFFER; ib.header.bits.length = sizeof(ib)/4 - 2; - ib.header.bits.index_format = get_index_type(index_buffer->type); + ib.header.bits.index_format = get_index_type(brw->ib.size); ib.header.bits.cut_index_enable = 0; BEGIN_BATCH(4, IGNORE_CLIPRECTS); @@ -483,6 +535,8 @@ static void brw_emit_index_buffer(struct brw_context *brw) OUT_BATCH( 0 ); ADVANCE_BATCH(); } + + return 0; } const struct brw_tracked_state brw_index_buffer = { diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c index 1df561386e..df49d4b72f 100644 --- a/src/gallium/drivers/i965/brw_eu.c +++ b/src/gallium/drivers/i965/brw_eu.c @@ -29,6 +29,7 @@ * Keith Whitwell */ 
+#include "util/u_memory.h" #include "brw_context.h" #include "brw_defines.h" @@ -237,7 +238,7 @@ brw_resolve_cals(struct brw_compile *c) struct brw_glsl_call *call, *next; for (call = c->first_call; call; call = next) { next = call->next; - _mesa_free(call); + FREE(call); } c->first_call = NULL; } @@ -247,7 +248,7 @@ brw_resolve_cals(struct brw_compile *c) struct brw_glsl_label *label, *next; for (label = c->first_label; label; label = next) { next = label->next; - _mesa_free(label); + FREE(label); } c->first_label = NULL; } diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c index ad7ec36e86..5989f5a04e 100644 --- a/src/gallium/drivers/i965/brw_eu_debug.c +++ b/src/gallium/drivers/i965/brw_eu_debug.c @@ -28,7 +28,8 @@ * Authors: * Keith Whitwell */ - + +#include "util/u_debug.h" #include "brw_eu.h" @@ -52,7 +53,7 @@ void brw_print_reg( struct brw_reg hwreg ) "f" }; - _mesa_printf("%s%s", + debug_printf("%s%s", hwreg.abs ? "abs/" : "", hwreg.negate ? 
"-" : ""); @@ -64,7 +65,7 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && hwreg.type == BRW_REGISTER_TYPE_F) { /* vector register */ - _mesa_printf("vec%d", hwreg.nr); + debug_printf("vec%d", hwreg.nr); } else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && hwreg.vstride == BRW_VERTICAL_STRIDE_0 && @@ -72,13 +73,13 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && hwreg.type == BRW_REGISTER_TYPE_F) { /* "scalar" register */ - _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); } else if (hwreg.file == BRW_IMMEDIATE_VALUE) { - _mesa_printf("imm %f", hwreg.dw1.f); + debug_printf("imm %f", hwreg.dw1.f); } else { - _mesa_printf("%s%d.%d<%d;%d,%d>:%s", + debug_printf("%s%d.%d<%d;%d,%d>:%s", file[hwreg.file], hwreg.nr, hwreg.subnr / type_sz(hwreg.type), diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index 0f2612c181..98fec85c1d 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -315,24 +315,20 @@ const struct brw_tracked_state brw_polygon_stipple = { static void upload_polygon_stipple_offset(struct brw_context *brw) { - __DRIdrawablePrivate *dPriv = brw->intel.driDrawable; struct brw_polygon_stipple_offset bpso; memset(&bpso, 0, sizeof(bpso)); bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; - /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0), - * we have to invert the Y axis in order to match the OpenGL - * pixel coordinate system, and our offset must be matched - * to the window position. If we're drawing to a FBO - * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate - * system works just fine, and there's no window system to - * worry about. + /* Never need to offset stipple coordinates. + * + * XXX: is it ever necessary to invert Y values? 
*/ - if (brw->intel.ctx.DrawBuffer->Name == 0) { - bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; - bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; + if (0) { + int x = 0, y = 0, h = 0; + bpso.bits0.x_offset = (32 - (x & 31)) & 31; + bpso.bits0.y_offset = (32 - ((y + h) & 31)) & 31; } else { bpso.bits0.y_offset = 0; diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index e85a1a9c1b..65e7151517 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -53,6 +53,9 @@ static void brw_note_fence( struct brw_context *brw, GLuint fence ) static GLuint brw_flush_cmd( void ) { struct brw_mi_flush flush; + + return ; + flush.opcode = CMD_MI_FLUSH; flush.pad = 0; flush.flags = BRW_FLUSH_STATE_CACHE; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index fbb772d18c..8b61da763c 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -33,6 +33,25 @@ #include "brw_util.h" #include "brw_wm.h" + +/** + * Determine if the given fragment program uses GLSL features such + * as flow conditionals, loops, subroutines. + * Some GLSL shaders may use these features, others might not. 
+ */ +GLboolean brw_wm_is_glsl(const struct brw_fragment_shader *fp) +{ + return (fp->info.insn_count[TGSI_OPCODE_ARL] > 0 || + fp->info.insn_count[TGSI_OPCODE_IF] > 0 || + fp->info.insn_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */ + fp->info.insn_count[TGSI_OPCODE_CAL] > 0 || + fp->info.insn_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */ + fp->info.insn_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */ + fp->info.insn_count[TGSI_OPCODE_BGNLOOP] > 0); +} + + + static void brwBindProgram( struct brw_context *brw, GLenum target, struct gl_program *prog ) diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index b0928adbe4..d1d0d7cd43 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -1,26 +1,11 @@ -static void brw_merge_inputs( struct brw_context *brw, - const struct gl_client_array *arrays[]) -{ - struct brw_vertex_info old = brw->vb.info; - GLuint i; - - for (i = 0; i < VERT_ATTRIB_MAX; i++) - brw->sws->bo_unreference(brw->vb.inputs[i].bo); - memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs)); - memset(&brw->vb.info, 0, sizeof(brw->vb.info)); +void +brw_pipe_vertex_cleanup( struct brw_context *brw ) +{ for (i = 0; i < VERT_ATTRIB_MAX; i++) { - brw->vb.inputs[i].glarray = arrays[i]; - brw->vb.inputs[i].attrib = (gl_vert_attrib) i; - - if (arrays[i]->StrideB != 0) - brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << - ((i%16) * 2); + brw->sws->bo_unreference(brw->vb.inputs[i].bo); + brw->vb.inputs[i].bo = NULL; } - - /* Raise statechanges if input sizes have changed. 
*/ - if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) - brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; } diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index 79d595d0ad..b0be0e1f8a 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -56,6 +56,14 @@ struct brw_transfer unsigned offset; }; +struct brw_buffer +{ + struct pipe_buffer base; + struct brw_winsys_buffer *bo; + void *ptr; + boolean is_user_buffer; +}; + /* * Cast wrappers @@ -72,5 +80,19 @@ brw_transfer(struct pipe_transfer *transfer) return (struct brw_transfer *)transfer; } +static INLINE struct brw_buffer * +brw_buffer(struct pipe_buffer *buffer) +{ + return (struct brw_buffer *)buffer; +} + + +/* Pipe buffer helpers + */ +static INLINE boolean +brw_buffer_is_user_buffer( const struct pipe_buffer *buf ) +{ + return ((const struct brw_buffer *)buf)->is_user_buffer; +} #endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 53e8f09e37..e2db2e76e6 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -134,7 +134,7 @@ static void upload_sf_prog(struct brw_context *brw) key.attrs = brw->vs.prog_data->outputs_written; /* BRW_NEW_REDUCED_PRIMITIVE */ - switch (brw->intel.reduced_primitive) { + switch (brw->reduced_primitive) { case GL_TRIANGLES: /* NOTE: We just use the edgeflag attribute as an indicator that * unfilled triangles are active. 
We don't actually do the diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index 0e406f12e1..648a16a038 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -40,19 +40,12 @@ static void upload_sf_vp(struct brw_context *brw) const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport sfv; GLfloat y_scale, y_bias; - const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; memset(&sfv, 0, sizeof(sfv)); - if (render_to_fbo) { - y_scale = 1.0; - y_bias = 0; - } - else { - y_scale = -1.0; - y_bias = ctx->DrawBuffer->Height; - } + y_scale = 1.0; + y_bias = 0; /* _NEW_VIEWPORT */ @@ -73,20 +66,11 @@ static void upload_sf_vp(struct brw_context *brw) * Note that the hardware's coordinates are inclusive, while Mesa's min is * inclusive but max is exclusive. */ - if (render_to_fbo) { - /* texmemory: Y=0=bottom */ - sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; - sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; - sfv.scissor.ymin = ctx->DrawBuffer->_Ymin; - sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1; - } - else { - /* memory: Y=0=top */ - sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; - sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; - sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; - sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; - } + /* Y=0=bottom */ + sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; + sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + sfv.scissor.ymin = ctx->DrawBuffer->_Ymin; + sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1; brw->sws->bo_unreference(brw->sf.vp_bo); brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); @@ -151,7 +135,7 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) /* _NEW_LIGHT */ key->provoking_vertex = ctx->Light.ProvokingVertex; - key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 
0; + key->render_to_fbo = 1; } static struct brw_winsys_buffer * @@ -211,11 +195,6 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else sf.sf5.front_winding = BRW_FRONTWINDING_CW; - /* The viewport is inverted for rendering to a FBO, and that inverts - * polygon front/back orientation. - */ - sf.sf5.front_winding ^= key->render_to_fbo; - switch (key->cull_face) { case GL_FRONT: sf.sf6.cull_mode = BRW_CULLMODE_FRONT; @@ -245,7 +224,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.sf6.line_width = 0; /* _NEW_BUFFERS */ - key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; + key->render_to_fbo = 1; if (!key->render_to_fbo) { /* Rendering to an OpenGL window */ sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index 02657eaba7..9bf34c3fe4 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -109,8 +109,8 @@ struct brw_surface_key { /*********************************************************************** * brw_state.c */ -void brw_validate_state(struct brw_context *brw); -void brw_upload_state(struct brw_context *brw); +int brw_validate_state(struct brw_context *brw); +int brw_upload_state(struct brw_context *brw); void brw_init_state(struct brw_context *brw); void brw_destroy_state(struct brw_context *brw); @@ -157,7 +157,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) +#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) GLboolean brw_cached_batch_struct( struct brw_context *brw, 
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c index b285837070..324fce5163 100644 --- a/src/gallium/drivers/i965/brw_state_batch.c +++ b/src/gallium/drivers/i965/brw_state_batch.c @@ -47,7 +47,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct header *newheader = (struct header *)data; if (brw->emit_state_always) { - brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } @@ -74,7 +74,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, emit: memcpy(item->header, newheader, sz); - brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c index 83f138f67a..d2df8af9f4 100644 --- a/src/gallium/drivers/i965/brw_swtnl.c +++ b/src/gallium/drivers/i965/brw_swtnl.c @@ -12,13 +12,13 @@ static GLboolean check_fallbacks( struct brw_context *brw, * use fallbacks. If we're forcing fallbacks, always * use fallfacks. */ - if (brw->intel.conformance_mode == 0) + if (brw->flags.no_swtnl) return GL_FALSE; - if (brw->intel.conformance_mode == 2) + if (brw->flags.force_swtnl) return GL_TRUE; - if (ctx->Polygon.SmoothFlag) { + if (brw->curr.rast->tmpl.smooth_polys) { for (i = 0; i < nr_prims; i++) if (reduced_prim[prim[i].mode] == GL_TRIANGLES) return GL_TRUE; diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index 82cd8007ac..51e23b9640 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -161,6 +161,13 @@ struct brw_winsys_screen { size_t size, const void *data); + /* XXX: couldn't this be handled by returning true/false on + * bo_emit_reloc? 
+ */ + boolean (*check_aperture_space)( struct brw_winsys_screen *iws, + struct brw_winsys_buffer **buffers, + unsigned count ); + /** * Map a buffer. */ diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 284cf42f8b..4948ea0dff 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -158,7 +158,7 @@ static void do_wm_prog( struct brw_context *brw, memcpy(&c->key, key, sizeof(*key)); c->fp = fp; - c->env_param = brw->intel.ctx.FragmentProgram.Parameters; + c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/ brw_init_compile(brw, &c->func); diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 18775830f9..e06de95a8a 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -131,9 +131,9 @@ struct brw_wm_ref { GLuint insn:24; }; -struct brw_wm_constref { +struct brw_wm_imm_ref { const struct brw_wm_ref *ref; - GLfloat constval; + GLfloat imm1f; }; @@ -232,8 +232,8 @@ struct brw_wm_compile { struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; GLuint nr_insns; - struct brw_wm_constref constref[BRW_WM_MAX_CONST]; - GLuint nr_constrefs; + struct brw_wm_imm_ref imm_ref[BRW_WM_MAX_CONST]; + GLuint nr_imm_refs; struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2]; diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index c4f0711793..a8de5fdd0b 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -7,34 +7,6 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, const struct prog_instruction *inst, GLuint component); -/** - * Determine if the given fragment program uses GLSL features such - * as flow conditionals, loops, subroutines. - * Some GLSL shaders may use these features, others might not. 
- */ -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) -{ - int i; - - for (i = 0; i < fp->Base.NumInstructions; i++) { - const struct prog_instruction *inst = &fp->Base.Instructions[i]; - switch (inst->Opcode) { - case OPCODE_ARL: - case OPCODE_IF: - case OPCODE_ENDIF: - case OPCODE_CAL: - case OPCODE_BRK: - case OPCODE_RET: - case OPCODE_BGNLOOP: - return GL_TRUE; - default: - break; - } - } - return GL_FALSE; -} - - static void reclaim_temps(struct brw_wm_compile *c); diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index de5f5fe821..31b0270e84 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -124,33 +124,33 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, } -/** Return a ref to a constant/literal value */ -static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, - const GLfloat *constval ) +/** Return a ref to an immediate value */ +static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c, + const GLfloat *imm1f ) { GLuint i; /* Search for an existing const value matching the request: */ - for (i = 0; i < c->nr_constrefs; i++) { - if (c->constref[i].constval == *constval) - return c->constref[i].ref; + for (i = 0; i < c->nr_imm_refs; i++) { + if (c->imm_ref[i].imm_val == *imm1f) + return c->imm_ref[i].ref; } /* Else try to add a new one: */ - if (c->nr_constrefs < BRW_WM_MAX_CONST) { - GLuint i = c->nr_constrefs++; + if (c->nr_imm_refs < BRW_WM_MAX_IMM) { + GLuint i = c->nr_imm_refs++; - /* A constant is a special type of parameter: + /* An immediate is a special type of parameter: */ - c->constref[i].constval = *constval; - c->constref[i].ref = get_param_ref(c, constval); + c->imm_ref[i].imm_val = *imm_val; + c->imm_ref[i].ref = get_param_ref(c, imm_val); - return c->constref[i].ref; + return c->imm_ref[i].ref; } else { - _mesa_printf("%s: out of constrefs\n", __FUNCTION__); + _mesa_printf("%s: 
out of imm_refs\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } @@ -200,7 +200,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, case PROGRAM_CONSTANT: /* These are invarient: */ - ref = get_const_ref(c, &plist->ParameterValues[idx][component]); + ref = get_imm_ref(c, &plist->ParameterValues[idx][component]); break; case PROGRAM_STATE_VAR: @@ -266,9 +266,9 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, static const GLfloat const_one = 1.0; if (component == SWIZZLE_ZERO) - src_ref = get_const_ref(c, &const_zero); + src_ref = get_imm_ref(c, &const_zero); else if (component == SWIZZLE_ONE) - src_ref = get_const_ref(c, &const_one); + src_ref = get_imm_ref(c, &const_one); else src_ref = pass0_get_reg(c, src.File, src.Index, component); diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index c76bff9181..ec9c859fcb 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -554,7 +554,8 @@ st_draw_vbo(GLcontext *ctx, /* Gallium probably doesn't want this in some cases. 
*/ if (!index_bounds_valid) - vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); + if (!vbo_all_varyings_in_vbos(arrays)) + vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); /* sanity check for pointer arithmetic below */ assert(sizeof(arrays[0]->Ptr[0]) == 1); -- cgit v1.2.3 From 562ca4eae257dd3b268e7f13487c8cd91f618eae Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 Oct 2009 01:20:56 +0100 Subject: i965g: more compiling wip --- src/gallium/drivers/i965/brw_context.h | 15 +- src/gallium/drivers/i965/brw_curbe.c | 3 +- src/gallium/drivers/i965/brw_gs.c | 48 +++---- src/gallium/drivers/i965/brw_gs.h | 4 +- src/gallium/drivers/i965/brw_gs_state.c | 21 +-- src/gallium/drivers/i965/brw_misc_state.c | 222 ++++++++++++++---------------- src/gallium/drivers/i965/brw_pipe_blend.c | 19 +++ src/gallium/drivers/i965/brw_pipe_rast.c | 20 +++ src/gallium/drivers/i965/brw_screen.h | 7 + src/gallium/drivers/i965/brw_sf.c | 2 +- src/gallium/drivers/i965/brw_state.h | 4 +- src/gallium/drivers/i965/brw_urb.c | 3 +- src/gallium/drivers/i965/brw_vs.c | 4 +- src/gallium/drivers/i965/brw_vs_emit.c | 67 +++++---- src/gallium/drivers/i965/brw_wm.c | 2 +- src/gallium/drivers/i965/brw_wm.h | 2 +- 16 files changed, 243 insertions(+), 200 deletions(-) (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 7ead641811..2e17e150bb 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -184,6 +184,8 @@ struct brw_fragment_shader { #define PIPE_NEW_CLIP 0x2 #define PIPE_NEW_INDEX_BUFFER 0x2 #define PIPE_NEW_INDEX_RANGE 0x2 +#define PIPE_NEW_BLEND_COLOR 0x2 +#define PIPE_NEW_POLYGON_STIPPLE 0x2 #define BRW_NEW_URB_FENCE 0x1 @@ -202,7 +204,9 @@ struct brw_fragment_shader { #define BRW_NEW_VERTICES 0x8000 /** * Used for any batch entry with a relocated pointer that will be used - * by any 3D rendering. + * by any 3D rendering. 
Need to re-emit these fresh in each + * batchbuffer as the referenced buffers may be relocated in the + * meantime. */ #define BRW_NEW_BATCH 0x10000 /** brw->depth_region updated */ @@ -271,7 +275,7 @@ struct brw_vs_prog_data { GLuint curb_read_length; GLuint urb_read_length; GLuint total_grf; - GLuint outputs_written; + GLuint nr_outputs_written; GLuint nr_params; /**< number of float params/constants */ GLuint inputs_read; @@ -487,6 +491,9 @@ struct brw_context struct pipe_buffer *vertex_constants; struct pipe_buffer *fragment_constants; + struct brw_blend_constant_color bcc; + struct brw_polygon_stipple bps; + /** * Index buffer for this draw_prims call. * @@ -726,11 +733,11 @@ void brw_init_shader_funcs( struct brw_context *brw ); /* brw_urb.c */ -void brw_upload_urb_fence(struct brw_context *brw); +int brw_upload_urb_fence(struct brw_context *brw); /* brw_curbe.c */ -void brw_upload_cs_urb_state(struct brw_context *brw); +int brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ int brw_disasm (FILE *file, struct brw_instruction *inst); diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 278ffa4ca2..3dd08f6eeb 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -126,7 +126,7 @@ const struct brw_tracked_state brw_curbe_offsets = { * fixed-function hardware in a double-buffering scheme to avoid a * pipeline stall each time the contents of the curbe is changed. 
*/ -void brw_upload_cs_urb_state(struct brw_context *brw) +int brw_upload_cs_urb_state(struct brw_context *brw) { struct brw_cs_urb_state cs_urb; memset(&cs_urb, 0, sizeof(cs_urb)); @@ -144,6 +144,7 @@ void brw_upload_cs_urb_state(struct brw_context *brw) assert(brw->urb.nr_cs_entries); BRW_CACHED_BATCH_STRUCT(brw, &cs_urb); + return 0; } static GLfloat fixed_plane[6][4] = { diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 692ce46679..3ecaa74e4f 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -54,7 +54,7 @@ static void compile_gs_prog( struct brw_context *brw, /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ - c.nr_attrs = util_count_bits(c.key.attrs); + c.nr_attrs = c.key.nr_attrs; if (BRW_IS_IGDNG(brw)) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ @@ -80,30 +80,30 @@ static void compile_gs_prog( struct brw_context *brw, * already been weeded out by this stage: */ switch (key->primitive) { - case GL_QUADS: + case PIPE_PRIM_QUADS: brw_gs_quads( &c ); break; - case GL_QUAD_STRIP: + case PIPE_PRIM_QUAD_STRIP: brw_gs_quad_strip( &c ); break; - case GL_LINE_LOOP: + case PIPE_PRIM_LINE_LOOP: brw_gs_lines( &c ); break; - case GL_LINES: + case PIPE_PRIM_LINES: if (key->hint_gs_always) brw_gs_lines( &c ); else { return; } break; - case GL_TRIANGLES: + case PIPE_PRIM_TRIANGLES: if (key->hint_gs_always) brw_gs_tris( &c ); else { return; } break; - case GL_POINTS: + case PIPE_PRIM_POINTS: if (key->hint_gs_always) brw_gs_points( &c ); else { @@ -129,17 +129,17 @@ static void compile_gs_prog( struct brw_context *brw, &brw->gs.prog_data ); } -static const GLenum gs_prim[GL_POLYGON+1] = { - GL_POINTS, - GL_LINES, - GL_LINE_LOOP, - GL_LINES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_QUADS, - GL_QUAD_STRIP, - GL_TRIANGLES +static const unsigned gs_prim[PIPE_PRIM_MAX] = { + PIPE_PRIM_POINTS, + 
PIPE_PRIM_LINES, + PIPE_PRIM_LINE_LOOP, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_QUADS, + PIPE_PRIM_QUAD_STRIP, + PIPE_PRIM_TRIANGLES }; static void populate_key( struct brw_context *brw, @@ -148,7 +148,7 @@ static void populate_key( struct brw_context *brw, memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ - key->attrs = brw->vs.prog_data->outputs_written; + key->nr_attrs = brw->vs.prog_data->nr_outputs_written; /* BRW_NEW_PRIMITIVE */ key->primitive = gs_prim[brw->primitive]; @@ -156,14 +156,14 @@ static void populate_key( struct brw_context *brw, key->hint_gs_always = 0; /* debug code? */ key->need_gs_prog = (key->hint_gs_always || - brw->primitive == GL_QUADS || - brw->primitive == GL_QUAD_STRIP || - brw->primitive == GL_LINE_LOOP); + brw->primitive == PIPE_PRIM_QUADS || + brw->primitive == PIPE_PRIM_QUAD_STRIP || + brw->primitive == PIPE_PRIM_LINE_LOOP); } /* Calculate interpolants for triangle and line rasterization. */ -static void prepare_gs_prog(struct brw_context *brw) +static int prepare_gs_prog(struct brw_context *brw) { struct brw_gs_prog_key key; /* Populate the key: @@ -184,6 +184,8 @@ static void prepare_gs_prog(struct brw_context *brw) if (brw->gs.prog_bo == NULL) compile_gs_prog( brw, &key ); } + + return 0; } diff --git a/src/gallium/drivers/i965/brw_gs.h b/src/gallium/drivers/i965/brw_gs.h index bbb991ea2e..6e616dcb87 100644 --- a/src/gallium/drivers/i965/brw_gs.h +++ b/src/gallium/drivers/i965/brw_gs.h @@ -40,11 +40,11 @@ #define MAX_GS_VERTS (4) struct brw_gs_prog_key { - GLuint attrs:32; + GLuint nr_attrs:8; GLuint primitive:4; GLuint hint_gs_always:1; GLuint need_gs_prog:1; - GLuint pad:26; + GLuint pad:18; }; struct brw_gs_compile { diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index 6d03d72d96..15a66c9741 100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -29,11 +29,12 @@ * Keith 
Whitwell */ - +#include "util/u_math.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_debug.h" struct brw_gs_unit_key { unsigned int total_grf; @@ -76,7 +77,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) memset(&gs, 0, sizeof(gs)); - gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; if (key->prog_active) /* reloc */ gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; @@ -100,7 +101,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) if (BRW_IS_IGDNG(brw)) gs.thread4.rendering_enable = 1; - if (INTEL_DEBUG & DEBUG_STATS) + if (BRW_DEBUG & DEBUG_STATS) gs.thread4.stats_enable = 1; bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, @@ -111,17 +112,17 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) if (key->prog_active) { /* Emit GS program relocation */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - gs.thread0.grf_reg_count << 1, - offsetof(struct brw_gs_unit_state, thread0), - brw->gs.prog_bo); + brw->sws->bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + gs.thread0.grf_reg_count << 1, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); } return bo; } -static void prepare_gs_unit(struct brw_context *brw) +static int prepare_gs_unit(struct brw_context *brw) { struct brw_gs_unit_key key; @@ -135,6 +136,8 @@ static void prepare_gs_unit(struct brw_context *brw) if (brw->gs.state_bo == NULL) { brw->gs.state_bo = gs_unit_create_from_key(brw, &key); } + + return 0; } const struct brw_tracked_state brw_gs_unit = { diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index 98fec85c1d..ccebe08b4f 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -31,10 +31,12 @@ +#include "brw_debug.h" #include "brw_batchbuffer.h" 
#include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_screen.h" @@ -44,25 +46,16 @@ * Blend color */ -static void upload_blend_constant_color(struct brw_context *brw) +static int upload_blend_constant_color(struct brw_context *brw) { - struct brw_blend_constant_color bcc; - - memset(&bcc, 0, sizeof(bcc)); - bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; - bcc.header.length = sizeof(bcc)/4-2; - bcc.blend_constant_color[0] = ctx->Color.BlendColor[0]; - bcc.blend_constant_color[1] = ctx->Color.BlendColor[1]; - bcc.blend_constant_color[2] = ctx->Color.BlendColor[2]; - bcc.blend_constant_color[3] = ctx->Color.BlendColor[3]; - - BRW_CACHED_BATCH_STRUCT(brw, &bcc); + BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bcc); + return 0; } const struct brw_tracked_state brw_blend_constant_color = { .dirty = { - .mesa = _NEW_COLOR, + .mesa = PIPE_NEW_BLEND_COLOR, .brw = 0, .cache = 0 }, @@ -70,30 +63,32 @@ const struct brw_tracked_state brw_blend_constant_color = { }; /* Constant single cliprect for framebuffer object or DRI2 drawing */ -static void upload_drawing_rect(struct brw_context *brw) +static int upload_drawing_rect(struct brw_context *brw) { BEGIN_BATCH(4, NO_LOOP_CLIPRECTS); OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965); OUT_BATCH(0); - OUT_BATCH(((brw->fb.width - 1) & 0xffff) | - ((brw->fb.height - 1) << 16)); + OUT_BATCH(((brw->curr.fb.width - 1) & 0xffff) | + ((brw->curr.fb.height - 1) << 16)); OUT_BATCH(0); ADVANCE_BATCH(); + return 0; } const struct brw_tracked_state brw_drawing_rect = { .dirty = { - .mesa = _NEW_BUFFERS, + .mesa = PIPE_NEW_FRAMEBUFFER, .brw = 0, .cache = 0 }, .emit = upload_drawing_rect }; -static void prepare_binding_table_pointers(struct brw_context *brw) +static int prepare_binding_table_pointers(struct brw_context *brw) { brw_add_validated_bo(brw, brw->vs.bind_bo); brw_add_validated_bo(brw, brw->wm.bind_bo); + return 0; } /** @@ -103,7 +98,7 @@ static void prepare_binding_table_pointers(struct brw_context *brw) * The 
binding table pointers are relative to the surface state base address, * which is 0. */ -static void upload_binding_table_pointers(struct brw_context *brw) +static int upload_binding_table_pointers(struct brw_context *brw) { BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); @@ -116,6 +111,7 @@ static void upload_binding_table_pointers(struct brw_context *brw) OUT_BATCH(0); /* sf */ OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ ADVANCE_BATCH(); + return 0; } const struct brw_tracked_state brw_binding_table_pointers = { @@ -135,7 +131,7 @@ const struct brw_tracked_state brw_binding_table_pointers = { * The state pointers in this packet are all relative to the general state * base address set by CMD_STATE_BASE_ADDRESS, which is 0. */ -static void upload_pipelined_state_pointers(struct brw_context *brw ) +static int upload_pipelined_state_pointers(struct brw_context *brw ) { BEGIN_BATCH(7, IGNORE_CLIPRECTS); OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); @@ -151,10 +147,11 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) ADVANCE_BATCH(); brw->state.dirty.brw |= BRW_NEW_PSP; + return 0; } -static void prepare_psp_urb_cbs(struct brw_context *brw) +static int prepare_psp_urb_cbs(struct brw_context *brw) { brw_add_validated_bo(brw, brw->vs.state_bo); brw_add_validated_bo(brw, brw->gs.state_bo); @@ -162,13 +159,26 @@ static void prepare_psp_urb_cbs(struct brw_context *brw) brw_add_validated_bo(brw, brw->sf.state_bo); brw_add_validated_bo(brw, brw->wm.state_bo); brw_add_validated_bo(brw, brw->cc.state_bo); + return 0; } -static void upload_psp_urb_cbs(struct brw_context *brw ) +static int upload_psp_urb_cbs(struct brw_context *brw ) { - upload_pipelined_state_pointers(brw); - brw_upload_urb_fence(brw); - brw_upload_cs_urb_state(brw); + int ret; + + ret = upload_pipelined_state_pointers(brw); + if (ret) + return ret; + + ret = brw_upload_urb_fence(brw); + if (ret) + return ret; + + 
ret = brw_upload_cs_urb_state(brw); + if (ret) + return ret; + + return 0; } const struct brw_tracked_state brw_psp_urb_cbs = { @@ -187,20 +197,22 @@ const struct brw_tracked_state brw_psp_urb_cbs = { .emit = upload_psp_urb_cbs, }; -static void prepare_depthbuffer(struct brw_context *brw) +static int prepare_depthbuffer(struct brw_context *brw) { - struct intel_region *region = brw->state.depth_region; + struct pipe_surface *zsbuf = brw->curr.fb.zsbuf; - if (region != NULL) - brw_add_validated_bo(brw, region->buffer); + if (zsbuf) + brw_add_validated_bo(brw, brw_surface_bo(zsbuf)); + + return 0; } -static void emit_depthbuffer(struct brw_context *brw) +static int emit_depthbuffer(struct brw_context *brw) { - struct intel_region *region = brw->state.depth_region; + struct pipe_surface *surface = brw->curr.fb.zsbuf; unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5; - if (region == NULL) { + if (surface == NULL) { BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | @@ -214,38 +226,45 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } else { + struct brw_winsys_buffer *bo; unsigned int format; + unsigned int pitch; + unsigned int cpp; - switch (region->cpp) { - case 2: + switch (surface->format) { + case PIPE_FORMAT_Z16_UNORM: format = BRW_DEPTHFORMAT_D16_UNORM; + cpp = 2; + break; + case PIPE_FORMAT_Z24S8_UNORM: + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + cpp = 4; break; - case 4: - if (intel->depth_buffer_is_float) - format = BRW_DEPTHFORMAT_D32_FLOAT; - else - format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + case PIPE_FORMAT_Z32_FLOAT: + format = BRW_DEPTHFORMAT_D32_FLOAT; + cpp = 4; break; default: assert(0); - return; + return PIPE_ERROR_BAD_INPUT; } - assert(region->tiling != I915_TILING_X); + bo = brw_surface_bo(surface); + pitch = brw_surface_pitch(surface); BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); - 
OUT_BATCH(((region->pitch * region->cpp) - 1) | + OUT_BATCH(((pitch * cpp) - 1) | (format << 18) | (BRW_TILEWALK_YMAJOR << 26) | - ((region->tiling != I915_TILING_NONE) << 27) | + ((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) | (BRW_SURFACE_2D << 29)); - OUT_RELOC(region->buffer, + OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); + surface->offset); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | - ((region->pitch - 1) << 6) | - ((region->height - 1) << 19)); + ((pitch - 1) << 6) | + ((surface->height - 1) << 19)); OUT_BATCH(0); if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) @@ -253,6 +272,8 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } + + return 0; } const struct brw_tracked_state brw_depthbuffer = { @@ -271,37 +292,15 @@ const struct brw_tracked_state brw_depthbuffer = { * Polygon stipple packet */ -static void upload_polygon_stipple(struct brw_context *brw) +static int upload_polygon_stipple(struct brw_context *brw) { - struct brw_polygon_stipple bps; - GLuint i; - - memset(&bps, 0, sizeof(bps)); - bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; - bps.header.length = sizeof(bps)/4-2; - - /* Polygon stipple is provided in OpenGL order, i.e. bottom - * row first. If we're rendering to a window (i.e. the - * default frame buffer object, 0), then we need to invert - * it to match our pixel layout. But if we're rendering - * to a FBO (i.e. any named frame buffer object), we *don't* - * need to invert - we already match the layout. 
- */ - if (ctx->DrawBuffer->Name == 0) { - for (i = 0; i < 32; i++) - bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */ - } - else { - for (i = 0; i < 32; i++) - bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */ - } - - BRW_CACHED_BATCH_STRUCT(brw, &bps); + BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bps); + return 0; } const struct brw_tracked_state brw_polygon_stipple = { .dirty = { - .mesa = _NEW_POLYGONSTIPPLE, + .mesa = PIPE_NEW_POLYGON_STIPPLE, .brw = 0, .cache = 0 }, @@ -313,37 +312,26 @@ const struct brw_tracked_state brw_polygon_stipple = { * Polygon stipple offset packet */ -static void upload_polygon_stipple_offset(struct brw_context *brw) +static int upload_polygon_stipple_offset(struct brw_context *brw) { struct brw_polygon_stipple_offset bpso; + /* This is invarient state in gallium: + */ memset(&bpso, 0, sizeof(bpso)); bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; - - /* Never need to offset stipple coordinates. - * - * XXX: is it ever necessary to invert Y values? 
- */ - if (0) { - int x = 0, y = 0, h = 0; - bpso.bits0.x_offset = (32 - (x & 31)) & 31; - bpso.bits0.y_offset = (32 - ((y + h) & 31)) & 31; - } - else { - bpso.bits0.y_offset = 0; - bpso.bits0.x_offset = 0; - } + bpso.bits0.y_offset = 0; + bpso.bits0.x_offset = 0; BRW_CACHED_BATCH_STRUCT(brw, &bpso); + return 0; } -#define _NEW_WINDOW_POS 0x40000000 - const struct brw_tracked_state brw_polygon_stipple_offset = { .dirty = { - .mesa = _NEW_WINDOW_POS, - .brw = 0, + .mesa = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_polygon_stipple_offset @@ -352,12 +340,12 @@ const struct brw_tracked_state brw_polygon_stipple_offset = { /********************************************************************** * AA Line parameters */ -static void upload_aa_line_parameters(struct brw_context *brw) +static int upload_aa_line_parameters(struct brw_context *brw) { struct brw_aa_line_parameters balp; if (BRW_IS_965(brw)) - return; + return 0; /* use legacy aa line coverage computation */ memset(&balp, 0, sizeof(balp)); @@ -365,6 +353,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) balp.header.length = sizeof(balp) / 4 - 2; BRW_CACHED_BATCH_STRUCT(brw, &balp); + return 0; } const struct brw_tracked_state brw_aa_line_parameters = { @@ -380,31 +369,16 @@ const struct brw_tracked_state brw_aa_line_parameters = { * Line stipple packet */ -static void upload_line_stipple(struct brw_context *brw) +static int upload_line_stipple(struct brw_context *brw) { - struct brw_line_stipple bls; - GLfloat tmp; - GLint tmpi; - - memset(&bls, 0, sizeof(bls)); - bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; - bls.header.length = sizeof(bls)/4 - 2; - - bls.bits0.pattern = ctx->Line.StipplePattern; - bls.bits1.repeat_count = ctx->Line.StippleFactor; - - tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; - tmpi = tmp * (1<<13); - - - bls.bits1.inverse_repeat_count = tmpi; - - BRW_CACHED_BATCH_STRUCT(brw, &bls); + struct brw_line_stipple *bls = NULL; //brw->curr.rast->bls; + 
BRW_CACHED_BATCH_STRUCT(brw, bls); + return 0; } const struct brw_tracked_state brw_line_stipple = { .dirty = { - .mesa = _NEW_LINE, + .mesa = PIPE_NEW_RAST, .brw = 0, .cache = 0 }, @@ -416,7 +390,7 @@ const struct brw_tracked_state brw_line_stipple = { * Misc invarient state packets */ -static void upload_invarient_state( struct brw_context *brw ) +static int upload_invarient_state( struct brw_context *brw ) { { /* 0x61040000 Pipeline Select */ @@ -424,7 +398,10 @@ static void upload_invarient_state( struct brw_context *brw ) struct brw_pipeline_select ps; memset(&ps, 0, sizeof(ps)); - ps.header.opcode = CMD_PIPELINE_SELECT(brw); + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + ps.header.opcode = CMD_PIPELINE_SELECT_GM45; + else + ps.header.opcode = CMD_PIPELINE_SELECT_965; ps.header.pipeline_select = 0; BRW_BATCH_STRUCT(brw, &ps); } @@ -460,12 +437,18 @@ static void upload_invarient_state( struct brw_context *brw ) struct brw_vf_statistics vfs; memset(&vfs, 0, sizeof(vfs)); - vfs.opcode = CMD_VF_STATISTICS(brw); - if (INTEL_DEBUG & DEBUG_STATS) + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + vfs.opcode = CMD_VF_STATISTICS_GM45; + else + vfs.opcode = CMD_VF_STATISTICS_965; + + if (BRW_DEBUG & DEBUG_STATS) vfs.statistics_enable = 1; BRW_BATCH_STRUCT(brw, &vfs); } + + return 0; } const struct brw_tracked_state brw_invarient_state = { @@ -485,7 +468,7 @@ const struct brw_tracked_state brw_invarient_state = { * state pools. This comes at the expense of memory, and more expensive cache * misses. */ -static void upload_state_base_address( struct brw_context *brw ) +static int upload_state_base_address( struct brw_context *brw ) { /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. 
@@ -511,6 +494,7 @@ static void upload_state_base_address( struct brw_context *brw ) OUT_BATCH(1); /* Indirect object upper bound */ ADVANCE_BATCH(); } + return 0; } const struct brw_tracked_state brw_state_base_address = { diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c index 17895d2782..54d09d9e45 100644 --- a/src/gallium/drivers/i965/brw_pipe_blend.c +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -43,3 +43,22 @@ if (INTEL_DEBUG & DEBUG_STATS) cc.cc5.statistics_enable = 1; } + + + +static void brw_set_blend_color(struct pipe_context *pipe, + const float *blend_color) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_blend_constant_color *bcc = &brw->curr.blend_color.bcc; + + memset(bcc, 0, sizeof(*bcc)); + bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc->header.length = sizeof(*bcc)/4-2; + bcc->blend_constant_color[0] = blend_color[0]; + bcc->blend_constant_color[1] = blend_color[1]; + bcc->blend_constant_color[2] = blend_color[2]; + bcc->blend_constant_color[3] = blend_color[3]; + + brw->state.dirty.pipe |= PIPE_NEW_BLEND_COLOR; +} diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c index ff64dbd48d..86822d478a 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.c +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -44,3 +44,23 @@ calculate_clip_key_rast() } } } + + +static void +calculate_line_stipple_rast() +{ + GLfloat tmp; + GLint tmpi; + + memset(&bls, 0, sizeof(bls)); + bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; + bls.header.length = sizeof(bls)/4 - 2; + bls.bits0.pattern = brw->curr.rast.line_stipple_pattern; + bls.bits1.repeat_count = brw->curr.rast.line_stipple_factor + 1; + + tmp = 1.0 / (GLfloat) bls.bits1.repeat_count; + tmpi = tmp * (1<<13); + + bls.bits1.inverse_repeat_count = tmpi; + +} diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index b0be0e1f8a..eafd8ddf77 100644 --- 
a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -95,4 +95,11 @@ brw_buffer_is_user_buffer( const struct pipe_buffer *buf ) return ((const struct brw_buffer *)buf)->is_user_buffer; } +struct brw_winsys_buffer * +brw_surface_bo( struct pipe_surface *surface ); + +unsigned +brw_surface_pitch( const struct pipe_surface *surface ); + + #endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index e2db2e76e6..1b73b3fd51 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -131,7 +131,7 @@ static void upload_sf_prog(struct brw_context *brw) /* Populate the key, noting state dependencies: */ /* CACHE_NEW_VS_PROG */ - key.attrs = brw->vs.prog_data->outputs_written; + key.attrs = brw->vs.prog_data->nr_outputs_written; /* BRW_NEW_REDUCED_PRIMITIVE */ switch (brw->reduced_primitive) { diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index 9bf34c3fe4..663fc839df 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -33,9 +33,11 @@ #ifndef BRW_STATE_H #define BRW_STATE_H -#include "brw_context.h" +#include "pipe/p_error.h" #include "util/u_memory.h" +#include "brw_context.h" + static inline void brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo) { diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c index 18d79c5ebb..a2277519ad 100644 --- a/src/gallium/drivers/i965/brw_urb.c +++ b/src/gallium/drivers/i965/brw_urb.c @@ -221,7 +221,7 @@ const struct brw_tracked_state brw_recalculate_urb_fence = { -void brw_upload_urb_fence(struct brw_context *brw) +int brw_upload_urb_fence(struct brw_context *brw) { struct brw_urb_fence uf; memset(&uf, 0, sizeof(uf)); @@ -247,4 +247,5 @@ void brw_upload_urb_fence(struct brw_context *brw) uf.bits1.cs_fence = URB_SIZES(brw); BRW_BATCH_STRUCT(brw, &uf); + return 0; } diff --git 
a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index dcd687ac34..010ac115d3 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -51,11 +51,11 @@ static void do_vs_prog( struct brw_context *brw, brw_init_compile(brw, &c.func); c.vp = vp; - c.prog_data.outputs_written = vp->program.Base.OutputsWritten; + c.prog_data.nr_outputs_written = vp->program.Base.OutputsWritten; c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { - c.prog_data.outputs_written |= 1<first_overflow_output = 0; if (BRW_IS_IGDNG(c->func.brw)) - mrf = 8; + mrf = 8; else - mrf = 4; + mrf = 4; - for (i = 0; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1 << i)) { - c->nr_outputs++; - assert(i < Elements(c->regs[PROGRAM_OUTPUT])); - if (i == VERT_RESULT_HPOS) { - c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; + for (i = 0; i < c->prog_data.nr_outputs_written; i++) { + c->nr_outputs++; + assert(i < Elements(c->regs[PROGRAM_OUTPUT])); + if (i == VERT_RESULT_HPOS) { + c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + else if (i == VERT_RESULT_PSIZ) { + c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + mrf++; /* just a placeholder? XXX fix later stages & remove this */ + } + else { + if (mrf < 16) { + c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); + mrf++; } - else if (i == VERT_RESULT_PSIZ) { + else { + /* too many vertex results to fit in MRF, use GRF for overflow */ + if (!c->first_overflow_output) + c->first_overflow_output = i; c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; - mrf++; /* just a placeholder? 
XXX fix later stages & remove this */ - } - else { - if (mrf < 16) { - c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); - mrf++; - } - else { - /* too many vertex results to fit in MRF, use GRF for overflow */ - if (!c->first_overflow_output) - c->first_overflow_output = i; - c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } } } } @@ -238,9 +236,9 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); if (BRW_IS_IGDNG(c->func.brw)) - c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else - c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; c->prog_data.total_grf = reg; @@ -1050,8 +1048,9 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ - if ((c->prog_data.outputs_written & (1<key.nr_userclip || BRW_IS_965(p->brw)) + if (c->prog_data.writes_psiz || + c->key.nr_userclip || + BRW_IS_965(p->brw)) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1060,7 +1059,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); - if (c->prog_data.outputs_written & (1<prog_data.writes_psiz) { struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); @@ -1149,12 +1148,10 @@ static void emit_vertex_write( struct brw_vs_compile *c) * at mrf[4] atm... 
*/ GLuint i, mrf = 0; - for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1 << i)) { - /* move from GRF to MRF */ - brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); - mrf++; - } + for (i = c->first_overflow_output; i < c->prog_data.nr_outputs_written; i++) { + /* move from GRF to MRF */ + brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); + mrf++; } brw_urb_WRITE(p, diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 4948ea0dff..764708f7df 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -310,7 +310,7 @@ static void brw_wm_populate_key( struct brw_context *brw, } /* CACHE_NEW_VS_PROG */ - key->vp_outputs_written = brw->vs.prog_data->outputs_written; /* bitmask */ + key->vp_nr_outputs_written = brw->vs.prog_data->nr_outputs_written; /* The unique fragment program ID */ key->program_string_id = fp->id; diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index e06de95a8a..bf241f5fa4 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -76,7 +76,7 @@ struct brw_wm_prog_key { GLuint program_string_id:32; GLuint drawable_height; - GLuint vp_outputs_written; + GLuint vp_nr_outputs_written; }; -- cgit v1.2.3 From 09c231f84a20a306a173b60c82484ce1f9331edf Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 Oct 2009 00:20:33 +0000 Subject: i965g: still working on compilation --- src/gallium/auxiliary/tgsi/tgsi_scan.h | 3 + src/gallium/drivers/i965/Makefile | 9 +- src/gallium/drivers/i965/brw_batchbuffer.c | 14 +- src/gallium/drivers/i965/brw_context.h | 18 +- src/gallium/drivers/i965/brw_eu_emit.c | 4 +- src/gallium/drivers/i965/brw_pipe_fb.c | 2 +- src/gallium/drivers/i965/brw_pipe_flush.c | 9 +- src/gallium/drivers/i965/brw_pipe_query.c | 110 +++++++----- src/gallium/drivers/i965/brw_pipe_sampler.c | 81 +++++++++ 
src/gallium/drivers/i965/brw_screen_surface.c | 156 ++++++++++++++--- src/gallium/drivers/i965/brw_screen_texture.c | 218 ++++++++++++++++++++++++ src/gallium/drivers/i965/brw_sf.c | 80 ++++----- src/gallium/drivers/i965/brw_sf.h | 13 +- src/gallium/drivers/i965/brw_sf_emit.c | 145 +++++++++------- src/gallium/drivers/i965/brw_sf_state.c | 178 +++++++++---------- src/gallium/drivers/i965/brw_state.h | 13 +- src/gallium/drivers/i965/brw_state_batch.c | 8 +- src/gallium/drivers/i965/brw_state_cache.c | 64 ++++--- src/gallium/drivers/i965/brw_state_debug.c | 19 ++- src/gallium/drivers/i965/brw_state_dump.c | 64 +++---- src/gallium/drivers/i965/brw_state_upload.c | 37 ++-- src/gallium/drivers/i965/brw_tex.c | 50 ------ src/gallium/drivers/i965/brw_tex_layout.c | 218 ------------------------ src/gallium/drivers/i965/brw_urb.c | 10 +- src/gallium/drivers/i965/brw_vs.h | 2 +- src/gallium/drivers/i965/brw_vs_emit.c | 20 +-- src/gallium/drivers/i965/brw_vs_state.c | 4 +- src/gallium/drivers/i965/brw_winsys.h | 18 +- src/gallium/drivers/i965/brw_wm.c | 4 +- src/gallium/drivers/i965/brw_wm.h | 36 ++-- src/gallium/drivers/i965/brw_wm_debug.c | 68 ++++---- src/gallium/drivers/i965/brw_wm_emit.c | 8 +- src/gallium/drivers/i965/brw_wm_fp.c | 18 +- src/gallium/drivers/i965/brw_wm_glsl.c | 16 +- src/gallium/drivers/i965/brw_wm_pass0.c | 6 +- src/gallium/drivers/i965/brw_wm_pass1.c | 2 +- src/gallium/drivers/i965/brw_wm_pass2.c | 4 +- src/gallium/drivers/i965/brw_wm_sampler_state.c | 170 ++++-------------- src/gallium/drivers/i965/brw_wm_state.c | 6 +- 39 files changed, 1007 insertions(+), 898 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_screen_texture.c delete mode 100644 src/gallium/drivers/i965/brw_tex.c delete mode 100644 src/gallium/drivers/i965/brw_tex_layout.c (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 8a7ee0c7e4..6754001e88 100644 --- 
a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -61,6 +61,9 @@ struct tgsi_shader_info boolean uses_kill; /**< KIL or KILP instruction used? */ boolean uses_fogcoord; /**< fragment shader uses fog coord? */ boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */ + + uint texture_max; + uint texture_mask; }; diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 40e8aa8786..c3dbad72ae 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -28,10 +28,7 @@ C_SOURCES = \ brw_pipe_blend.c \ brw_pipe_depth.c \ brw_pipe_fb.c \ - brw_pipe_flush.c \ brw_pipe_query.c \ - brw_pipe_shader.c \ - brw_screen_surface.c \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ @@ -40,8 +37,6 @@ C_SOURCES = \ brw_state_dump.c \ brw_state_upload.c \ brw_swtnl.c \ - brw_tex.c \ - brw_tex_layout.c \ brw_urb.c \ brw_util.c \ brw_vs.c \ @@ -60,8 +55,12 @@ C_SOURCES = \ brw_wm_sampler_state.c \ brw_wm_state.c \ brw_wm_surface_state.c \ + brw_screen_surface.c \ + brw_screen_texture.c \ brw_bo.c \ brw_batchbuffer.c \ + brw_pipe_shader.c \ + brw_pipe_flush.c \ intel_tex_layout.c include ../../Makefile.template diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index 8bcac76ede..45fbd59273 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -105,13 +105,13 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, } - if (INTEL_DEBUG & DEBUG_BATCH) - fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, + if (BRW_DEBUG & DEBUG_BATCH) + debug_printf("%s:%d: Batchbuffer flush with %db used\n", file, line, used); /* Emit a flush if the bufmgr doesn't do it for us. 
*/ if (intel->always_flush_cache || !intel->ttm) { - *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); + *(GLuint *) (batch->ptr) = ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); batch->ptr += 4; used = batch->ptr - batch->map; } @@ -136,15 +136,15 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 ); - if (INTEL_DEBUG & DEBUG_BATCH) { + if (BRW_DEBUG & DEBUG_BATCH) { dri_bo_map(batch->buf, GL_FALSE); intel_decode(batch->buf->virtual, used / 4, batch->buf->offset, brw->brw_screen->pci_id); dri_bo_unmap(batch->buf); } - if (INTEL_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "waiting for idle\n"); + if (BRW_DEBUG & DEBUG_SYNC) { + debug_printf("waiting for idle\n"); dri_bo_map(batch->buf, GL_TRUE); dri_bo_unmap(batch->buf); } @@ -166,7 +166,7 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, int ret; if (batch->ptr - batch->map > batch->buf->size) - _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", + debug_printf ("bad relocation ptr %p map %p offset %d size %d\n", batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); ret = batch->sws->bo_emit_reloc(batch->buf, diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index df43d8ba4d..10c1cf6f33 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -190,6 +190,8 @@ struct brw_fragment_shader { #define PIPE_NEW_FRAMEBUFFER_DIMENSIONS 0x10000 #define PIPE_NEW_DEPTH_BUFFER 0x20000 #define PIPE_NEW_COLOR_BUFFERS 0x40000 +#define PIPE_NEW_QUERY 0x80000 +#define PIPE_NEW_SCISSOR 0x100000 @@ -204,7 +206,7 @@ struct brw_fragment_shader { #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_PSP 0x800 #define BRW_NEW_WM_SURFACES 0x1000 -#define BRW_NEW_FENCE 0x2000 +#define BRW_NEW_xxx 0x2000 /* was FENCE */ #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 /** @@ -373,6 +375,7 @@ struct brw_cache_item { struct 
brw_cache { struct brw_context *brw; + struct brw_winsys_screen *sws; struct brw_cache_item **items; GLuint size, n_items; @@ -380,6 +383,7 @@ struct brw_cache { GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */ GLuint aux_size[BRW_MAX_CACHE]; char *name[BRW_MAX_CACHE]; + /* Record of the last BOs chosen for each cache_id. Used to set * brw->state.dirty.cache when a new cache item is chosen. @@ -448,7 +452,7 @@ struct brw_query_object { int last_index; /* Total count of pixels from previous BOs */ - unsigned int count; + uint64_t result; }; @@ -477,11 +481,18 @@ struct brw_context const struct brw_rasterizer_state *rast; const struct brw_depth_stencil_state *zstencil; + const struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + const struct pipe_sampler *sampler[PIPE_MAX_SAMPLERS]; + unsigned num_textures; + unsigned num_samplers; + + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; unsigned num_vertex_elements; unsigned num_vertex_buffers; + struct pipe_scissor_state scissor; struct pipe_framebuffer_state fb; struct pipe_viewport_state vp; struct pipe_clip_state ucp; @@ -492,6 +503,8 @@ struct brw_context struct brw_blend_constant_color bcc; struct brw_polygon_stipple bps; + + /** * Index buffer for this draw_prims call. 
* @@ -688,6 +701,7 @@ struct brw_context struct brw_winsys_buffer *bo; int index; GLboolean active; + int stats_wm; } query; struct { diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index f6b8843e01..f7fa520348 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -1262,7 +1262,7 @@ void brw_SAMPLE(struct brw_compile *p, GLboolean need_stall = 0; if (writemask == 0) { - /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ + /*debug_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } @@ -1294,7 +1294,7 @@ void brw_SAMPLE(struct brw_compile *p, if (newmask != writemask) { need_stall = 1; - /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ + /* debug_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index 6391717227..c65f9bc374 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -53,7 +53,7 @@ static void brw_set_viewport_state( struct pipe_context *pipe, void brw_pipe_framebuffer_init( struct brw_context *brw ) { brw->base.set_framebuffer_state = brw_set_framebuffer_state; - brw->base.set_framebuffer_state = brw_set_framebuffer_state; + brw->base.set_viewport_state = brw_set_viewport_state; } void brw_pipe_framebuffer_cleanup( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index 65e7151517..fb4a784de9 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -52,14 +52,7 @@ static void brw_note_fence( struct brw_context *brw, GLuint fence ) */ static GLuint brw_flush_cmd( void ) { - struct brw_mi_flush flush; - - return ; - - flush.opcode = CMD_MI_FLUSH; - flush.pad = 0; - flush.flags = BRW_FLUSH_STATE_CACHE; - return *(GLuint 
*)&flush; + return ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); } diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c index a2da1373bf..18a9b71af0 100644 --- a/src/gallium/drivers/i965/brw_pipe_query.c +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -46,25 +46,38 @@ #include "brw_reg.h" /** Waits on the query object's BO and totals the results for this query */ -static void -brw_queryobj_get_results(struct brw_query_object *query) +static boolean +brw_query_get_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64_t *result) { - int i; - uint64_t *results; - - if (query->bo == NULL) - return; + struct brw_context *brw = brw_context(pipe); + struct brw_query_object *query = (struct brw_query_object *)q; /* Map and count the pixels from the current query BO */ - dri_bo_map(query->bo, GL_FALSE); - results = query->bo->virtual; - for (i = query->first_index; i <= query->last_index; i++) { - query->Base.Result += results[i * 2 + 1] - results[i * 2]; + if (query->bo) { + int i; + uint64_t *map; + + if (brw->sws->bo_is_busy(query->bo) && !wait) + return FALSE; + + map = brw->sws->bo_map(query->bo, GL_FALSE); + if (map == NULL) + return FALSE; + + for (i = query->first_index; i <= query->last_index; i++) { + query->result += map[i * 2 + 1] - map[i * 2]; + } + + brw->sws->bo_unmap(query->bo); + brw->sws->bo_unreference(query->bo); + query->bo = NULL; } - dri_bo_unmap(query->bo); - brw->sws->bo_unreference(query->bo); - query->bo = NULL; + *result = query->result; + return TRUE; } static struct pipe_query * @@ -72,12 +85,12 @@ brw_query_create(struct pipe_context *pipe, unsigned type ) { struct brw_query_object *query; - switch (query->type) { + switch (type) { case PIPE_QUERY_OCCLUSION_COUNTER: query = CALLOC_STRUCT( brw_query_object ); if (query == NULL) return NULL; - return &query->Base; + return (struct pipe_query *)query; default: return NULL; @@ -87,6 +100,7 @@ brw_query_create(struct 
pipe_context *pipe, unsigned type ) static void brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q) { + struct brw_context *brw = brw_context(pipe); struct brw_query_object *query = (struct brw_query_object *)q; brw->sws->bo_unreference(query->bo); @@ -94,24 +108,25 @@ brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q) } static void -brw_begin_query(struct pipe_context *pipe, struct pipe_query *q) +brw_query_begin(struct pipe_context *pipe, struct pipe_query *q) { struct brw_context *brw = brw_context(pipe); struct brw_query_object *query = (struct brw_query_object *)q; /* Reset our driver's tracking of query state. */ brw->sws->bo_unreference(query->bo); + query->result = 0; query->bo = NULL; query->first_index = -1; query->last_index = -1; insert_at_head(&brw->query.active_head, query); - brw->stats_wm++; - brw->dirty.mesa |= PIPE_NEW_QUERY; + brw->query.stats_wm++; + brw->state.dirty.mesa |= PIPE_NEW_QUERY; } static void -brw_end_query(struct pipe_context *pipe, struct pipe_query *q) +brw_query_end(struct pipe_context *pipe, struct pipe_query *q) { struct brw_context *brw = brw_context(pipe); struct brw_query_object *query = (struct brw_query_object *)q; @@ -129,27 +144,13 @@ brw_end_query(struct pipe_context *pipe, struct pipe_query *q) } remove_from_list(query); - brw->stats_wm--; - brw->dirty.mesa |= PIPE_NEW_QUERY; + brw->query.stats_wm--; + brw->state.dirty.mesa |= PIPE_NEW_QUERY; } -static void brw_wait_query(struct pipe_context *pipe, struct pipe_query *q) -{ - struct brw_query_object *query = (struct brw_query_object *)q; - - brw_queryobj_get_results(query); - query->Base.Ready = GL_TRUE; -} - -static void brw_check_query(struct pipe_context *pipe, struct pipe_query *q) -{ - struct brw_query_object *query = (struct brw_query_object *)q; - - if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) { - brw_queryobj_get_results(query); - query->Base.Ready = GL_TRUE; - } -} 
+/*********************************************************************** + * Internal functions and callbacks to implement queries + */ /** Called to set up the query BO and account for its aperture space */ void @@ -201,8 +202,17 @@ brw_emit_query_begin(struct brw_context *brw) foreach(query, &brw->query.active_head) { if (query->bo != brw->query.bo) { + uint64_t tmp; + + /* Propogate the results from this buffer to all of the + * active queries, as the bo is going away. + */ if (query->bo != NULL) - brw_queryobj_get_results(query); + brw_query_get_result( &brw->base, + (struct pipe_query *)query, + FALSE, + &tmp ); + brw->sws->bo_reference(brw->query.bo); query->bo = brw->query.bo; query->first_index = brw->query.index; @@ -235,12 +245,18 @@ brw_emit_query_end(struct brw_context *brw) brw->query.index++; } -void brw_init_queryobj_functions(struct dd_function_table *functions) +void brw_pipe_query_init( struct brw_context *brw ) { - functions->NewQueryObject = brw_new_query_object; - functions->DeleteQuery = brw_delete_query; - functions->BeginQuery = brw_begin_query; - functions->EndQuery = brw_end_query; - functions->CheckQuery = brw_check_query; - functions->WaitQuery = brw_wait_query; + brw->base.create_query = brw_query_create; + brw->base.destroy_query = brw_query_destroy; + brw->base.begin_query = brw_query_begin; + brw->base.end_query = brw_query_end; + brw->base.get_query_result = brw_query_get_result; +} + + +void brw_pipe_query_cleanup( struct brw_context *brw ) +{ + /* Unreference brw->query.bo ?? 
+ */ } diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index b3069f08c0..bc20eef6fb 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -14,6 +14,87 @@ static void *brw_create_sampler_state( struct pipe_context *pipe, { struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state); + switch (key->minfilter) { + case GL_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + case GL_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + case GL_NEAREST_MIPMAP_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case GL_LINEAR_MIPMAP_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + case GL_LINEAR_MIPMAP_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + default: + break; + } + + /* Set Anisotropy: + */ + if (key->max_aniso > 1.0) { + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; + + if (key->max_aniso > 2.0) { + sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, + BRW_ANISORATIO_16); + } + } + else { + switch (key->magfilter) { + case GL_NEAREST: + sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case GL_LINEAR: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; + } + } + + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); + + /* Set LOD bias: 
+ */ + sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6); + + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ + sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + + /* Set shadow function: + */ + if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) { + /* Shadowing is "enabled" by emitting a particular sampler + * message (sample_c). So need to recompile WM program when + * shadow comparison is enabled on each/any texture unit. + */ + sampler->ss0.shadow_function = + intel_translate_shadow_compare_func(key->comparefunc); + } + + /* Set BaseMipLevel, MaxLOD, MinLOD: + */ + sampler->ss0.base_level = U_FIXED(0, 1); + + sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6); + sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6); return (void *)sampler; } diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c index 544be6a089..e0df6cc629 100644 --- a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -1,27 +1,131 @@ - /* _NEW_BUFFERS */ - if (IS_965(brw->brw_screen->pci_id) && - !IS_G4X(brw->brw_screen->pci_id)) { - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - - /* The original gen4 hardware couldn't set up WM surfaces pointing - * at an offset within a tile, which can happen when rendering to - * anything but the base level of a texture or the +X face/0 depth. - * This was fixed with the 4 Series hardware. - * - * For these original chips, you would have to make the depth and - * color destination surfaces include information on the texture - * type, LOD, face, and various limits to use them as a destination. 
- * I would have done this, but there's also a nasty requirement that - * the depth and the color surfaces all be of the same LOD, which - * may be a worse requirement than this alignment. (Also, we may - * want to just demote the texture to untiled, instead). - */ - if (irb->region && - irb->region->tiling != I915_TILING_NONE && - (irb->region->draw_offset & 4095)) { - DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); - return GL_TRUE; - } + +#include "pipe/p_screen.h" +#include "brw_screen.h" + +struct brw_surface_id { + unsigned face:3; + unsigned zslice:13; + unsigned level:16; +}; + +static boolean need_linear_view( struct brw_screen *brw_screen, + struct brw_texture *brw_texture, + unsigned face, + unsigned level, + unsigned zslice ) +{ +#if 0 + /* XXX: what about IDGNG? + */ + if (!BRW_IS_G4X(brw->brw_screen->pci_id)) + { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. + * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. + * + * This is easy in Gallium as surfaces are all backed by + * textures, but there's also a nasty requirement that the depth + * and the color surfaces all be of the same LOD, which is + * harder to get around as we can't look at a surface in + * isolation and decide if it's legal. + * + * Instead, end up being pessimistic and say that for i965, + * ... ?? 
+ */ + if (brw_tex->tiling != I915_TILING_NONE && + (brw_tex_image_offset(brw_tex, face, level, zslize) & 4095)) { + if (BRW_DEBUG & DEBUG_VIEW) + debug_printf("%s: need surface view for non-aligned tex image\n", + __FUNCTION__); + return GL_TRUE; } + } +#endif + + /* Tiled 3d textures don't have subsets that look like 2d surfaces: + */ + + /* Everything else should be fine to render to in-place: + */ + return GL_FALSE; +} + +/* Look at all texture views and figure out if any of them need to be + * back-copied into the texture for sampling + */ +void brw_update_texture( struct pipe_screen *screen, + struct pipe_texture *texture ) +{ + /* currently nothing to do */ +} + + +static struct pipe_surface *create_linear_view( struct brw_screen *brw_screen, + struct brw_texture *brw_tex, + struct brw_surface_id id ) +{ + +} + +static struct pipe_surface *create_in_place_view( struct brw_screen *brw_screen, + struct brw_texture *brw_tex, + struct brw_surface_id id ) +{ + struct brw_surface *surface = CALLOC_STRUCT(brw_surface); + surface->id = id; + +} + +/* Get a surface which is view into a texture + */ +struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, + unsigned zslice, + unsigned usage ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_surface_id id; + + id.face = face; + id.level = level; + id.zslice = zslice; + + if (need_linear_view(brw_screen, brw_tex, id)) + type = BRW_VIEW_LINEAR; + else + type = BRW_VIEW_IN_PLACE; + + + foreach (surface, texture->views[type]) { + if (id.value == surface->id.value) + return surface; + } + + switch (type) { + case BRW_VIEW_LINEAR: + surface = create_linear_view( texture, id, type ); + break; + case BRW_VIEW_IN_PLACE: + surface = create_in_place_view( texture, id, type ); + break; + default: + return NULL; + } + + insert_at_head( texture->views[type], surface ); + return surface; +} + + +void brw_tex_surface_destroy( struct 
pipe_surface *surface ) +{ +} diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c new file mode 100644 index 0000000000..50c30878c6 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -0,0 +1,218 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +/* Code to layout images in a mipmap tree for i965. 
+ */ + +#include "brw_tex_layout.h" + +#define FILE_DEBUG_FLAG DEBUG_MIPTREE + +GLboolean brw_miptree_layout(struct brw_context *brw, + struct intel_mipmap_tree *mt, + uint32_t tiling) +{ + /* XXX: these vary depending on image format: */ + /* GLint align_w = 4; */ + + switch (mt->target) { + case GL_TEXTURE_CUBE_MAP: + if (IS_IGDNG(brw->brw_screen->pci_id)) { + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = mt->width0; + GLuint height = mt->height0; + GLuint qpitch = 0; + GLuint y_pitch = 0; + + mt->pitch = mt->width0; + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + y_pitch = ALIGN(height, align_h); + + if (mt->compressed) { + mt->pitch = ALIGN(mt->width0, align_w); + } + + if (mt->last_level != 0) { + GLuint mip1_width; + + if (mt->compressed) { + mip1_width = ALIGN(minify(mt->width0), align_w) + + ALIGN(minify(minify(mt->width0)), align_w); + } else { + mip1_width = ALIGN(minify(mt->width0), align_w) + + minify(minify(mt->width0)); + } + + if (mip1_width > mt->pitch) { + mt->pitch = mip1_width; + } + } + + mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); + + if (mt->compressed) { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; + } else { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; + } + + for (level = 0; level <= mt->last_level; level++) { + GLuint img_height; + GLuint nr_images = 6; + GLuint q = 0; + + intel_miptree_set_level_info(mt, level, nr_images, x, y, width, + height, 1); + + for (q = 0; q < nr_images; q++) + intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch); + + if (mt->compressed) + img_height = MAX2(1, height/4); + else + img_height = ALIGN(height, align_h); + + if 
(level == 1) { + x += ALIGN(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } + + break; + } + + case GL_TEXTURE_3D: { + GLuint width = mt->width0; + GLuint height = mt->height0; + GLuint depth = mt->depth0; + GLuint pack_x_pitch, pack_x_nr; + GLuint pack_y_pitch; + GLuint level; + GLuint align_h = 2; + GLuint align_w = 4; + + mt->total_height = 0; + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + + if (mt->compressed) { + mt->pitch = ALIGN(width, align_w); + pack_y_pitch = (height + 3) / 4; + } else { + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); + pack_y_pitch = ALIGN(mt->height0, align_h); + } + + pack_x_pitch = width; + pack_x_nr = 1; + + for (level = 0 ; level <= mt->last_level ; level++) { + GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; + GLint x = 0; + GLint y = 0; + GLint q, j; + + intel_miptree_set_level_info(mt, level, nr_images, + 0, mt->total_height, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + intel_miptree_set_image_offset(mt, level, q, x, y); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + mt->total_height += y; + width = minify(width); + height = minify(height); + depth = minify(depth); + + if (mt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > ALIGN(width, align_w)) { + pack_x_pitch = ALIGN(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = ALIGN(pack_y_pitch, align_h); + } + } + + } + /* The 965's sampler lays cachelines out according to how accesses + * in the texture surfaces run, so they may be "vertical" through + * memory. 
As a result, the docs say in Surface Padding Requirements: + * Sampling Engine Surfaces that two extra rows of padding are required. + * We don't know of similar requirements for pre-965, but given that + * those docs are silent on padding requirements in general, let's play + * it safe. + */ + if (mt->target == GL_TEXTURE_CUBE_MAP) + mt->total_height += 2; + break; + } + + default: + i945_miptree_layout_2d(intel, mt, tiling); + break; + } + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + mt->pitch, + mt->total_height, + mt->cpp, + mt->pitch * mt->total_height * mt->cpp ); + + return GL_TRUE; +} + diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 1b73b3fd51..013d839e37 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -29,11 +29,12 @@ * Keith Whitwell */ +#include "pipe/p_state.h" #include "brw_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" +#include "brw_pipe_rast.h" #include "brw_eu.h" #include "brw_util.h" #include "brw_sf.h" @@ -45,7 +46,6 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_compile c; const GLuint *program; GLuint program_size; - GLuint i, idx; memset(&c, 0, sizeof(c)); @@ -54,7 +54,7 @@ static void compile_sf_prog( struct brw_context *brw, brw_init_compile(brw, &c.func); c.key = *key; - c.nr_attrs = util_count_bits(c.key.attrs); + c.nr_attrs = c.key.nr_attrs; c.nr_attr_regs = (c.nr_attrs+1)/2; c.nr_setup_attrs = c.key.nr_attrs; c.nr_setup_regs = (c.nr_setup_attrs+1)/2; @@ -62,21 +62,6 @@ static void compile_sf_prog( struct brw_context *brw, c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; - /* Construct map from attribute number to position in the vertex. 
- */ - for (i = idx = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { - c.point_attrs[i].CoordReplace = - ctx->Point.CoordReplace[i - VERT_RESULT_TEX0]; - } - else { - c.point_attrs[i].CoordReplace = GL_FALSE; - } - idx++; - } /* Which primitive? Or all three? */ @@ -122,7 +107,7 @@ static void compile_sf_prog( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static void upload_sf_prog(struct brw_context *brw) +static int upload_sf_prog(struct brw_context *brw) { struct brw_sf_prog_key key; @@ -131,46 +116,49 @@ static void upload_sf_prog(struct brw_context *brw) /* Populate the key, noting state dependencies: */ /* CACHE_NEW_VS_PROG */ - key.attrs = brw->vs.prog_data->nr_outputs_written; + key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_OUTPUT] + 1; + + + /* XXX: this is probably where the mapping between vertex shader + * outputs and fragment shader inputs should be handled. Assume + * for now 1:1 correspondance. + * + * XXX: scan frag shader inputs to work out linear vs. perspective + * interpolation below. + * + * XXX: as long as we're hard-wiring, is eg. position required to + * be linear? + */ + key.linear_attrs = 0; + key.persp_attrs = (1 << key.nr_attrs) - 1; /* BRW_NEW_REDUCED_PRIMITIVE */ switch (brw->reduced_primitive) { - case GL_TRIANGLES: - /* NOTE: We just use the edgeflag attribute as an indicator that - * unfilled triangles are active. We don't actually do the - * edgeflag testing here, it is already done in the clip - * program. 
+ case PIPE_PRIM_TRIANGLES: + /* PIPE_NEW_RAST */ - if (key.attrs & (1<curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL || + brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL) key.primitive = SF_UNFILLED_TRIS; else key.primitive = SF_TRIANGLES; break; - case GL_LINES: + case PIPE_PRIM_LINES: key.primitive = SF_LINES; break; - case GL_POINTS: + case PIPE_PRIM_POINTS: key.primitive = SF_POINTS; break; } - key.do_point_sprite = ctx->Point.PointSprite; - key.SpriteOrigin = ctx->Point.SpriteOrigin; - /* _NEW_LIGHT */ - key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); - key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + key.do_point_sprite = brw->curr.rast->templ.point_sprite; + key.sprite_origin_lower_left = 0; /* XXX: ctx->Point.SpriteOrigin - fix rast state */ + key.do_flat_shading = brw->curr.rast->templ.flatshade; + key.do_twoside_color = brw->curr.rast->templ.light_twoside; - /* _NEW_HINT */ - key.linear_color = 0; - - /* _NEW_POLYGON */ if (key.do_twoside_color) { - /* If we're rendering to a FBO, we have to invert the polygon - * face orientation, just as we invert the viewport in - * sf_unit_create_from_key(). ctx->DrawBuffer->Name will be - * nonzero if we're rendering to such an FBO. 
- */ - key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0); + key.frontface_ccw = (brw->curr.rast->templ.front_winding == + PIPE_WINDING_CCW); } brw->sws->bo_unreference(brw->sf.prog_bo); @@ -180,14 +168,16 @@ static void upload_sf_prog(struct brw_context *brw) &brw->sf.prog_data); if (brw->sf.prog_bo == NULL) compile_sf_prog( brw, &key ); + + return 0; } const struct brw_tracked_state brw_sf_prog = { .dirty = { - .mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT), + .mesa = (PIPE_NEW_RAST | PIPE_NEW_VERTEX_SHADER), .brw = (BRW_NEW_REDUCED_PRIMITIVE), - .cache = CACHE_NEW_VS_PROG + .cache = 0 }, .prepare = upload_sf_prog }; diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h index c99116b8b1..0b7003dc5e 100644 --- a/src/gallium/drivers/i965/brw_sf.h +++ b/src/gallium/drivers/i965/brw_sf.h @@ -49,14 +49,21 @@ struct brw_sf_prog_key { */ GLuint persp_attrs:32; GLuint linear_attrs:32; + GLuint point_coord_replace_attrs:32; + GLuint nr_attrs:8; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; GLuint sprite_origin_lower_left:1; - GLuint pad:25; + GLuint pad:17; + + GLuint attr_col0:8; + GLuint attr_col1:8; + GLuint attr_bfc0:8; + GLuint attr_bfc1:8; }; struct brw_sf_point_tex { @@ -101,9 +108,7 @@ struct brw_sf_compile { GLuint nr_setup_attrs; GLuint nr_setup_regs; - GLubyte attr_to_idx[VERT_RESULT_MAX]; - GLubyte idx_to_attr[VERT_RESULT_MAX]; - struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; + GLuint point_coord_replace_mask; }; diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index 4acb2b7d72..db52c9553e 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -43,17 +43,12 @@ static struct brw_reg get_vert_attr(struct brw_sf_compile *c, struct brw_reg vert, GLuint attr) { - GLuint off = c->attr_to_idx[attr] / 2; - GLuint sub = 
c->attr_to_idx[attr] % 2; + GLuint off = attr / 2; + GLuint sub = attr % 2; return brw_vec4_grf(vert.nr + off, sub * 4); } -static GLboolean have_attr(struct brw_sf_compile *c, - GLuint attr) -{ - return (c->key.attrs & (1<func; - GLuint i; - for (i = 0; i < 2; i++) { - if (have_attr(c, VERT_RESULT_COL0+i) && - have_attr(c, VERT_RESULT_BFC0+i)) - brw_MOV(p, - get_vert_attr(c, vert, VERT_RESULT_COL0+i), - get_vert_attr(c, vert, VERT_RESULT_BFC0+i)); - } + if (c->key.attr_col0 && c->key.attr_bfc0) + brw_MOV(p, + get_vert_attr(c, vert, c->key.attr_col0), + get_vert_attr(c, vert, c->key.attr_bfc0)); + + if (c->key.attr_col1 && c->key.attr_bfc1) + brw_MOV(p, + get_vert_attr(c, vert, c->key.attr_col1), + get_vert_attr(c, vert, c->key.attr_bfc1)); } @@ -89,8 +85,8 @@ static void do_twoside_color( struct brw_sf_compile *c ) * for user-supplied vertex programs, as t_vp_build.c always does * the right thing. */ - if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) && - !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1))) + if (!(c->key.attr_col0 && c->key.attr_bfc0) && + !(c->key.attr_col1 && c->key.attr_bfc1)) return; /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order @@ -126,14 +122,17 @@ static void copy_colors( struct brw_sf_compile *c, struct brw_reg src) { struct brw_compile *p = &c->func; - GLuint i; - for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) { - if (have_attr(c,i)) - brw_MOV(p, - get_vert_attr(c, dst, i), - get_vert_attr(c, src, i)); - } + if (c->key.attr_col0) + brw_MOV(p, + get_vert_attr(c, dst, c->key.attr_col0), + get_vert_attr(c, src, c->key.attr_col0)); + + if (c->key.attr_col1) + brw_MOV(p, + get_vert_attr(c, dst, c->key.attr_col1), + get_vert_attr(c, src, c->key.attr_col1)); + } @@ -146,10 +145,16 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); - GLuint nr = util_count_bits(c->key.attrs & 
VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; + GLuint nr = 0; - if (!nr) + if (c->key.attr_col0) + nr++; + + if (c->key.attr_col1) + nr++; + + if (nr == 0) return; /* Already done in clip program: @@ -184,10 +189,16 @@ static void do_flatshade_line( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); - GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; + GLuint nr = 0; + + if (c->key.attr_col0) + nr++; + + if (c->key.attr_col1) + nr++; - if (!nr) + if (nr == 0) return; /* Already done in clip program: @@ -319,10 +330,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, *pc_linear = 0; *pc = 0xf; - if (persp_mask & (1 << c->idx_to_attr[reg*2])) + if (persp_mask & (1 << (reg*2))) *pc_persp = 0xf; - if (linear_mask & (1 << c->idx_to_attr[reg*2])) + if (linear_mask & (1 << (reg*2))) *pc_linear = 0xf; /* Maybe only processs one attribute on the final round: @@ -330,10 +341,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, if (reg*2+1 < c->nr_setup_attrs) { *pc |= 0xf0; - if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) + if (persp_mask & (1 << (reg*2+1))) *pc_persp |= 0xf0; - if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) + if (linear_mask & (1 << (reg*2+1))) *pc_linear |= 0xf0; } @@ -513,24 +524,28 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) alloc_regs(c); copy_z_inv_w(c); + for (i = 0; i < c->nr_setup_regs; i++) { - struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; + /* XXX: only seems to check point_coord_replace_attrs for every + * second attribute?!? 
+ */ + boolean coord_replace = !!(c->key.point_coord_replace_attrs & (1<<(2*i))); struct brw_reg a0 = offset(c->vert[0], i); GLushort pc, pc_persp, pc_linear; GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); if (pc_persp) { - if (!tex->CoordReplace) { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } + if (coord_replace) { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } } - if (tex->CoordReplace) { - /* Caculate 1.0/PointWidth */ - brw_math(&c->func, + if (coord_replace) { + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, c->tmp, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, @@ -539,33 +554,37 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); - if (c->key.SpriteOrigin == GL_LOWER_LEFT) { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } else { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } - } else { - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + if (c->key.sprite_origin_lower_left) { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } + else { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } + } + else { + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); } { 
brw_set_predicate_control_flag_value(p, pc); - if (tex->CoordReplace) { - if (c->key.sprite_origin_lower_left) { - brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); - brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); - } - else - brw_MOV(p, c->m3C0, brw_imm_f(0.0)); - } else { - brw_MOV(p, c->m3C0, a0); /* constant value */ + if (coord_replace) { + if (c->key.sprite_origin_lower_left) { + brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); + brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); + } + else { + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + } + } + else { + brw_MOV(p, c->m3C0, a0); /* constant value */ } /* Copy m0..m3 to URB. diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index 648a16a038..fbc9f15eb4 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -29,58 +29,48 @@ * Keith Whitwell */ +#include "util/u_math.h" +#include "pipe/p_state.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_debug.h" +#include "brw_pipe_rast.h" -static void upload_sf_vp(struct brw_context *brw) +static int upload_sf_vp(struct brw_context *brw) { - const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + const struct pipe_viewport_state *vp = &brw->curr.vp; + const struct pipe_scissor_state *scissor = &brw->curr.scissor; struct brw_sf_viewport sfv; - GLfloat y_scale, y_bias; - const GLfloat *v = ctx->Viewport._WindowMap.m; memset(&sfv, 0, sizeof(sfv)); - y_scale = 1.0; - y_bias = 0; + /* PIPE_NEW_VIEWPORT, PIPE_NEW_SCISSOR */ - /* _NEW_VIEWPORT */ + sfv.viewport.m00 = vp->scale[0]; + sfv.viewport.m11 = vp->scale[1]; + sfv.viewport.m22 = vp->scale[2]; + sfv.viewport.m30 = vp->translate[0]; + sfv.viewport.m31 = vp->translate[1]; + sfv.viewport.m32 = vp->translate[2]; - sfv.viewport.m00 = v[MAT_SX]; - sfv.viewport.m11 = v[MAT_SY] * y_scale; - sfv.viewport.m22 = v[MAT_SZ] * depth_scale; - sfv.viewport.m30 = v[MAT_TX]; - 
sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; - sfv.viewport.m32 = v[MAT_TZ] * depth_scale; - - /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT - * for DrawBuffer->_[XY]{min,max} - */ - - /* The scissor only needs to handle the intersection of drawable and - * scissor rect. - * - * Note that the hardware's coordinates are inclusive, while Mesa's min is - * inclusive but max is exclusive. - */ - /* Y=0=bottom */ - sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; - sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; - sfv.scissor.ymin = ctx->DrawBuffer->_Ymin; - sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1; + sfv.scissor.xmin = scissor->minx; + sfv.scissor.xmax = scissor->maxx; /* -1 ?? */ + sfv.scissor.ymin = scissor->miny; + sfv.scissor.ymax = scissor->maxy; /* -1 ?? */ brw->sws->bo_unreference(brw->sf.vp_bo); brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); + + return 0; } const struct brw_tracked_state brw_sf_vp = { .dirty = { - .mesa = (_NEW_VIEWPORT | - _NEW_SCISSOR | - _NEW_BUFFERS), + .mesa = (PIPE_NEW_VIEWPORT | + PIPE_NEW_SCISSOR), .brw = 0, .cache = 0 }, @@ -90,15 +80,17 @@ const struct brw_tracked_state brw_sf_vp = { struct brw_sf_unit_key { unsigned int total_grf; unsigned int urb_entry_read_length; - unsigned int nr_urb_entries, urb_size, sfsize; - - GLenum front_face, cull_face, provoking_vertex; + unsigned scissor:1; unsigned line_smooth:1; unsigned point_sprite:1; unsigned point_attenuated:1; - unsigned render_to_fbo:1; + unsigned front_face:2; + unsigned cull_mode:2; + unsigned flatshade_first:1; + unsigned gl_rasterization_rules:1; + unsigned line_last_pixel_enable:1; float line_width; float point_size; }; @@ -106,6 +98,7 @@ struct brw_sf_unit_key { static void sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) { + const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ; memset(key, 0, sizeof(*key)); /* CACHE_NEW_SF_PROG */ @@ -117,25 +110,22 @@ sf_unit_populate_key(struct brw_context *brw, struct 
brw_sf_unit_key *key) key->urb_size = brw->urb.vsize; key->sfsize = brw->urb.sfsize; - key->scissor = ctx->Scissor.Enabled; - key->front_face = ctx->Polygon.FrontFace; - - if (ctx->Polygon.CullFlag) - key->cull_face = ctx->Polygon.CullFaceMode; - else - key->cull_face = GL_NONE; - - key->line_width = ctx->Line.Width; - key->line_smooth = ctx->Line.SmoothFlag; - - key->point_sprite = ctx->Point.PointSprite; - key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); - key->point_attenuated = ctx->Point._Attenuated; - - /* _NEW_LIGHT */ - key->provoking_vertex = ctx->Light.ProvokingVertex; - - key->render_to_fbo = 1; + /* PIPE_NEW_RAST */ + key->scissor = rast->scissor; + key->front_face = rast->front_winding; + key->cull_mode = rast->cull_mode; + key->line_smooth = rast->line_smooth; + key->line_width = rast->line_width; + key->flatshade_first = rast->flatshade_first; + key->line_last_pixel_enable = rast->line_last_pixel; + key->gl_rasterization_rules = rast->gl_rasterization_rules; + + key->point_sprite = rast->point_sprite; + key->point_attenuated = rast->point_size_per_vertex; + + key->point_size = CLAMP(rast->point_size, + rast->point_size_min, + rast->point_size_max); } static struct brw_winsys_buffer * @@ -147,7 +137,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, int chipset_max_threads; memset(&sf, 0, sizeof(sf)); - sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */ sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -174,10 +164,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1; - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) sf.thread4.max_threads = 0; - if (INTEL_DEBUG & DEBUG_STATS) + if 
(BRW_DEBUG & DEBUG_STATS) sf.thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ @@ -185,31 +175,30 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.sf5.viewport_transform = 1; - /* _NEW_SCISSOR */ if (key->scissor) sf.sf6.scissor = 1; - /* _NEW_POLYGON */ - if (key->front_face == GL_CCW) + if (key->front_face == PIPE_WINDING_CCW) sf.sf5.front_winding = BRW_FRONTWINDING_CCW; else sf.sf5.front_winding = BRW_FRONTWINDING_CW; - switch (key->cull_face) { - case GL_FRONT: - sf.sf6.cull_mode = BRW_CULLMODE_FRONT; + switch (key->cull_mode) { + case PIPE_WINDING_CCW: + case PIPE_WINDING_CW: + sf.sf6.cull_mode = (key->front_face == key->cull_mode ? + BRW_CULLMODE_FRONT : + BRW_CULLMODE_BACK); break; - case GL_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: + case PIPE_WINDING_BOTH: sf.sf6.cull_mode = BRW_CULLMODE_BOTH; break; - case GL_NONE: + case PIPE_WINDING_NONE: sf.sf6.cull_mode = BRW_CULLMODE_NONE; break; default: assert(0); + sf.sf6.cull_mode = BRW_CULLMODE_NONE; break; } @@ -223,9 +212,9 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else if (sf.sf6.line_width <= 0x2) sf.sf6.line_width = 0; - /* _NEW_BUFFERS */ - key->render_to_fbo = 1; - if (!key->render_to_fbo) { + /* XXX: gl_rasterization_rules? something else? 
+ */ + if (0) { /* Rendering to an OpenGL window */ sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; } @@ -261,7 +250,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ - if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) { + if (!key->flatshade_first) { sf.sf7.trifan_pv = 2; sf.sf7.linestrip_pv = 1; sf.sf7.tristrip_pv = 2; @@ -270,12 +259,19 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.sf7.linestrip_pv = 0; sf.sf7.tristrip_pv = 0; } - sf.sf7.line_last_pixel_enable = 0; + + sf.sf7.line_last_pixel_enable = key->line_last_pixel_enable; /* Set bias for OpenGL rasterization rules: */ - sf.sf6.dest_org_vbias = 0x8; - sf.sf6.dest_org_hbias = 0x8; + if (key->gl_rasterization_rules) { + sf.sf6.dest_org_vbias = 0x8; + sf.sf6.dest_org_hbias = 0x8; + } + else { + sf.sf6.dest_org_vbias = 0x0; + sf.sf6.dest_org_hbias = 0x0; + } bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, key, sizeof(*key), @@ -287,23 +283,23 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. 
*/ /* Emit SF program relocation */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - sf.thread0.grf_reg_count << 1, - offsetof(struct brw_sf_unit_state, thread0), - brw->sf.prog_bo); + brw->sws->bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + sf.thread0.grf_reg_count << 1, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); /* Emit SF viewport relocation */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), - offsetof(struct brw_sf_unit_state, sf5), - brw->sf.vp_bo); + brw->sws->bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); return bo; } -static void upload_sf_unit( struct brw_context *brw ) +static int upload_sf_unit( struct brw_context *brw ) { struct brw_sf_unit_key key; struct brw_winsys_buffer *reloc_bufs[2]; @@ -321,16 +317,12 @@ static void upload_sf_unit( struct brw_context *brw ) if (brw->sf.state_bo == NULL) { brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs); } + return 0; } const struct brw_tracked_state brw_sf_unit = { .dirty = { - .mesa = (_NEW_POLYGON | - _NEW_LIGHT | - _NEW_LINE | - _NEW_POINT | - _NEW_SCISSOR | - _NEW_BUFFERS), + .mesa = (PIPE_NEW_RAST), .brw = BRW_NEW_URB_FENCE, .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index 663fc839df..2275e9ad69 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -168,9 +168,20 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, void brw_destroy_batch_cache( struct brw_context *brw ); void brw_clear_batch_cache( struct brw_context *brw ); -/* brw_wm_surface_state.c */ +/*********************************************************************** + * brw_wm_surface_state.c + */ struct brw_winsys_buffer * brw_create_constant_surface( 
struct brw_context *brw, struct brw_surface_key *key ); +/*********************************************************************** + * brw_state_debug.c + */ +void brw_update_dirty_counts( unsigned mesa, + unsigned brw, + unsigned cache ); + + + #endif diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c index 324fce5163..7d212e5c24 100644 --- a/src/gallium/drivers/i965/brw_state_batch.c +++ b/src/gallium/drivers/i965/brw_state_batch.c @@ -46,7 +46,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct brw_cached_batch_item *item = brw->cached_batch_items; struct header *newheader = (struct header *)data; - if (brw->emit_state_always) { + if (brw->flags.always_emit_state) { brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } @@ -56,8 +56,8 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) return GL_FALSE; if (item->sz != sz) { - _mesa_free(item->header); - item->header = _mesa_malloc(sz); + FREE(item->header); + item->header = MALLOC(sz); item->sz = sz; } goto emit; @@ -67,7 +67,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, assert(!item); item = CALLOC_STRUCT(brw_cached_batch_item); - item->header = _mesa_malloc(sz); + item->header = MALLOC(sz); item->sz = sz; item->next = brw->cached_batch_items; brw->cached_batch_items = item; diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index 97f88b3ab3..4310d01ba2 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -55,7 +55,9 @@ * only one of the two buffers referenced gets put into the offset, and the * incorrect program is run for the other instance. 
*/ +#include "util/u_memory.h" +#include "brw_debug.h" #include "brw_state.h" #include "brw_batchbuffer.h" @@ -107,9 +109,9 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, if (bo == cache->last_bo[cache_id]) return; /* no change */ - brw->sws->bo_unreference(cache->last_bo[cache_id]); + cache->sws->bo_unreference(cache->last_bo[cache_id]); cache->last_bo[cache_id] = bo; - brw->sws->bo_reference(cache->last_bo[cache_id]); + cache->sws->bo_reference(cache->last_bo[cache_id]); cache->brw->state.dirty.cache |= 1 << cache_id; } @@ -127,7 +129,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, for (c = cache->items[hash % cache->size]; c; c = c->next) bucketcount++; - fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size, + debug_printf("bucket %d/%d = %d/%d items\n", hash % cache->size, cache->size, bucketcount, cache->n_items); #endif @@ -154,7 +156,7 @@ rehash(struct brw_cache *cache) GLuint size, i; size = cache->size * 3; - items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items)); + items = (struct brw_cache_item**) CALLOC(size, sizeof(*items)); for (i = 0; i < cache->size; i++) for (c = cache->items[i]; c; c = next) { @@ -194,7 +196,7 @@ brw_search_cache(struct brw_cache *cache, update_cache_last(cache, cache_id, item->bo); - brw->sws->bo_reference(item->bo); + cache->sws->bo_reference(item->bo); return item->bo; } @@ -219,20 +221,25 @@ brw_upload_cache( struct brw_cache *cache, struct brw_winsys_buffer *bo; int i; - /* Create the buffer object to contain the data */ - bo = brw->sws->bo_alloc(cache->sws, - cache->buffer_type[cache_id], data_size, 1 << 6); + /* Create the buffer object to contain the data. For now, use a + * single buffer type to describe all cached state atoms. Later, + * may want to take advantage of hardware distinctions between + * these various entities. 
+ */ + bo = cache->sws->bo_alloc(cache->sws, + BRW_BUFFER_TYPE_STATE_CACHE, + data_size, 1 << 6); /* Set up the memory containing the key, aux_data, and reloc_bufs */ - tmp = _mesa_malloc(key_size + aux_size + relocs_size); + tmp = MALLOC(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); memcpy(tmp + key_size, aux, cache->aux_size[cache_id]); memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); for (i = 0; i < nr_reloc_bufs; i++) { if (reloc_bufs[i] != NULL) - brw->sws->bo_reference(reloc_bufs[i]); + cache->sws->bo_reference(reloc_bufs[i]); } item->cache_id = cache_id; @@ -243,7 +250,7 @@ brw_upload_cache( struct brw_cache *cache, item->nr_reloc_bufs = nr_reloc_bufs; item->bo = bo; - brw->sws->bo_reference(bo); + cache->sws->bo_reference(bo); item->data_size = data_size; if (cache->n_items > cache->size * 1.5) @@ -259,13 +266,13 @@ brw_upload_cache( struct brw_cache *cache, *(void **)aux_return = (void *)((char *)item->key + item->key_size); } - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("upload %s: %d bytes to cache id %d\n", + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("upload %s: %d bytes to cache id %d\n", cache->name[cache_id], data_size, cache_id); /* Copy data to the buffer */ - dri_bo_subdata(bo, 0, data_size, data); + cache->sws->bo_subdata(bo, 0, data_size, data); update_cache_last(cache, cache_id, bo); @@ -292,7 +299,7 @@ brw_cache_data_sz(struct brw_cache *cache, reloc_bufs, nr_reloc_bufs); if (item) { update_cache_last(cache, cache_id, item->bo); - brw->sws->bo_reference(item->bo); + cache->sws->bo_reference(item->bo); return item->bo; } @@ -349,11 +356,12 @@ brw_init_non_surface_cache(struct brw_context *brw) struct brw_cache *cache = &brw->cache; cache->brw = brw; + cache->sws = brw->sws; cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + CALLOC(cache->size, sizeof(struct brw_cache_item)); brw_init_cache_id(cache, "CC_VP", @@ -457,7 
+465,7 @@ brw_init_surface_cache(struct brw_context *brw) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + CALLOC(cache->size, sizeof(struct brw_cache_item)); brw_init_cache_id(cache, "SS_SURFACE", @@ -487,8 +495,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) struct brw_cache_item *c, *next; GLuint i; - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { @@ -507,7 +515,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) cache->n_items = 0; if (brw->curbe.last_buf) { - _mesa_free(brw->curbe.last_buf); + FREE(brw->curbe.last_buf); brw->curbe.last_buf = NULL; } @@ -527,8 +535,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo) struct brw_cache_item **prev; GLuint i; - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (prev = &cache->items[i]; *prev;) { @@ -540,8 +548,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo) *prev = c->next; for (j = 0; j < c->nr_reloc_bufs; j++) - brw->sws->bo_unreference(c->reloc_bufs[j]); - brw->sws->bo_unreference(c->bo); + cache->sws->bo_unreference(c->reloc_bufs[j]); + cache->sws->bo_unreference(c->bo); free((void *)c->key); free(c); cache->n_items--; @@ -555,8 +563,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo) void brw_state_cache_check_size(struct brw_context *brw) { - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); /* un-tuned guess. 
We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. @@ -574,8 +582,8 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { GLuint i; - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c index 22cea4b7d8..cc4744dc16 100644 --- a/src/gallium/drivers/i965/brw_state_debug.c +++ b/src/gallium/drivers/i965/brw_state_debug.c @@ -109,8 +109,25 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) if (bit_map[i].bit == 0) return; - fprintf(stderr, "0x%08x: %12d (%s)\n", + debug_printf("0x%08x: %12d (%s)\n", bit_map[i].bit, bit_map[i].count, bit_map[i].name); } } +void +brw_update_dirty_counts( unsigned mesa, + unsigned brw, + unsigned cache ) +{ + static int dirty_count = 0; + + brw_update_dirty_count(mesa_bits, mesa); + brw_update_dirty_count(brw_bits, brw); + brw_update_dirty_count(cache_bits, cache); + if (dirty_count++ % 1000 == 0) { + brw_print_dirty_count(mesa_bits, mesa); + brw_print_dirty_count(brw_bits, brw); + brw_print_dirty_count(cache_bits, cache); + debug_printf("\n"); + } +} diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c index 1bc83fb9c1..72604304d4 100644 --- a/src/gallium/drivers/i965/brw_state_dump.c +++ b/src/gallium/drivers/i965/brw_state_dump.c @@ -28,6 +28,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_winsys.h" /** * Prints out a header, the contents, and the message associated with @@ -44,28 +45,32 @@ state_out(const char *name, void *data, uint32_t hw_offset, int index, { va_list va; - fprintf(stderr, "%8s: 0x%08x: 0x%08x: ", - name, hw_offset + index * 4, ((uint32_t *)data)[index]); + debug_printf("%8s: 0x%08x: 0x%08x: ", + 
name, hw_offset + index * 4, ((uint32_t *)data)[index]); va_start(va, fmt); - vfprintf(stderr, fmt, va); + debug_vprintf(fmt, va); va_end(va); } /** Generic, undecoded state buffer debug printout */ static void -state_struct_out(const char *name, struct brw_winsys_buffer *buffer, unsigned int state_size) +state_struct_out(struct brw_winsys_screen *sws, + const char *name, + struct brw_winsys_buffer *buffer, + unsigned int state_size) { int i; + void *data; if (buffer == NULL) return; - dri_bo_map(buffer, GL_FALSE); + data = sws->bo_map(buffer, GL_FALSE); for (i = 0; i < state_size / 4; i++) { - state_out(name, buffer->virtual, buffer->offset, i, + state_out(name, data, buffer->offset, i, "dword %d\n", i); } - dri_bo_unmap(buffer); + sws->bo_unmap(buffer); } static const char * @@ -106,12 +111,11 @@ static void dump_wm_surface_state(struct brw_context *brw) char name[20]; if (surf_bo == NULL) { - fprintf(stderr, " WM SS%d: NULL\n", i); + debug_printf(" WM SS%d: NULL\n", i); continue; } - dri_bo_map(surf_bo, GL_FALSE); + surf = (struct brw_surface_state *)brw->sws->bo_map(surf_bo, GL_FALSE); surfoff = surf_bo->offset; - surf = (struct brw_surface_state *)(surf_bo->virtual); sprintf(name, "WM SS%d", i); state_out(name, surf, surfoff, 0, "%s %s\n", @@ -127,7 +131,7 @@ static void dump_wm_surface_state(struct brw_context *brw) state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", surf->ss5.x_offset, surf->ss5.y_offset); - dri_bo_unmap(surf_bo); + brw->sws->bo_unmap(surf_bo); } } @@ -140,9 +144,7 @@ static void dump_sf_viewport_state(struct brw_context *brw) if (brw->sf.vp_bo == NULL) return; - dri_bo_map(brw->sf.vp_bo, GL_FALSE); - - vp = brw->sf.vp_bo->virtual; + vp = (struct brw_sf_viewport *)brw->sws->bo_map(brw->sf.vp_bo, GL_FALSE); vp_off = brw->sf.vp_bo->offset; state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); @@ -157,10 +159,12 @@ static void dump_sf_viewport_state(struct brw_context *brw) state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", 
vp->scissor.xmax, vp->scissor.ymax); - dri_bo_unmap(brw->sf.vp_bo); + brw->sws->bo_unmap(brw->sf.vp_bo); } -static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog) +static void brw_debug_prog(struct brw_winsys_screen *sws, + const char *name, + struct brw_winsys_buffer *prog) { unsigned int i; uint32_t *data; @@ -168,12 +172,10 @@ static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog) if (prog == NULL) return; - dri_bo_map(prog, GL_FALSE); - - data = prog->virtual; + data = (uint32_t *)sws->bo_map(prog, GL_FALSE); for (i = 0; i < prog->size / 4 / 4; i++) { - fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", + debug_printf("%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", name, (unsigned int)prog->offset + i * 4 * 4, data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); /* Stop at the end of the program. It'd be nice to keep track of the actual @@ -186,7 +188,7 @@ static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog) break; } - dri_bo_unmap(prog); + sws->bo_unmap(prog); } @@ -202,19 +204,21 @@ static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog) */ void brw_debug_batch(struct brw_context *brw) { - state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); + struct brw_winsys_screen *sws = brw->sws; + + state_struct_out(sws, "WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); dump_wm_surface_state(brw); - state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state)); - brw_debug_prog("VS prog", brw->vs.prog_bo); + state_struct_out(sws, "VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state)); + brw_debug_prog(sws, "VS prog", brw->vs.prog_bo); - state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state)); - brw_debug_prog("GS prog", brw->gs.prog_bo); + state_struct_out(sws, "GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state)); + brw_debug_prog(sws, "GS prog", brw->gs.prog_bo); - state_struct_out("SF", 
brw->sf.state_bo, sizeof(struct brw_sf_unit_state)); + state_struct_out(sws, "SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state)); dump_sf_viewport_state(brw); - brw_debug_prog("SF prog", brw->sf.prog_bo); + brw_debug_prog(sws, "SF prog", brw->sf.prog_bo); - state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state)); - brw_debug_prog("WM prog", brw->wm.prog_bo); + state_struct_out(sws, "WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state)); + brw_debug_prog(sws, "WM prog", brw->wm.prog_bo); } diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index 8659e35289..eff3a40a46 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -34,6 +34,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_batchbuffer.h" +#include "brw_debug.h" /* This is used to initialize brw->state.atoms[]. We could use this * list directly except for a single atom, brw_constant_buffer, which @@ -83,12 +84,8 @@ const struct brw_tracked_state *atoms[] = &brw_blend_constant_color, &brw_depthbuffer, - &brw_polygon_stipple, - &brw_polygon_stipple_offset, - &brw_line_stipple, - &brw_aa_line_parameters, &brw_psp_urb_cbs, @@ -163,11 +160,12 @@ enum pipe_error brw_validate_state( struct brw_context *brw ) { struct brw_state_flags *state = &brw->state.dirty; GLuint i; + int ret; brw_clear_validated_bos(brw); - brw_add_validated_bo(brw, intel->batch->buf); + brw_add_validated_bo(brw, brw->batch->buf); - if (brw->emit_state_always) { + if (brw->flags.always_emit_state) { state->mesa |= ~0; state->brw |= ~0; state->cache |= ~0; @@ -199,10 +197,10 @@ enum pipe_error brw_validate_state( struct brw_context *brw ) * If this fails, we can experience GPU lock-ups. 
*/ { - const struct brw_fragment_program *fp = brw->fragment_program; + const struct brw_fragment_shader *fp = brw->curr.fragment_shader; if (fp) { - assert(fp->info.max_sampler <= brw->nr_samplers && - fp->info.max_texture <= brw->nr_textures); + assert(fp->info.file_max[TGSI_FILE_SAMPLER] < brw->curr.num_samplers && + fp->info.texture_max < brw->curr.num_textures); } } @@ -213,18 +211,18 @@ enum pipe_error brw_validate_state( struct brw_context *brw ) enum pipe_error brw_upload_state(struct brw_context *brw) { struct brw_state_flags *state = &brw->state.dirty; + int ret; int i; - static int dirty_count = 0; brw_clear_validated_bos(brw); - if (INTEL_DEBUG) { + if (BRW_DEBUG) { /* Debug version which enforces various sanity checks on the * state flags which are generated and checked to help ensure * state atoms are ordered correctly in the list. */ struct brw_state_flags examined, prev; - _mesa_memset(&examined, 0, sizeof(examined)); + memset(&examined, 0, sizeof(examined)); prev = *state; for (i = 0; i < Elements(atoms); i++) { @@ -268,19 +266,14 @@ enum pipe_error brw_upload_state(struct brw_context *brw) } } - if (INTEL_DEBUG & DEBUG_STATE) { - brw_update_dirty_count(mesa_bits, state->mesa); - brw_update_dirty_count(brw_bits, state->brw); - brw_update_dirty_count(cache_bits, state->cache); - if (dirty_count++ % 1000 == 0) { - brw_print_dirty_count(mesa_bits, state->mesa); - brw_print_dirty_count(brw_bits, state->brw); - brw_print_dirty_count(cache_bits, state->cache); - debug_printf("\n"); - } + if (BRW_DEBUG & DEBUG_STATE) { + brw_update_dirty_counts( state->mesa, + state->brw, + state->cache ); } /* Clear dirty flags: */ memset(state, 0, sizeof(*state)); + return 0; } diff --git a/src/gallium/drivers/i965/brw_tex.c b/src/gallium/drivers/i965/brw_tex.c deleted file mode 100644 index 6f7adb6393..0000000000 --- a/src/gallium/drivers/i965/brw_tex.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. 
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" - -/** - * Finalizes all textures, completing any rendering that needs to be done - * to prepare them. - */ -void brw_validate_textures( struct brw_context *brw ) -{ - int i; - - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - - if (texUnit->_ReallyEnabled) { - intel_finalize_mipmap_tree(intel, i); - } - } -} diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c deleted file mode 100644 index 50c30878c6..0000000000 --- a/src/gallium/drivers/i965/brw_tex_layout.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. 
All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -/* Code to layout images in a mipmap tree for i965. 
- */ - -#include "brw_tex_layout.h" - -#define FILE_DEBUG_FLAG DEBUG_MIPTREE - -GLboolean brw_miptree_layout(struct brw_context *brw, - struct intel_mipmap_tree *mt, - uint32_t tiling) -{ - /* XXX: these vary depending on image format: */ - /* GLint align_w = 4; */ - - switch (mt->target) { - case GL_TEXTURE_CUBE_MAP: - if (IS_IGDNG(brw->brw_screen->pci_id)) { - GLuint align_h = 2, align_w = 4; - GLuint level; - GLuint x = 0; - GLuint y = 0; - GLuint width = mt->width0; - GLuint height = mt->height0; - GLuint qpitch = 0; - GLuint y_pitch = 0; - - mt->pitch = mt->width0; - intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); - y_pitch = ALIGN(height, align_h); - - if (mt->compressed) { - mt->pitch = ALIGN(mt->width0, align_w); - } - - if (mt->last_level != 0) { - GLuint mip1_width; - - if (mt->compressed) { - mip1_width = ALIGN(minify(mt->width0), align_w) - + ALIGN(minify(minify(mt->width0)), align_w); - } else { - mip1_width = ALIGN(minify(mt->width0), align_w) - + minify(minify(mt->width0)); - } - - if (mip1_width > mt->pitch) { - mt->pitch = mip1_width; - } - } - - mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); - - if (mt->compressed) { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; - mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; - } else { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; - mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; - } - - for (level = 0; level <= mt->last_level; level++) { - GLuint img_height; - GLuint nr_images = 6; - GLuint q = 0; - - intel_miptree_set_level_info(mt, level, nr_images, x, y, width, - height, 1); - - for (q = 0; q < nr_images; q++) - intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch); - - if (mt->compressed) - img_height = MAX2(1, height/4); - else - img_height = ALIGN(height, align_h); - - if 
(level == 1) { - x += ALIGN(width, align_w); - } - else { - y += img_height; - } - - width = minify(width); - height = minify(height); - } - - break; - } - - case GL_TEXTURE_3D: { - GLuint width = mt->width0; - GLuint height = mt->height0; - GLuint depth = mt->depth0; - GLuint pack_x_pitch, pack_x_nr; - GLuint pack_y_pitch; - GLuint level; - GLuint align_h = 2; - GLuint align_w = 4; - - mt->total_height = 0; - intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); - - if (mt->compressed) { - mt->pitch = ALIGN(width, align_w); - pack_y_pitch = (height + 3) / 4; - } else { - mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); - pack_y_pitch = ALIGN(mt->height0, align_h); - } - - pack_x_pitch = width; - pack_x_nr = 1; - - for (level = 0 ; level <= mt->last_level ; level++) { - GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; - GLint x = 0; - GLint y = 0; - GLint q, j; - - intel_miptree_set_level_info(mt, level, nr_images, - 0, mt->total_height, - width, height, depth); - - for (q = 0; q < nr_images;) { - for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { - intel_miptree_set_image_offset(mt, level, q, x, y); - x += pack_x_pitch; - } - - x = 0; - y += pack_y_pitch; - } - - - mt->total_height += y; - width = minify(width); - height = minify(height); - depth = minify(depth); - - if (mt->compressed) { - pack_y_pitch = (height + 3) / 4; - - if (pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); - pack_x_nr <<= 1; - } - } else { - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); - } - } - - } - /* The 965's sampler lays cachelines out according to how accesses - * in the texture surfaces run, so they may be "vertical" through - * memory. 
As a result, the docs say in Surface Padding Requirements: - * Sampling Engine Surfaces that two extra rows of padding are required. - * We don't know of similar requirements for pre-965, but given that - * those docs are silent on padding requirements in general, let's play - * it safe. - */ - if (mt->target == GL_TEXTURE_CUBE_MAP) - mt->total_height += 2; - break; - } - - default: - i945_miptree_layout_2d(intel, mt, tiling); - break; - } - DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - mt->pitch, - mt->total_height, - mt->cpp, - mt->pitch * mt->total_height * mt->cpp ); - - return GL_TRUE; -} - diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c index a2277519ad..ff2466528d 100644 --- a/src/gallium/drivers/i965/brw_urb.c +++ b/src/gallium/drivers/i965/brw_urb.c @@ -184,17 +184,17 @@ static void recalculate_urb_fence( struct brw_context *brw ) * entries and the values for minimum nr of entries * provided above. */ - _mesa_printf("couldn't calculate URB layout!\n"); + debug_printf("couldn't calculate URB layout!\n"); exit(1); } - if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) - _mesa_printf("URB CONSTRAINED\n"); + if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) + debug_printf("URB CONSTRAINED\n"); } done: - if (INTEL_DEBUG & DEBUG_URB) - _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", + if (BRW_DEBUG & DEBUG_URB) + debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. 
%d\n", brw->urb.vs_start, brw->urb.gs_start, brw->urb.clip_start, diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index 54f7d7d7c4..e33fa2f0aa 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -64,7 +64,7 @@ struct brw_vs_compile { struct brw_reg r0; struct brw_reg r1; - struct brw_reg regs[PROGRAM_ADDRESS+1][128]; + struct brw_reg regs[TGSI_FILE_COUNT][128]; struct brw_reg tmp; struct brw_reg stack; diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 086f54799e..04132a167b 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -242,10 +242,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->prog_data.total_grf = reg; - if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); - _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); - _mesa_printf("%s reg = %d\n", __FUNCTION__, reg); + if (BRW_DEBUG & DEBUG_VS) { + debug_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); + debug_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); + debug_printf("%s reg = %d\n", __FUNCTION__, reg); } } @@ -1248,10 +1248,10 @@ void brw_vs_emit(struct brw_vs_compile *c ) GLuint index; GLuint file; - if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("vs-mesa:\n"); + if (BRW_DEBUG & DEBUG_VS) { + debug_printf("vs-mesa:\n"); _mesa_print_program(&c->vp->program.Base); - _mesa_printf("\n"); + debug_printf("\n"); } brw_set_compression_control(p, BRW_COMPRESSION_NONE); @@ -1526,12 +1526,12 @@ void brw_vs_emit(struct brw_vs_compile *c ) post_vs_emit(c, end_inst, last_inst); - if (INTEL_DEBUG & DEBUG_VS) { + if (BRW_DEBUG & DEBUG_VS) { int i; - _mesa_printf("vs-native:\n"); + debug_printf("vs-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - 
_mesa_printf("\n"); + debug_printf("\n"); } } diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index 1717223e49..05a91f2de4 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -122,7 +122,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2, 1, chipset_max_threads) - 1; - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) vs.thread4.max_threads = 0; /* No samplers for ARB_vp programs: @@ -131,7 +131,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) */ vs.vs5.sampler_count = 0; - if (INTEL_DEBUG & DEBUG_STATS) + if (BRW_DEBUG & DEBUG_STATS) vs.thread4.stats_enable = 1; /* Vertex program always enabled: diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index 51e23b9640..33032276bc 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -69,6 +69,7 @@ enum brw_buffer_type BRW_BUFFER_TYPE_SHADER_CONSTANTS, BRW_BUFFER_TYPE_WM_SCRATCH, BRW_BUFFER_TYPE_BATCH, + BRW_BUFFER_TYPE_STATE_CACHE, }; @@ -156,11 +157,15 @@ struct brw_winsys_screen { unsigned offset, struct brw_winsys_buffer *b2); - void (*bo_subdata)(struct brw_winsys_buffer *dst, + void (*bo_subdata)(struct brw_winsys_buffer *buffer, size_t offset, size_t size, const void *data); + boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer); + boolean (*bo_references)(struct brw_winsys_buffer *a, + struct brw_winsys_buffer *b); + /* XXX: couldn't this be handled by returning true/false on * bo_emit_reloc? */ @@ -171,18 +176,13 @@ struct brw_winsys_screen { /** * Map a buffer. */ - void *(*buffer_map)(struct brw_winsys *iws, - struct brw_winsys_buffer *buffer, - boolean write); + void *(*bo_map)(struct brw_winsys_buffer *buffer, + boolean write); /** * Unmap a buffer. 
*/ - void (*buffer_unmap)(struct brw_winsys *iws, - struct brw_winsys_buffer *buffer); - - void (*buffer_destroy)(struct brw_winsys *iws, - struct brw_winsys_buffer *buffer); + void (*bo_unmap)(struct brw_winsys_buffer *buffer); /*@}*/ diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 764708f7df..3d889699f8 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -178,8 +178,8 @@ static void do_wm_prog( struct brw_context *brw, brw_wm_non_glsl_emit(brw, c); } - if (INTEL_DEBUG & DEBUG_WM) - fprintf(stderr, "\n"); + if (BRW_DEBUG & DEBUG_WM) + debug_printf("\n"); /* get the program */ diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index bf241f5fa4..5bc2a49c1f 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -33,6 +33,7 @@ #ifndef BRW_WM_H #define BRW_WM_H +#include "tgsi/tgsi_ureg.h" #include "brw_context.h" #include "brw_eu.h" @@ -57,17 +58,18 @@ #define AA_ALWAYS 2 struct brw_wm_prog_key { + unsigned proj_attrib_mask; /**< one bit per fragment program attribute */ + unsigned linear_attrib_mask:1; /**< linear interpolation vs perspective interp */ + GLuint source_depth_reg:3; GLuint aa_dest_stencil_reg:3; GLuint dest_depth_reg:3; GLuint nr_depth_regs:3; - GLuint computes_depth:1; /* could be derived from program string */ + GLuint computes_depth:1; GLuint source_depth_to_render_target:1; GLuint flat_shade:1; - GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint runtime_check_aads_emit:1; - - GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ + GLuint shadowtex_mask:16; GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ @@ -75,7 +77,7 @@ struct brw_wm_prog_key { GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; GLuint program_string_id:32; - GLuint drawable_height; + GLuint vp_nr_outputs_written; }; @@ -151,7 +153,7 @@ struct brw_wm_instruction { }; -#define 
BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) +#define BRW_WM_MAX_INSN 2048 #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) #define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) @@ -161,11 +163,19 @@ struct brw_wm_instruction { #define BRW_WM_MAX_SUBROUTINE 16 +struct ureg_instruction { + unsigned opcode:8; + unsigned tex_target:3; + struct ureg_dst dst; + struct ureg_src src[3]; +}; + /* New opcodes to track internal operations required for WM unit. * These are added early so that the registers used can be tracked, * freed and reused like those of other instructions. */ +#define MAX_OPCODE TGSI_OPCODE_LAST #define WM_PIXELXY (MAX_OPCODE) #define WM_DELTAXY (MAX_OPCODE + 1) #define WM_PIXELW (MAX_OPCODE + 2) @@ -177,7 +187,7 @@ struct brw_wm_instruction { #define WM_FRONTFACING (MAX_OPCODE + 8) #define MAX_WM_OPCODE (MAX_OPCODE + 9) -#define PROGRAM_PAYLOAD (PROGRAM_FILE_MAX) +#define PROGRAM_PAYLOAD (TGSI_FILE_COUNT) #define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) struct brw_wm_compile { @@ -198,15 +208,15 @@ struct brw_wm_compile { * simplifying and adding instructions for interpolation and * framebuffer writes. 
*/ - struct prog_instruction prog_instructions[BRW_WM_MAX_INSN]; + struct ureg_instruction prog_instructions[BRW_WM_MAX_INSN]; GLuint nr_fp_insns; GLuint fp_temp; GLuint fp_interp_emitted; GLuint fp_fragcolor_emitted; - struct prog_src_register pixel_xy; - struct prog_src_register delta_xy; - struct prog_src_register pixel_w; + struct ureg_src pixel_xy; + struct ureg_src delta_xy; + struct ureg_src pixel_w; struct brw_wm_value vreg[BRW_WM_MAX_VREG]; @@ -217,7 +227,7 @@ struct brw_wm_compile { struct { struct brw_wm_value depth[4]; /* includes r0/r1 */ - struct brw_wm_value input_interp[FRAG_ATTRIB_MAX]; + struct brw_wm_value input_interp[PIPE_MAX_SHADER_INPUTS]; } payload; @@ -295,7 +305,7 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); +//GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); void emit_ddxy(struct brw_compile *p, diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c index c6659646f2..04dec5ba39 100644 --- a/src/gallium/drivers/i965/brw_wm_debug.c +++ b/src/gallium/drivers/i965/brw_wm_debug.c @@ -41,21 +41,21 @@ void brw_wm_print_value( struct brw_wm_compile *c, if (c->state >= PASS2_DONE) brw_print_reg(value->hw_reg); else if( value == &c->undef_value ) - _mesa_printf("undef"); + debug_printf("undef"); else if( value - c->vreg >= 0 && value - c->vreg < BRW_WM_MAX_VREG) - _mesa_printf("r%d", value - c->vreg); + debug_printf("r%d", value - c->vreg); else if (value - c->creg >= 0 && value - c->creg < BRW_WM_MAX_PARAM) - _mesa_printf("c%d", value - c->creg); + debug_printf("c%d", value - c->creg); else if (value - c->payload.input_interp >= 0 && value - c->payload.input_interp < FRAG_ATTRIB_MAX) - _mesa_printf("i%d", value - c->payload.input_interp); + debug_printf("i%d", value - c->payload.input_interp); else if 
(value - c->payload.depth >= 0 && value - c->payload.depth < FRAG_ATTRIB_MAX) - _mesa_printf("d%d", value - c->payload.depth); + debug_printf("d%d", value - c->payload.depth); else - _mesa_printf("?"); + debug_printf("?"); } void brw_wm_print_ref( struct brw_wm_compile *c, @@ -64,16 +64,16 @@ void brw_wm_print_ref( struct brw_wm_compile *c, struct brw_reg hw_reg = ref->hw_reg; if (ref->unspill_reg) - _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot); + debug_printf("UNSPILL(%x)/", ref->value->spill_slot); if (c->state >= PASS2_DONE) brw_print_reg(ref->hw_reg); else { - _mesa_printf("%s", hw_reg.negate ? "-" : ""); - _mesa_printf("%s", hw_reg.abs ? "abs/" : ""); + debug_printf("%s", hw_reg.negate ? "-" : ""); + debug_printf("%s", hw_reg.abs ? "abs/" : ""); brw_wm_print_value(c, ref->value); if ((hw_reg.nr&1) || hw_reg.subnr) { - _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); + debug_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); } } } @@ -84,22 +84,22 @@ void brw_wm_print_insn( struct brw_wm_compile *c, GLuint i, arg; GLuint nr_args = brw_wm_nr_args(inst->opcode); - _mesa_printf("["); + debug_printf("["); for (i = 0; i < 4; i++) { if (inst->dst[i]) { brw_wm_print_value(c, inst->dst[i]); if (inst->dst[i]->spill_slot) - _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot); + debug_printf("/SPILL(%x)",inst->dst[i]->spill_slot); } else - _mesa_printf("#"); + debug_printf("#"); if (i < 3) - _mesa_printf(","); + debug_printf(","); } - _mesa_printf("]"); + debug_printf("]"); if (inst->writemask != BRW_WRITEMASK_XYZW) - _mesa_printf(".%s%s%s%s", + debug_printf(".%s%s%s%s", GET_BIT(inst->writemask, 0) ? "x" : "", GET_BIT(inst->writemask, 1) ? "y" : "", GET_BIT(inst->writemask, 2) ? 
"z" : "", @@ -107,58 +107,58 @@ void brw_wm_print_insn( struct brw_wm_compile *c, switch (inst->opcode) { case WM_PIXELXY: - _mesa_printf(" = PIXELXY"); + debug_printf(" = PIXELXY"); break; case WM_DELTAXY: - _mesa_printf(" = DELTAXY"); + debug_printf(" = DELTAXY"); break; case WM_PIXELW: - _mesa_printf(" = PIXELW"); + debug_printf(" = PIXELW"); break; case WM_WPOSXY: - _mesa_printf(" = WPOSXY"); + debug_printf(" = WPOSXY"); break; case WM_PINTERP: - _mesa_printf(" = PINTERP"); + debug_printf(" = PINTERP"); break; case WM_LINTERP: - _mesa_printf(" = LINTERP"); + debug_printf(" = LINTERP"); break; case WM_CINTERP: - _mesa_printf(" = CINTERP"); + debug_printf(" = CINTERP"); break; case WM_FB_WRITE: - _mesa_printf(" = FB_WRITE"); + debug_printf(" = FB_WRITE"); break; case WM_FRONTFACING: - _mesa_printf(" = FRONTFACING"); + debug_printf(" = FRONTFACING"); break; default: - _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode)); + debug_printf(" = %s", _mesa_opcode_string(inst->opcode)); break; } if (inst->saturate) - _mesa_printf("_SAT"); + debug_printf("_SAT"); for (arg = 0; arg < nr_args; arg++) { - _mesa_printf(" ["); + debug_printf(" ["); for (i = 0; i < 4; i++) { if (inst->src[arg][i]) { brw_wm_print_ref(c, inst->src[arg][i]); } else - _mesa_printf("%%"); + debug_printf("%%"); if (i < 3) - _mesa_printf(","); + debug_printf(","); else - _mesa_printf("]"); + debug_printf("]"); } } - _mesa_printf("\n"); + debug_printf("\n"); } void brw_wm_print_program( struct brw_wm_compile *c, @@ -166,9 +166,9 @@ void brw_wm_print_program( struct brw_wm_compile *c, { GLuint insn; - _mesa_printf("%s:\n", stage); + debug_printf("%s:\n", stage); for (insn = 0; insn < c->nr_insns; insn++) brw_wm_print_insn(c, &c->instruction[insn]); - _mesa_printf("\n"); + debug_printf("\n"); } diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 7df9b79d7a..5f7ae6592c 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ 
b/src/gallium/drivers/i965/brw_wm_emit.c @@ -1481,7 +1481,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; default: - _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", + debug_printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? _mesa_opcode_string(inst->opcode) : "unknown"); @@ -1494,12 +1494,12 @@ void brw_wm_emit( struct brw_wm_compile *c ) inst->dst[i]->spill_slot); } - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { int i; - _mesa_printf("wm-native:\n"); + debug_printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + debug_printf("\n"); } } diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index be240031c7..d594730730 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -142,7 +142,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c ) int bit = _mesa_ffs( ~c->fp_temp ); if (!bit) { - _mesa_printf("%s: out of temporaries\n", __FILE__); + debug_printf("%s: out of temporaries\n", __FILE__); exit(1); } @@ -977,7 +977,7 @@ static void print_insns( const struct prog_instruction *insn, { GLuint i; for (i = 0; i < nr; i++, insn++) { - _mesa_printf("%3d: ", i); + debug_printf("%3d: ", i); if (insn->Opcode < MAX_OPCODE) _mesa_print_instruction(insn); else if (insn->Opcode < MAX_WM_OPCODE) { @@ -988,7 +988,7 @@ static void print_insns( const struct prog_instruction *insn, 3); } else - _mesa_printf("965 Opcode %d\n", insn->Opcode); + debug_printf("965 Opcode %d\n", insn->Opcode); } } @@ -1002,10 +1002,10 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) struct brw_fragment_program *fp = c->fp; GLuint insn; - if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pre-fp:\n"); + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("pre-fp:\n"); _mesa_print_program(&fp->program.Base); - _mesa_printf("\n"); + debug_printf("\n"); } c->pixel_xy = 
src_undef(); @@ -1103,10 +1103,10 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) } } - if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pass_fp:\n"); + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("pass_fp:\n"); print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + debug_printf("\n"); } } diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index a8de5fdd0b..3118e615f9 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -1694,7 +1694,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) c->cur_inst = i; #if 0 - _mesa_printf("Inst %d: ", i); + debug_printf("Inst %d: ", i); _mesa_print_instruction(inst); #endif @@ -1920,7 +1920,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } break; default: - _mesa_printf("unsupported IR in fragment shader %d\n", + debug_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } @@ -1931,11 +1931,11 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } post_wm_emit(c); - if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("wm-native:\n"); + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + debug_printf("\n"); } } @@ -1945,8 +1945,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) */ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { - if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("brw_wm_glsl_emit:\n"); + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("brw_wm_glsl_emit:\n"); } /* initial instruction translation/simplification */ @@ -1955,7 +1955,7 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) /* actual code generation */ brw_wm_emit_glsl(brw, c); - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, 
"brw_wm_glsl_emit done"); } diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 31b0270e84..71e4c56835 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -101,7 +101,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, GLuint i = c->prog_data.nr_params++; if (i >= BRW_WM_MAX_PARAM) { - _mesa_printf("%s: out of params\n", __FUNCTION__); + debug_printf("%s: out of params\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } @@ -150,7 +150,7 @@ static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c, return c->imm_ref[i].ref; } else { - _mesa_printf("%s: out of imm_refs\n", __FUNCTION__); + debug_printf("%s: out of imm_refs\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } @@ -434,7 +434,7 @@ void brw_wm_pass0( struct brw_wm_compile *c ) } } - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, "pass0"); } } diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index f2ae3a958f..85a3a55ca4 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -284,7 +284,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) track_arg(c, inst, 2, read2); } - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, "pass1"); } } diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c index 6faea018fb..a19ca62328 100644 --- a/src/gallium/drivers/i965/brw_wm_pass2.c +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -331,13 +331,13 @@ void brw_wm_pass2( struct brw_wm_compile *c ) } } - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, "pass2"); } c->state = PASS2_DONE; - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, "pass2/done"); } } diff --git 
a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c index a8993f9312..32692d533c 100644 --- a/src/gallium/drivers/i965/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -76,8 +76,9 @@ static GLint S_FIXED(GLfloat value, GLuint frac_bits) } -static struct brw_winsys_buffer *upload_default_color( struct brw_context *brw, - const GLfloat *color ) +static struct brw_winsys_buffer * +upload_default_color( struct brw_context *brw, + const GLfloat *color ) { struct brw_sampler_default_color sdc; @@ -117,63 +118,6 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, { _mesa_memset(sampler, 0, sizeof(*sampler)); - switch (key->minfilter) { - case GL_NEAREST: - sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; - sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; - break; - case GL_LINEAR: - sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; - sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; - break; - case GL_NEAREST_MIPMAP_NEAREST: - sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; - sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; - break; - case GL_LINEAR_MIPMAP_NEAREST: - sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; - sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; - break; - case GL_NEAREST_MIPMAP_LINEAR: - sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; - sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; - break; - case GL_LINEAR_MIPMAP_LINEAR: - sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; - sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; - break; - default: - break; - } - - /* Set Anisotropy: - */ - if (key->max_aniso > 1.0) { - sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; - sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; - - if (key->max_aniso > 2.0) { - sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, - BRW_ANISORATIO_16); - } - } - else { - switch (key->magfilter) { - case GL_NEAREST: - sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; - break; - case 
GL_LINEAR: - sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - break; - default: - break; - } - } - - sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); - sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); - sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); - /* Cube-maps on 965 and later must use the same wrap mode for all 3 * coordinate dimensions. Futher, only CUBE and CLAMP are valid. */ @@ -198,36 +142,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, } - /* Set shadow function: - */ - if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) { - /* Shadowing is "enabled" by emitting a particular sampler - * message (sample_c). So need to recompile WM program when - * shadow comparison is enabled on each/any texture unit. - */ - sampler->ss0.shadow_function = - intel_translate_shadow_compare_func(key->comparefunc); - } - - /* Set LOD bias: - */ - sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6); - - sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ - sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ - - /* Set BaseMipLevel, MaxLOD, MinLOD: - * - * XXX: I don't think that using firstLevel, lastLevel works, - * because we always setup the surface state as if firstLevel == - * level zero. 
Probably have to subtract firstLevel from each of - * these: - */ - sampler->ss0.base_level = U_FIXED(0, 1); - sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6); - sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6); - sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } @@ -237,57 +152,42 @@ static void brw_wm_sampler_populate_key(struct brw_context *brw, struct wm_sampler_key *key) { - int unit; + int nr = MIN2(brw->curr.number_textures, + brw->curr.number_samplers); + int i; memset(key, 0, sizeof(*key)); - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { - if (ctx->Texture.Unit[unit]._ReallyEnabled) { - struct wm_sampler_entry *entry = &key->sampler[unit]; - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *texObj = texUnit->_Current; - struct intel_texture_object *intelObj = intel_texture_object(texObj); - struct gl_texture_image *firstImage = - texObj->Image[0][intelObj->firstLevel]; - - entry->tex_target = texObj->Target; - - entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) - ? ctx->Texture.CubeMapSeamless : GL_FALSE; - - entry->wrap_r = texObj->WrapR; - entry->wrap_s = texObj->WrapS; - entry->wrap_t = texObj->WrapT; - - entry->maxlod = texObj->MaxLod; - entry->minlod = texObj->MinLod; - entry->lod_bias = texUnit->LodBias + texObj->LodBias; - entry->max_aniso = texObj->MaxAnisotropy; - entry->minfilter = texObj->MinFilter; - entry->magfilter = texObj->MagFilter; - entry->comparemode = texObj->CompareMode; - entry->comparefunc = texObj->CompareFunc; - - brw->sws->bo_unreference(brw->wm.sdc_bo[unit]); - if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { - float bordercolor[4] = { - texObj->BorderColor[0], - texObj->BorderColor[0], - texObj->BorderColor[0], - texObj->BorderColor[0] - }; - /* GL specs that border color for depth textures is taken from the - * R channel, while the hardware uses A. Spam R into all the - * channels for safety. 
- */ - brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor); - } else { - brw->wm.sdc_bo[unit] = upload_default_color(brw, - texObj->BorderColor); - } - key->sampler_count = unit + 1; + for (i = 0; i < nr; i++) { + const struct brw_texture *tex = brw->curr.texture[i]; + const struct brw_sampler *sampler = brw->curr.sampler[i]; + struct wm_sampler_entry *entry = &key->sampler[i]; + + entry->tex_target = texObj->Target; + entry->seamless_cube_map = FALSE; /* XXX: add this to gallium */ + entry->ss0 = sampler->ss0; + entry->ss1 = sampler->ss1; + entry->ss3 = sampler->ss3; + + brw->sws->bo_unreference(brw->wm.sdc_bo[i]); + if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { + float bordercolor[4] = { + texObj->BorderColor[0], + texObj->BorderColor[0], + texObj->BorderColor[0], + texObj->BorderColor[0] + }; + /* GL specs that border color for depth textures is taken from the + * R channel, while the hardware uses A. Spam R into all the + * channels for safety. + */ + brw->wm.sdc_bo[i] = upload_default_color(brw, bordercolor); + } else { + brw->wm.sdc_bo[i] = upload_default_color(brw, texObj->BorderColor); } } + + key->sampler_count = nr; } /* All samplers must be uploaded in a single contiguous array, which @@ -354,7 +254,7 @@ static void upload_wm_samplers( struct brw_context *brw ) const struct brw_tracked_state brw_wm_samplers = { .dirty = { - .mesa = _NEW_TEXTURE, + .mesa = PIPE_NEW_BOUND_TEXTURES | PIPE_NEW_SAMPLER, .brw = 0, .cache = 0 }, diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index 4989aae830..edabf6ceb6 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -65,7 +65,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) memset(key, 0, sizeof(*key)); - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) key->max_threads = 1; else { /* WM maximum threads is number of EUs times number of threads per EU. 
*/ @@ -120,7 +120,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); /* _NEW_QUERY */ - key->stats_wm = intel->stats_wm; + key->stats_wm = (brw->query.stats_wm != 0); /* _NEW_LINE */ key->line_stipple = ctx->Line.StippleFlag; @@ -215,7 +215,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.wm5.line_stipple = key->line_stipple; - if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm) + if (BRW_DEBUG & DEBUG_STATS || key->stats_wm) wm.wm4.stats_enable = 1; bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, -- cgit v1.2.3 From 7ba2fe40fa092551f1c493d754c80ca93564d32b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 Oct 2009 00:29:21 +0000 Subject: i965g: still working on compilation --- src/gallium/drivers/i965/brw_context.h | 1 + src/gallium/drivers/i965/brw_eu.c | 18 ++--- src/gallium/drivers/i965/brw_eu.h | 4 +- src/gallium/drivers/i965/brw_vs.h | 6 ++ src/gallium/drivers/i965/brw_vs_emit.c | 131 ++++++++++++++++----------------- src/gallium/drivers/i965/brw_wm.h | 9 +-- src/gallium/drivers/i965/brw_wm_glsl.c | 2 +- 7 files changed, 83 insertions(+), 88 deletions(-) (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 8aaf895d20..7b85363e9f 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -289,6 +289,7 @@ struct brw_vs_prog_data { GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */ GLboolean copy_edgeflag; + GLboolean writes_psiz; /* Used for calculating urb partitions: */ diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c index df49d4b72f..1189a35b6f 100644 --- a/src/gallium/drivers/i965/brw_eu.c +++ b/src/gallium/drivers/i965/brw_eu.c @@ -152,7 +152,7 @@ const GLuint *brw_get_program( struct brw_compile *p, */ struct brw_glsl_label { - const char *name; /**< the label string 
*/ + GLuint label; /**< the label number */ GLuint position; /**< the position of the brw instruction for this label */ struct brw_glsl_label *next; /**< next in linked list */ }; @@ -164,7 +164,7 @@ struct brw_glsl_label struct brw_glsl_call { GLuint call_inst_pos; /**< location of the CAL instruction */ - const char *sub_name; /**< name of subroutine to call */ + GLuint label; struct brw_glsl_call *next; /**< next in linked list */ }; @@ -173,10 +173,10 @@ struct brw_glsl_call * Called for each OPCODE_BGNSUB. */ void -brw_save_label(struct brw_compile *c, const char *name, GLuint position) +brw_save_label(struct brw_compile *c, unsigned l, GLuint position) { struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label); - label->name = name; + label->label = l; label->position = position; label->next = c->first_label; c->first_label = label; @@ -187,11 +187,11 @@ brw_save_label(struct brw_compile *c, const char *name, GLuint position) * Called for each OPCODE_CAL. */ void -brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos) +brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos) { struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call); call->call_inst_pos = call_pos; - call->sub_name = name; + call->label = label; call->next = c->first_call; c->first_call = call; } @@ -201,11 +201,11 @@ brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos) * Lookup a label, return label's position/offset. 
*/ static GLuint -brw_lookup_label(struct brw_compile *c, const char *name) +brw_lookup_label(struct brw_compile *c, unsigned l) { const struct brw_glsl_label *label; for (label = c->first_label; label; label = label->next) { - if (strcmp(name, label->name) == 0) { + if (l == label->label) { return label->position; } } @@ -224,7 +224,7 @@ brw_resolve_cals(struct brw_compile *c) const struct brw_glsl_call *call; for (call = c->first_call; call; call = call->next) { - const GLuint sub_loc = brw_lookup_label(c, call->sub_name); + const GLuint sub_loc = brw_lookup_label(c, call->label); struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos]; struct brw_instruction *brw_sub_inst = &c->store[sub_loc]; GLint offset = brw_sub_inst - brw_call_inst; diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h index ac5a623cac..3379522104 100644 --- a/src/gallium/drivers/i965/brw_eu.h +++ b/src/gallium/drivers/i965/brw_eu.h @@ -136,10 +136,10 @@ struct brw_compile { void -brw_save_label(struct brw_compile *c, const char *name, GLuint position); +brw_save_label(struct brw_compile *c, unsigned label, GLuint position); void -brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos); +brw_save_call(struct brw_compile *c, unsigned label, GLuint call_pos); void brw_resolve_cals(struct brw_compile *c); diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index 58119567dc..2a2dbb3457 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -54,6 +54,7 @@ struct brw_vs_compile { struct brw_compile func; struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; + struct brw_chipset chipset; struct brw_vertex_shader *vp; @@ -88,7 +89,12 @@ struct brw_vs_compile { struct brw_instruction *if_inst[MAX_IF_DEPTH]; struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; + GLuint insn; + GLuint if_depth; + GLuint loop_depth; + GLuint end_offset; + struct brw_indirect stack_index; 
}; diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 4daa98b29e..5366ab8514 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -35,19 +35,15 @@ #include "util/u_math.h" #include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_ureg_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" #include "brw_context.h" #include "brw_vs.h" #include "brw_debug.h" -struct ureg_instruction { - unsigned opcode:8; - unsigned tex_target:3; - struct ureg_dst dst; - struct ureg_src src[3]; -}; - static struct brw_reg get_tmp( struct brw_vs_compile *c ) { @@ -149,7 +145,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->first_output = reg; c->first_overflow_output = 0; - if (BRW_IS_IGDNG(c->func.brw)) + if (c->chipset.is_igdng) mrf = 8; else mrf = 4; @@ -251,7 +247,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); - if (BRW_IS_IGDNG(c->func.brw)) + if (c->chipset.is_igdng) c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; @@ -1058,7 +1054,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ if (c->prog_data.writes_psiz || c->key.nr_userclip || - BRW_IS_965(p->brw)) + c->chipset.is_965) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1089,7 +1085,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. 
*/ - if (BRW_IS_965(p->brw)) { + if (c->chipset.is_965) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -1117,7 +1113,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, offset(m0, 2), ndc); - if (BRW_IS_IGDNG(p->brw)) { + if (c->chipset.is_igdng) { /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ /* m4, m5 contain the distances from vertex to the user clip planeXXX. @@ -1205,6 +1201,9 @@ post_vs_emit( struct brw_vs_compile *c, static uint32_t get_predicate(const struct ureg_instruction *inst) { + /* XXX: disabling for now + */ +#if 0 if (inst->dst.CondMask == COND_TR) return BRW_PREDICATE_NONE; @@ -1237,11 +1236,15 @@ get_predicate(const struct ureg_instruction *inst) inst->dst.CondMask); return BRW_PREDICATE_NORMAL; } +#else + return BRW_PREDICATE_NORMAL; +#endif } static void emit_insn(struct brw_vs_compile *c, - const struct tgsi_full_instruction *insn) + const struct ureg_instruction *inst) { + struct brw_compile *p = &c->func; struct brw_reg args[3], dst; GLuint i; @@ -1253,9 +1256,6 @@ static void emit_insn(struct brw_vs_compile *c, /* Get argument regs. */ for (i = 0; i < 3; i++) { - const struct ureg_src src = inst->src[i]; - index = src.Index; - file = src.File; args[i] = get_arg(c, inst, i); } @@ -1263,16 +1263,13 @@ static void emit_insn(struct brw_vs_compile *c, * dst and arg, given the static allocation of registers. So * care needs to be taken emitting multi-operation instructions. 
*/ - index = inst->dst.Index; - file = inst->dst.File; dst = get_dst(c, inst->dst); - if (inst->SaturateMode != SATURATE_OFF) { - debug_printf("Unsupported saturate %d in vertex shader", - inst->SaturateMode); + if (inst->dst.Saturate) { + debug_printf("Unsupported saturate in vertex shader"); } - switch (inst->Opcode) { + switch (inst->opcode) { case TGSI_OPCODE_ABS: brw_MOV(p, dst, brw_abs(args[0])); break; @@ -1291,7 +1288,7 @@ static void emit_insn(struct brw_vs_compile *c, case TGSI_OPCODE_DPH: brw_DPH(p, dst, args[0], args[1]); break; - case TGSI_OPCODE_NRM3: + case TGSI_OPCODE_NRM: emit_nrm(c, dst, args[0], 3); break; case TGSI_OPCODE_NRM4: @@ -1384,21 +1381,21 @@ static void emit_insn(struct brw_vs_compile *c, emit_xpd(p, dst, args[0], args[1]); break; case TGSI_OPCODE_IF: - assert(if_depth < MAX_IF_DEPTH); - if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); + assert(c->if_depth < MAX_IF_DEPTH); + c->if_inst[c->if_depth] = brw_IF(p, BRW_EXECUTE_8); /* Note that brw_IF smashes the predicate_control field. 
*/ - if_inst[if_depth]->header.predicate_control = get_predicate(inst); - if_depth++; + c->if_inst[c->if_depth]->header.predicate_control = get_predicate(inst); + c->if_depth++; break; case TGSI_OPCODE_ELSE: - if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); + c->if_inst[c->if_depth-1] = brw_ELSE(p, c->if_inst[c->if_depth-1]); break; case TGSI_OPCODE_ENDIF: - assert(if_depth > 0); - brw_ENDIF(p, if_inst[--if_depth]); + assert(c->if_depth > 0); + brw_ENDIF(p, c->if_inst[--c->if_depth]); break; case TGSI_OPCODE_BGNLOOP: - loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + c->loop_inst[c->loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; case TGSI_OPCODE_BRK: brw_set_predicate_control(p, get_predicate(inst)); @@ -1415,14 +1412,14 @@ static void emit_insn(struct brw_vs_compile *c, struct brw_instruction *inst0, *inst1; GLuint br = 1; - loop_depth--; + c->loop_depth--; - if (BRW_IS_IGDNG(brw)) + if (c->chipset.is_igdng) br = 2; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]); /* patch all the BREAK/CONT instructions from last BEGINLOOP */ - while (inst0 > loop_inst[loop_depth]) { + while (inst0 > c->loop_inst[c->loop_depth]) { inst0--; if (inst0->header.opcode == TGSI_OPCODE_BRK) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); @@ -1442,41 +1439,37 @@ static void emit_insn(struct brw_vs_compile *c, break; case TGSI_OPCODE_CAL: brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_ADD(p, deref_1d(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(4)); - brw_save_call(p, inst->Comment, p->nr_insn); + brw_ADD(p, get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(4)); + brw_save_call(p, inst->label, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; case 
TGSI_OPCODE_RET: - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(-4)); + brw_ADD(p, get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(-4)); brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); + brw_MOV(p, brw_ip_reg(), deref_1d(c->stack_index, 0)); brw_set_access_mode(p, BRW_ALIGN_16); break; case TGSI_OPCODE_END: - end_offset = p->nr_insn; + c->end_offset = p->nr_insn; /* this instruction will get patched later to jump past subroutine * code, etc. */ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; - case TGSI_OPCODE_PRINT: - /* no-op */ - break; case TGSI_OPCODE_BGNSUB: - brw_save_label(p, inst->Comment, p->nr_insn); + brw_save_label(p, p->nr_insn, p->nr_insn); break; case TGSI_OPCODE_ENDSUB: /* no-op */ break; default: debug_printf("Unsupported opcode %i (%s) in vertex shader", - inst->Opcode, inst->Opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->Opcode) : - "unknown"); + inst->opcode, + tgsi_get_opcode_name(inst->opcode)); } /* Set the predication update on the last instruction of the native @@ -1485,12 +1478,16 @@ static void emit_insn(struct brw_vs_compile *c, * This would be problematic if it was set on a math instruction, * but that shouldn't be the case with the current GLSL compiler. */ +#if 0 + /* XXX: disabled + */ if (inst->CondUpdate) { struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; assert(hw_insn->header.destreg__conditionalmod == 0); hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; } +#endif release_tmps(c); } @@ -1498,24 +1495,19 @@ static void emit_insn(struct brw_vs_compile *c, /* Emit the vertex program instructions here. 
*/ -void brw_vs_emit(struct brw_vs_compile *c ) +void brw_vs_emit(struct brw_vs_compile *c) { struct brw_compile *p = &c->func; - struct brw_context *brw = p->brw; - GLuint insn, if_depth = 0, loop_depth = 0; - GLuint end_offset = 0; struct brw_instruction *end_inst, *last_inst; - const struct brw_indirect stack_index = brw_indirect(0, 0); - struct tgsi_parse_context parse; - struct tgsi_full_declaration *decl; - GLuint index; - GLuint file; + struct ureg_parse_context parse; + struct ureg_declaration *decl; + struct ureg_declaration *imm; + struct ureg_declaration *insn; - if (BRW_DEBUG & DEBUG_VS) { - debug_printf("vs-mesa:\n"); - _mesa_print_program(&c->vp->program.Base); - debug_printf("\n"); - } + if (BRW_DEBUG & DEBUG_VS) + tgsi_dump(c->vp->tokens, 0); + + c->stack_index = brw_indirect(0, 0); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); @@ -1523,12 +1515,15 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* Static register allocation */ brw_vs_alloc_regs(c); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); - for (insn = 0; insn < nr_insns; insn++) { + while (ureg_next_decl(&parse, &decl)) { + } - const struct ureg_instruction *inst = &c->vp->program.Base.Instructions[insn]; - + while (ureg_next_immediate(&parse, &imm)) { + } + + while (ureg_next_instruction(&parse, &insn)) { } end_inst = &p->store[end_offset]; diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 5bc2a49c1f..084430cf28 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -34,6 +34,7 @@ #define BRW_WM_H #include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_ureg_parse.h" #include "brw_context.h" #include "brw_eu.h" @@ -163,14 +164,6 @@ struct brw_wm_instruction { #define BRW_WM_MAX_SUBROUTINE 16 -struct ureg_instruction { - unsigned opcode:8; - unsigned tex_target:3; - struct ureg_dst dst; - struct 
ureg_src src[3]; -}; - - /* New opcodes to track internal operations required for WM unit. * These are added early so that the registers used can be tracked, * freed and reused like those of other instructions. diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index 23f7ba16fd..59bc4ef701 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -1867,7 +1867,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4)); - brw_save_call(&c->func, inst->Comment, p->nr_insn); + brw_save_call(&c->func, inst->label, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_pop_insn_state(p); break; -- cgit v1.2.3 From 99cc0fd67597cbcd6106afcf437a0d5e2431c9df Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 Oct 2009 20:18:01 +0000 Subject: i965g: work in progress on fragment shaders --- src/gallium/drivers/i965/brw_context.h | 10 +- src/gallium/drivers/i965/brw_eu.c | 20 +- src/gallium/drivers/i965/brw_eu.h | 8 +- src/gallium/drivers/i965/brw_pipe_depth.c | 42 +- src/gallium/drivers/i965/brw_pipe_rast.c | 18 + src/gallium/drivers/i965/brw_pipe_rast.h | 1 + src/gallium/drivers/i965/brw_pipe_shader.c | 4 +- src/gallium/drivers/i965/brw_screen.h | 7 + src/gallium/drivers/i965/brw_vs_emit.c | 2 - src/gallium/drivers/i965/brw_wm.c | 167 ++--- src/gallium/drivers/i965/brw_wm.h | 41 +- src/gallium/drivers/i965/brw_wm_debug.c | 17 +- src/gallium/drivers/i965/brw_wm_emit.c | 195 +++--- src/gallium/drivers/i965/brw_wm_fp.c | 1031 ++++++++++------------------ src/gallium/drivers/i965/brw_wm_glsl.c | 12 +- src/gallium/drivers/i965/brw_wm_pass0.c | 73 +- src/gallium/drivers/i965/brw_wm_pass1.c | 26 +- src/gallium/drivers/i965/brw_wm_state.c | 8 +- 18 files changed, 682 insertions(+), 1000 deletions(-) (limited to 
'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 7b85363e9f..e6c3161066 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -132,6 +132,8 @@ struct brw_depth_stencil_state { struct brw_cc2 cc2; struct brw_cc3 cc3; struct brw_cc7 cc7; + + unsigned iz_lookup; }; @@ -164,7 +166,10 @@ struct brw_fragment_shader { const struct tgsi_token *tokens; struct tgsi_shader_info info; - GLboolean isGLSL; + unsigned iz_lookup; + + boolean uses_depth:1; + boolean has_flow_control:1; unsigned id; struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ @@ -194,6 +199,7 @@ struct brw_fragment_shader { #define PIPE_NEW_COLOR_BUFFERS 0x40000 #define PIPE_NEW_QUERY 0x80000 #define PIPE_NEW_SCISSOR 0x100000 +#define PIPE_NEW_BOUND_TEXTURES 0x200000 @@ -487,7 +493,7 @@ struct brw_context const struct brw_rasterizer_state *rast; const struct brw_depth_stencil_state *zstencil; - const struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + const struct brw_texture *texture[PIPE_MAX_SAMPLERS]; const struct pipe_sampler *sampler[PIPE_MAX_SAMPLERS]; unsigned num_textures; unsigned num_samplers; diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c index 1189a35b6f..de43b14512 100644 --- a/src/gallium/drivers/i965/brw_eu.c +++ b/src/gallium/drivers/i965/brw_eu.c @@ -150,22 +150,22 @@ const GLuint *brw_get_program( struct brw_compile *p, /** * For each OPCODE_BGNSUB we create one of these. */ -struct brw_glsl_label +struct brw_eu_label { GLuint label; /**< the label number */ GLuint position; /**< the position of the brw instruction for this label */ - struct brw_glsl_label *next; /**< next in linked list */ + struct brw_eu_label *next; /**< next in linked list */ }; /** * For each OPCODE_CAL we create one of these. 
*/ -struct brw_glsl_call +struct brw_eu_call { GLuint call_inst_pos; /**< location of the CAL instruction */ GLuint label; - struct brw_glsl_call *next; /**< next in linked list */ + struct brw_eu_call *next; /**< next in linked list */ }; @@ -175,7 +175,7 @@ struct brw_glsl_call void brw_save_label(struct brw_compile *c, unsigned l, GLuint position) { - struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label); + struct brw_eu_label *label = CALLOC_STRUCT(brw_eu_label); label->label = l; label->position = position; label->next = c->first_label; @@ -189,7 +189,7 @@ brw_save_label(struct brw_compile *c, unsigned l, GLuint position) void brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos) { - struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call); + struct brw_eu_call *call = CALLOC_STRUCT(brw_eu_call); call->call_inst_pos = call_pos; call->label = label; call->next = c->first_call; @@ -203,7 +203,7 @@ brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos) static GLuint brw_lookup_label(struct brw_compile *c, unsigned l) { - const struct brw_glsl_label *label; + const struct brw_eu_label *label; for (label = c->first_label; label; label = label->next) { if (l == label->label) { return label->position; @@ -221,7 +221,7 @@ brw_lookup_label(struct brw_compile *c, unsigned l) void brw_resolve_cals(struct brw_compile *c) { - const struct brw_glsl_call *call; + const struct brw_eu_call *call; for (call = c->first_call; call; call = call->next) { const GLuint sub_loc = brw_lookup_label(c, call->label); @@ -235,7 +235,7 @@ brw_resolve_cals(struct brw_compile *c) /* free linked list of calls */ { - struct brw_glsl_call *call, *next; + struct brw_eu_call *call, *next; for (call = c->first_call; call; call = next) { next = call->next; FREE(call); @@ -245,7 +245,7 @@ brw_resolve_cals(struct brw_compile *c) /* free linked list of labels */ { - struct brw_glsl_label *label, *next; + struct brw_eu_label *label, *next; for (label = c->first_label; 
label; label = next) { next = label->next; FREE(label); diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h index 3379522104..7bddc3859c 100644 --- a/src/gallium/drivers/i965/brw_eu.h +++ b/src/gallium/drivers/i965/brw_eu.h @@ -109,8 +109,8 @@ struct brw_indirect { }; -struct brw_glsl_label; -struct brw_glsl_call; +struct brw_eu_label; +struct brw_eu_call; @@ -130,8 +130,8 @@ struct brw_compile { GLboolean single_program_flow; struct brw_context *brw; - struct brw_glsl_label *first_label; /**< linked list of labels */ - struct brw_glsl_call *first_call; /**< linked list of CALs */ + struct brw_eu_label *first_label; /**< linked list of labels */ + struct brw_eu_call *first_call; /**< linked list of CALs */ }; diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c index 33fe517e0b..e010d76e0d 100644 --- a/src/gallium/drivers/i965/brw_pipe_depth.c +++ b/src/gallium/drivers/i965/brw_pipe_depth.c @@ -5,6 +5,10 @@ #include "brw_context.h" #include "brw_defines.h" +/* XXX: Fixme - include this to get IZ_ defines + */ +#include "brw_wm.h" + static unsigned brw_translate_compare_func(unsigned func) { switch (func) { @@ -55,13 +59,9 @@ static unsigned translate_stencil_op(unsigned op) } } - -static void * -brw_create_depth_stencil_state( struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *templ ) +static void create_bcc_state( struct brw_depth_stencil_state *zstencil, + const struct pipe_depth_stencil_alpha_state *templ ) { - struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state); - if (templ->stencil[0].enabled) { zstencil->cc0.stencil_enable = 1; zstencil->cc0.stencil_func = @@ -108,6 +108,36 @@ brw_create_depth_stencil_state( struct pipe_context *pipe, zstencil->cc2.depth_test_function = brw_translate_compare_func(templ->depth.func); zstencil->cc2.depth_write_enable = templ->depth.writemask; } +} + +static void create_wm_iz_state( struct 
brw_depth_stencil_state *zstencil ) +{ + if (zstencil->cc3.alpha_test) + zstencil->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (zstencil->cc2.depth_test) + zstencil->iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (zstencil->cc2.depth_write_enable) + zstencil->iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + if (zstencil->cc0.stencil_enable) + zstencil->iz_lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (zstencil->cc0.stencil_write_enable) + zstencil->iz_lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + +} + + +static void * +brw_create_depth_stencil_state( struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *templ ) +{ + struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state); + + create_bcc_state( zstencil, templ ); + create_wm_iz_state( zstencil ); return (void *)zstencil; } diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c index 86822d478a..51159bf147 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.c +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -64,3 +64,21 @@ calculate_line_stipple_rast() bls.bits1.inverse_repeat_count = tmpi; } + + + +static void +calculate_wm_lookup() +{ + if (rast->fill_cw == PIPE_POLYGON_MODE_LINE && + rast->fill_ccw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_ALWAYS; + } + else if (rast->fill_cw == PIPE_POLYGON_MODE_LINE || + rast->fill_ccw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_SOMETIMES; + } + else { + line_aa = AA_NEVER; + } +} diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h index 800a9208a7..9354f01e18 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.h +++ b/src/gallium/drivers/i965/brw_pipe_rast.h @@ -10,6 +10,7 @@ struct brw_rasterizer_state { */ struct brw_clip_prog_key clip_key; struct brw_line_stipple bls; + unsigned unfilled_aa_line; }; #endif diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 8b61da763c..6e37eac634 100644 --- 
a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -39,7 +39,7 @@ * as flow conditionals, loops, subroutines. * Some GLSL shaders may use these features, others might not. */ -GLboolean brw_wm_is_glsl(const struct brw_fragment_shader *fp) +GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp) { return (fp->info.insn_count[TGSI_OPCODE_ARL] > 0 || fp->info.insn_count[TGSI_OPCODE_IF] > 0 || @@ -144,7 +144,7 @@ static void brwProgramStringNotify( struct brw_context *brw, if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; - newFP->isGLSL = brw_wm_is_glsl(fprog); + newFP->has_flow_control = brw_wm_has_flow_control(fprog); } else if (target == GL_VERTEX_PROGRAM_ARB) { struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index eafd8ddf77..efa27db1e0 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -64,6 +64,13 @@ struct brw_buffer boolean is_user_buffer; }; +struct brw_texture +{ + struct pipe_texture base; + + ubyte shader_swizzle; +}; + /* * Cast wrappers diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 6809bccdec..bcc5c5f713 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -1013,8 +1013,6 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, src->SrcRegister.SwizzleZ, src->SrcRegister.SwizzleW); - /* Note this is ok for non-swizzle instructions: - */ reg.negate = src->SrcRegister.Negate ? 
1 : 0; /* XXX: abs, absneg diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index f0dabfcfd0..33602b59c1 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -28,14 +28,17 @@ * Authors: * Keith Whitwell */ +#include "pipe/p_error.h" #include "tgsi/tgsi_info.h" #include "brw_context.h" +#include "brw_screen.h" #include "brw_util.h" #include "brw_wm.h" #include "brw_state.h" #include "brw_debug.h" +#include "brw_pipe_rast.h" /** Return number of src args for given instruction */ @@ -85,12 +88,12 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) /** - * Do GPU code generation for non-GLSL shader. non-GLSL shaders have - * no flow control instructions so we can more readily do SSA-style - * optimizations. + * Do GPU code generation for shaders without flow control. Shaders + * without flow control instructions can more readily be analysed for + * SSA-style optimizations. */ static void -brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +brw_wm_linear_shader_emit(struct brw_context *brw, struct brw_wm_compile *c) { /* Augment fragment program. Add instructions for pre- and * post-fragment-program tasks such as interpolation and fogging. @@ -136,7 +139,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) * Depending on the instructions used (i.e. flow control instructions) * we'll use one of two code generators. */ -static void do_wm_prog( struct brw_context *brw, +static int do_wm_prog( struct brw_context *brw, struct brw_fragment_shader *fp, struct brw_wm_prog_key *key) { @@ -153,7 +156,7 @@ static void do_wm_prog( struct brw_context *brw, * without triggering a segfault, no way to signal, * so just return. 
*/ - return; + return PIPE_ERROR_OUT_OF_MEMORY; } } else { memset(c, 0, sizeof(*brw->wm.compile_data)); @@ -166,19 +169,19 @@ static void do_wm_prog( struct brw_context *brw, brw_init_compile(brw, &c->func); /* temporary sanity check assertion */ - assert(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + assert(fp->has_flow_control == brw_wm_has_flow_control(c->fp)); /* * Shader which use GLSL features such as flow control are handled * differently from "simple" shaders. */ - if (fp->isGLSL) { + if (fp->has_flow_control) { c->dispatch_width = 8; - brw_wm_glsl_emit(brw, c); + brw_wm_branching_shader_emit(brw, c); } else { c->dispatch_width = 16; - brw_wm_non_glsl_emit(brw, c); + brw_wm_linear_shader_emit(brw, c); } if (BRW_DEBUG & DEBUG_WM) @@ -195,6 +198,8 @@ static void do_wm_prog( struct brw_context *brw, program, program_size, &c->prog_data, &brw->wm.prog_data ); + + return 0; } @@ -202,71 +207,36 @@ static void do_wm_prog( struct brw_context *brw, static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct brw_fragment_program *fp = brw->curr.fragment_shader; - GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; - GLuint lookup = 0; - GLuint line_aa; - GLuint i; + unsigned lookup, line_aa; + unsigned i; memset(key, 0, sizeof(*key)); - /* Build the index for table lookup + /* PIPE_NEW_FRAGMENT_SHADER + * PIPE_NEW_DEPTH_STENCIL_ALPHA */ - /* _NEW_COLOR */ - if (fp->program.UsesKill || - ctx->Color.AlphaEnabled) - lookup |= IZ_PS_KILL_ALPHATEST_BIT; - - if (fp->program.Base.OutputsWritten & (1<Depth.Test) - lookup |= IZ_DEPTH_TEST_ENABLE_BIT; - - if (ctx->Depth.Test && - ctx->Depth.Mask) /* ?? 
*/ - lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + lookup = (brw->curr.zstencil->iz_lookup | + brw->curr.fragment_shader->iz_lookup); - /* _NEW_STENCIL */ - if (ctx->Stencil._Enabled) { - lookup |= IZ_STENCIL_TEST_ENABLE_BIT; - if (ctx->Stencil.WriteMask[0] || - ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) - lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; - } - - line_aa = AA_NEVER; - - /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ - if (ctx->Line.SmoothFlag) { - if (brw->intel.reduced_primitive == GL_LINES) { - line_aa = AA_ALWAYS; - } - else if (brw->intel.reduced_primitive == GL_TRIANGLES) { - if (ctx->Polygon.FrontMode == GL_LINE) { - line_aa = AA_SOMETIMES; - - if (ctx->Polygon.BackMode == GL_LINE || - (ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_BACK)) - line_aa = AA_ALWAYS; - } - else if (ctx->Polygon.BackMode == GL_LINE) { - line_aa = AA_SOMETIMES; - - if ((ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_FRONT)) - line_aa = AA_ALWAYS; - } - } + /* PIPE_NEW_RAST + * BRW_NEW_REDUCED_PRIMITIVE + */ + switch (brw->reduced_primitive) { + case PIPE_PRIM_POINTS: + line_aa = AA_NEVER; + break; + case PIPE_PRIM_LINES: + line_aa = AA_ALWAYS; + break; + default: + line_aa = brw->curr.rast->unfilled_aa_line; + break; } brw_wm_lookup_iz(line_aa, lookup, - uses_depth, + brw->curr.fragment_shader->uses_depth, key); /* Revisit this, figure out if it's really useful, and either push @@ -276,54 +246,39 @@ static void brw_wm_populate_key( struct brw_context *brw, key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/ /* PIPE_NEW_RAST */ - key->flat_shade = brw->rast.flat_shade; + key->flat_shade = brw->curr.rast->templ.flatshade; /* This can be determined by looking at the INTERP mode each input decl. 
*/ - key->linear_color = 0; - - /* _NEW_TEXTURE */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - if (i < brw->nr_textures) { - const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; - const struct gl_texture_object *t = unit->_Current; - const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; - - if (img->InternalFormat == GL_YCBCR_MESA) { - key->yuvtex_mask |= 1 << i; - if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) - key->yuvtex_swap_mask |= 1 << i; - } + key->linear_attrib_mask = 0; - key->tex_swizzles[i] = t->_Swizzle; + /* PIPE_NEW_BOUND_TEXTURES */ + for (i = 0; i < brw->curr.num_textures; i++) { + const struct brw_texture *tex = brw->curr.texture[i]; - if (0) - key->shadowtex_mask |= 1<tex_swizzles[i] = SWIZZLE_NOOP; - } - } + if (tex->base.format == PIPE_FORMAT_YCBCR) + key->yuvtex_mask |= 1 << i; + if (tex->base.format == PIPE_FORMAT_YCBCR_REV) + key->yuvtex_swap_mask |= 1 << i; - /* _NEW_FRAMEBUFFER */ - if (brw->intel.driDrawable != NULL) { - key->drawable_height = brw->fb.cbufs[0].height; + /* XXX: shadow texture + */ + /* key->shadowtex_mask |= 1<vp_nr_outputs_written = brw->vs.prog_data->nr_outputs_written; + key->vp_nr_outputs = brw->vs.prog_data->nr_outputs; /* The unique fragment program ID */ - key->program_string_id = fp->id; + key->program_string_id = brw->curr.fragment_shader->id; } -static void brw_prepare_wm_prog(struct brw_context *brw) +static int brw_prepare_wm_prog(struct brw_context *brw) { struct brw_wm_prog_key key; - struct brw_fragment_program *fp = (struct brw_fragment_program *) - brw->fragment_program; + struct brw_fragment_shader *fs = brw->curr.fragment_shader; brw_wm_populate_key(brw, &key); @@ -335,23 +290,19 @@ static void brw_prepare_wm_prog(struct brw_context *brw) NULL, 0, &brw->wm.prog_data); if (brw->wm.prog_bo == NULL) - do_wm_prog(brw, fp, &key); + return do_wm_prog(brw, fs, &key); + + return 0; } const struct brw_tracked_state brw_wm_prog = { .dirty = { - .mesa = (_NEW_COLOR | - _NEW_DEPTH | - 
_NEW_HINT | - _NEW_STENCIL | - _NEW_POLYGON | - _NEW_LINE | - _NEW_LIGHT | - _NEW_BUFFERS | - _NEW_TEXTURE), - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_WM_INPUT_DIMENSIONS | + .mesa = (PIPE_NEW_FRAGMENT_SHADER | + PIPE_NEW_DEPTH_STENCIL_ALPHA | + PIPE_NEW_RAST | + PIPE_NEW_BOUND_TEXTURES), + .brw = (BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG, }, diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 084430cf28..2cd5bb7081 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -33,9 +33,6 @@ #ifndef BRW_WM_H #define BRW_WM_H -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_ureg_parse.h" - #include "brw_context.h" #include "brw_eu.h" @@ -59,8 +56,8 @@ #define AA_ALWAYS 2 struct brw_wm_prog_key { - unsigned proj_attrib_mask; /**< one bit per fragment program attribute */ - unsigned linear_attrib_mask:1; /**< linear interpolation vs perspective interp */ + unsigned proj_attrib_mask; /**< one bit per fragment program attribute */ + unsigned linear_attrib_mask; /**< linear interpolation vs perspective interp */ GLuint source_depth_reg:3; GLuint aa_dest_stencil_reg:3; @@ -75,11 +72,10 @@ struct brw_wm_prog_key { GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ - GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; - - GLuint program_string_id:32; + GLuint vp_nr_outputs:6; + GLuint nr_cbufs:3; - GLuint vp_nr_outputs_written; + GLuint program_string_id; }; @@ -146,9 +142,8 @@ struct brw_wm_instruction { GLuint opcode:8; GLuint saturate:1; GLuint writemask:4; - GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ - GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ - GLuint tex_shadow:1; /* do shadow comparison? 
*/ + GLuint tex_unit:4; /* texture/sampler unit for texture instructions */ + GLuint tex_target:4; /* TGSI_TEXTURE_x for texture instructions*/ GLuint eot:1; /* End of thread indicator for FB_WRITE*/ GLuint target:10; /* target binding table index for FB_WRITE*/ }; @@ -180,15 +175,17 @@ struct brw_wm_instruction { #define WM_FRONTFACING (MAX_OPCODE + 8) #define MAX_WM_OPCODE (MAX_OPCODE + 9) -#define PROGRAM_PAYLOAD (TGSI_FILE_COUNT) -#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) +#define BRW_FILE_PAYLOAD (TGSI_FILE_COUNT) +#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) /* ?? */ + +struct brw_passfp_program; struct brw_wm_compile { struct brw_compile func; struct brw_wm_prog_key key; struct brw_wm_prog_data prog_data; - struct brw_fragment_program *fp; + struct brw_fragment_shader *fp; GLfloat (*env_param)[4]; @@ -201,15 +198,7 @@ struct brw_wm_compile { * simplifying and adding instructions for interpolation and * framebuffer writes. */ - struct ureg_instruction prog_instructions[BRW_WM_MAX_INSN]; - GLuint nr_fp_insns; - GLuint fp_temp; - GLuint fp_interp_emitted; - GLuint fp_fragcolor_emitted; - - struct ureg_src pixel_xy; - struct ureg_src delta_xy; - struct ureg_src pixel_w; + struct brw_passfp_program *pass_fp; struct brw_wm_value vreg[BRW_WM_MAX_VREG]; @@ -298,8 +287,8 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); -//GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); +GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp); +void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c); void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c index 04dec5ba39..65d7626eea 100644 --- a/src/gallium/drivers/i965/brw_wm_debug.c +++ b/src/gallium/drivers/i965/brw_wm_debug.c @@ -28,7 +28,8 @@ * 
Authors: * Keith Whitwell */ - + +#include "tgsi/tgsi_info.h" #include "brw_context.h" #include "brw_wm.h" @@ -49,10 +50,10 @@ void brw_wm_print_value( struct brw_wm_compile *c, value - c->creg < BRW_WM_MAX_PARAM) debug_printf("c%d", value - c->creg); else if (value - c->payload.input_interp >= 0 && - value - c->payload.input_interp < FRAG_ATTRIB_MAX) + value - c->payload.input_interp < PIPE_MAX_SHADER_INPUTS) debug_printf("i%d", value - c->payload.input_interp); else if (value - c->payload.depth >= 0 && - value - c->payload.depth < FRAG_ATTRIB_MAX) + value - c->payload.depth < PIPE_MAX_SHADER_INPUTS) debug_printf("d%d", value - c->payload.depth); else debug_printf("?"); @@ -100,10 +101,10 @@ void brw_wm_print_insn( struct brw_wm_compile *c, if (inst->writemask != BRW_WRITEMASK_XYZW) debug_printf(".%s%s%s%s", - GET_BIT(inst->writemask, 0) ? "x" : "", - GET_BIT(inst->writemask, 1) ? "y" : "", - GET_BIT(inst->writemask, 2) ? "z" : "", - GET_BIT(inst->writemask, 3) ? "w" : ""); + (inst->writemask & BRW_WRITEMASK_X) ? "x" : "", + (inst->writemask & BRW_WRITEMASK_Y) ? "y" : "", + (inst->writemask & BRW_WRITEMASK_Z) ? "z" : "", + (inst->writemask & BRW_WRITEMASK_W) ? "w" : ""); switch (inst->opcode) { case WM_PIXELXY: @@ -134,7 +135,7 @@ void brw_wm_print_insn( struct brw_wm_compile *c, debug_printf(" = FRONTFACING"); break; default: - debug_printf(" = %s", _mesa_opcode_string(inst->opcode)); + debug_printf(" = %s", tgsi_get_opcode_info(inst->opcode)->mnemonic); break; } diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 5f7ae6592c..a705d8b344 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -28,10 +28,13 @@ * Authors: * Keith Whitwell */ - + +#include "util/u_math.h" +#include "tgsi/tgsi_info.h" #include "brw_context.h" #include "brw_wm.h" +#include "brw_debug.h" /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. 
@@ -45,15 +48,15 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) /* Payload R0: * - * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, + * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads, * corresponding to each of the 16 execution channels. * R0.1..8 -- ? * R1.0 -- triangle vertex 0.X * R1.1 -- triangle vertex 0.Y - * R1.2 -- tile 0 x,y coords (2 packed uwords) - * R1.3 -- tile 1 x,y coords (2 packed uwords) - * R1.4 -- tile 2 x,y coords (2 packed uwords) - * R1.5 -- tile 3 x,y coords (2 packed uwords) + * R1.2 -- quad 0 x,y coords (2 packed uwords) + * R1.3 -- quad 1 x,y coords (2 packed uwords) + * R1.4 -- quad 2 x,y coords (2 packed uwords) + * R1.5 -- quad 3 x,y coords (2 packed uwords) * R1.6 -- ? * R1.7 -- ? * R1.8 -- ? @@ -134,11 +137,17 @@ static void emit_wpos_xy(struct brw_wm_compile *c, /* XXX: is this needed any more, or is this a NOOP? */ if (mask & BRW_WRITEMASK_Y) { +#if 0 /* Y' = height - 1 - Y */ brw_ADD(p, dst[1], negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), brw_imm_d(c->key.drawable_height - 1)); +#else + brw_MOV(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_W)); +#endif } } @@ -279,28 +288,28 @@ static void emit_frontfacing( struct brw_compile *p, /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input * looking like: * - * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br + * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br * * and we're trying to produce: * * DDX DDY - * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) - * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) - * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) - * (ss0.br - ss0.bl) (ss0.tr - ss0.br) - * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) - * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) - * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) - * (ss1.br - ss1.bl) (ss1.tr - ss1.br) + * dst: (q0.tr - q0.tl) (q0.tl - q0.bl) + * (q0.tr - q0.tl) (q0.tr - q0.br) + * (q0.br - q0.bl) (q0.tl - q0.bl) + * (q0.br - q0.bl) (q0.tr - q0.br) + * (q1.tr - q1.tl) (q1.tl - q1.bl) + 
* (q1.tr - q1.tl) (q1.tr - q1.br) + * (q1.br - q1.bl) (q1.tl - q1.bl) + * (q1.br - q1.bl) (q1.tr - q1.br) * - * and add another set of two more subspans if in 16-pixel dispatch mode. + * and add two more quads if in 16-pixel dispatch mode. * * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result * for each pair, and vertstride = 2 jumps us 2 elements after processing a * pair. But for DDY, it's harder, as we want to produce the pairs swizzled * between each other. We could probably do it like ddx and swizzle the right * order later, but bail for now and just produce - * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) + * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4) */ void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, @@ -611,12 +620,12 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -633,12 +642,12 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -656,12 +665,12 @@ static void emit_dph( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - const int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; + const int dst_chan = ffs(mask 
& BRW_WRITEMASK_XYZW) - 1; if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -704,12 +713,12 @@ static void emit_math1( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { - int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MOV(p, brw_message_reg(2), arg0[0]); @@ -732,12 +741,12 @@ static void emit_math2( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1) { - int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_push_insn_state(p); @@ -790,21 +799,32 @@ static void emit_tex( struct brw_wm_compile *c, GLuint i, nr; GLuint emit; GLuint msg_type; + GLboolean shadow = FALSE; /* How many input regs are there? 
*/ - switch (inst->tex_idx) { - case TEXTURE_1D_INDEX: + switch (inst->tex_target) { + case TGSI_TEXTURE_1D: emit = BRW_WRITEMASK_X; nr = 1; break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: + case TGSI_TEXTURE_SHADOW1D: + emit = BRW_WRITEMASK_XW; + nr = 4; + shadow = TRUE; + break; + case TGSI_TEXTURE_2D: emit = BRW_WRITEMASK_XY; nr = 2; break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + emit = BRW_WRITEMASK_XYW; + nr = 4; + shadow = TRUE; + break; + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: emit = BRW_WRITEMASK_XYZ; nr = 3; break; @@ -813,11 +833,6 @@ static void emit_tex( struct brw_wm_compile *c, abort(); } - if (inst->tex_shadow) { - nr = 4; - emit |= BRW_WRITEMASK_W; - } - msgLength = 1; for (i = 0; i < nr; i++) { @@ -832,12 +847,12 @@ static void emit_tex( struct brw_wm_compile *c, responseLength = 8; /* always */ if (BRW_IS_IGDNG(p->brw)) { - if (inst->tex_shadow) + if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; else msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; } else { - if (inst->tex_shadow) + if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; else msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; @@ -870,20 +885,23 @@ static void emit_txb( struct brw_wm_compile *c, GLuint msg_type; /* Shadow ignored for txb. 
*/ - switch (inst->tex_idx) { - case TEXTURE_1D_INDEX: + switch (inst->tex_target) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: brw_MOV(p, brw_message_reg(2), arg[0]); brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: brw_MOV(p, brw_message_reg(2), arg[0]); brw_MOV(p, brw_message_reg(4), arg[1]); brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: brw_MOV(p, brw_message_reg(2), arg[0]); brw_MOV(p, brw_message_reg(4), arg[1]); brw_MOV(p, brw_message_reg(6), arg[2]); @@ -976,10 +994,10 @@ static void emit_kil( struct brw_wm_compile *c, } } -/* KIL_NV kills the pixels that are currently executing, not based on a test +/* KILLP kills the pixels that are currently executing, not based on a test * of the arguments. 
*/ -static void emit_kil_nv( struct brw_wm_compile *c ) +static void emit_killp( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -1259,7 +1277,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) */ spill_values(c, c->payload.depth, 4); spill_values(c, c->creg, c->nr_creg); - spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX); + spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS); for (insn = 0; insn < c->nr_insns; insn++) { @@ -1328,89 +1346,89 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Straightforward arithmetic: */ - case OPCODE_ADD: + case TGSI_OPCODE_ADD: emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); break; - case OPCODE_FRC: + case TGSI_OPCODE_FRC: emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); break; - case OPCODE_FLR: + case TGSI_OPCODE_FLR: emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; - case OPCODE_DDX: + case TGSI_OPCODE_DDX: emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]); break; - case OPCODE_DDY: + case TGSI_OPCODE_DDY: emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); break; - case OPCODE_DP3: + case TGSI_OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_DP4: + case TGSI_OPCODE_DP4: emit_dp4(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_DPH: + case TGSI_OPCODE_DPH: emit_dph(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_TRUNC: + case TGSI_OPCODE_TRUNC: emit_trunc(p, dst, dst_flags, args[0]); break; - case OPCODE_LRP: + case TGSI_OPCODE_LRP: emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; - case OPCODE_MAD: + case TGSI_OPCODE_MAD: emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); break; - case OPCODE_MOV: + case TGSI_OPCODE_MOV: emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; - case OPCODE_MUL: + case TGSI_OPCODE_MUL: emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: emit_xpd(p, 
dst, dst_flags, args[0], args[1]); break; /* Higher math functions: */ - case OPCODE_RCP: + case TGSI_OPCODE_RCP: emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; - case OPCODE_RSQ: + case TGSI_OPCODE_RSQ: emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; - case OPCODE_SIN: + case TGSI_OPCODE_SIN: emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; - case OPCODE_COS: + case TGSI_OPCODE_COS: emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; - case OPCODE_EX2: + case TGSI_OPCODE_EX2: emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; - case OPCODE_LG2: + case TGSI_OPCODE_LG2: emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; - case OPCODE_SCS: + case TGSI_OPCODE_SCS: /* There is an scs math function, but it would need some * fixup for 16-element execution. */ @@ -1420,71 +1438,70 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]); break; - case OPCODE_POW: + case TGSI_OPCODE_POW: emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); break; /* Comparisons: */ - case OPCODE_CMP: + case TGSI_OPCODE_CMP: emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); break; - case OPCODE_MAX: + case TGSI_OPCODE_MAX: emit_max(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_MIN: + case TGSI_OPCODE_MIN: emit_min(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SLT: + case TGSI_OPCODE_SLT: emit_slt(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SLE: + case TGSI_OPCODE_SLE: emit_sle(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SGT: + case TGSI_OPCODE_SGT: emit_sgt(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SGE: + case TGSI_OPCODE_SGE: emit_sge(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SEQ: + case TGSI_OPCODE_SEQ: emit_seq(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SNE: 
+ case TGSI_OPCODE_SNE: emit_sne(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_LIT: + case TGSI_OPCODE_LIT: emit_lit(p, dst, dst_flags, args[0]); break; /* Texturing operations: */ - case OPCODE_TEX: + case TGSI_OPCODE_TEX: emit_tex(c, inst, dst, dst_flags, args[0]); break; - case OPCODE_TXB: + case TGSI_OPCODE_TXB: emit_txb(c, inst, dst, dst_flags, args[0]); break; - case OPCODE_KIL: + case TGSI_OPCODE_KIL: emit_kil(c, args[0]); break; - case OPCODE_KIL_NV: - emit_kil_nv(c); + case TGSI_OPCODE_KILP: + emit_killp(c); break; default: debug_printf("Unsupported opcode %i (%s) in fragment shader\n", - inst->opcode, inst->opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->opcode) : - "unknown"); + inst->opcode, + tgsi_get_opcode_info(inst->opcode)->mnemonic); } for (i = 0; i < 4; i++) diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index d594730730..8ba037cdae 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -30,9 +30,8 @@ */ -#include "pipe/p_shader_constants.h" +#include "pipe/p_shader_tokens.h" -#include "brw_context.h" #include "brw_wm.h" #include "brw_util.h" @@ -43,7 +42,7 @@ #define W 3 -static const char *wm_opcode_strings[] = { +static const char *wm_opcode_strings[] = { "PIXELXY", "DELTAXY", "PIXELW", @@ -57,143 +56,6 @@ static const char *wm_opcode_strings[] = { -/*********************************************************************** - * Source regs - */ - -static struct prog_src_register src_reg(GLuint file, GLuint idx) -{ - struct prog_src_register reg; - reg.File = file; - reg.Index = idx; - reg.Swizzle = SWIZZLE_NOOP; - reg.RelAddr = 0; - reg.Negate = NEGATE_NONE; - reg.Abs = 0; - return reg; -} - -static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst) -{ - return src_reg(dst.File, dst.Index); -} - -static struct prog_src_register src_undef( void ) -{ - return src_reg(PROGRAM_UNDEFINED, 0); -} - -static GLboolean src_is_undef(struct 
prog_src_register src) -{ - return src.File == PROGRAM_UNDEFINED; -} - -static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w ) -{ - reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w); - return reg; -} - -static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x ) -{ - return src_swizzle(reg, x, x, x, x); -} - -static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle ) -{ - reg.Swizzle = swizzle; - return reg; -} - - -/*********************************************************************** - * Dest regs - */ - -static struct prog_dst_register dst_reg(GLuint file, GLuint idx) -{ - struct prog_dst_register reg; - reg.File = file; - reg.Index = idx; - reg.WriteMask = BRW_WRITEMASK_XYZW; - reg.RelAddr = 0; - reg.CondMask = COND_TR; - reg.CondSwizzle = 0; - reg.CondSrc = 0; - reg.pad = 0; - return reg; -} - -static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask ) -{ - reg.WriteMask &= mask; - return reg; -} - -static struct prog_dst_register dst_undef( void ) -{ - return dst_reg(PROGRAM_UNDEFINED, 0); -} - - - -static struct prog_dst_register get_temp( struct brw_wm_compile *c ) -{ - int bit = _mesa_ffs( ~c->fp_temp ); - - if (!bit) { - debug_printf("%s: out of temporaries\n", __FILE__); - exit(1); - } - - c->fp_temp |= 1<<(bit-1); - return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1)); -} - - -static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) -{ - c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp)); -} - - -/*********************************************************************** - * Instructions - */ - -static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) -{ - return &c->prog_instructions[c->nr_fp_insns++]; -} - -static struct prog_instruction *emit_insn(struct brw_wm_compile *c, - const struct prog_instruction *inst0) -{ - struct prog_instruction *inst = get_fp_inst(c); - *inst = 
*inst0; - return inst; -} - -static struct prog_instruction * emit_op(struct brw_wm_compile *c, - GLuint op, - struct prog_dst_register dest, - GLuint saturate, - struct prog_src_register src0, - struct prog_src_register src1, - struct prog_src_register src2 ) -{ - struct prog_instruction *inst = get_fp_inst(c); - - memset(inst, 0, sizeof(*inst)); - - inst->Opcode = op; - inst->DstReg = dest; - inst->SaturateMode = saturate; - inst->SrcReg[0] = src0; - inst->SrcReg[1] = src1; - inst->SrcReg[2] = src2; - return inst; -} /* Many opcodes produce the same value across all the result channels. @@ -202,32 +64,28 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, * anyway. We can easily get both by emitting the opcode to one channel, and * then MOVing it to the others, which brw_wm_pass*.c already understands. */ -static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, - const struct prog_instruction *inst0) -{ - struct prog_instruction *inst; - unsigned int dst_chan; - unsigned int other_channel_mask; - - if (inst0->DstReg.WriteMask == 0) - return NULL; - - dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1; - inst = get_fp_inst(c); - *inst = *inst0; - inst->DstReg.WriteMask = 1 << dst_chan; - - other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); - if (other_channel_mask != 0) { - inst = emit_op(c, - TGSI_OPCODE_MOV, - dst_mask(inst0->DstReg, other_channel_mask), - 0, - src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), - src_undef(), - src_undef()); +static void emit_scalar_insn(struct brw_wm_compile *c, + unsigned opcode, + struct brw_dst dst, + struct brw_src src0, + struct brw_src src1, + struct brw_src src2 ) +{ + unsigned first_chan = ffs(dst.writemask) - 1; + unsigned first_mask = 1 << first_chan; + + if (dst.writemask == 0) + return; + + emit_op( c, opcode, + brw_writemask(dst, first_mask), + src0, src1, src2 ); + + if (dst.writemask != first_mask) { + emit_op1(c, TGSI_OPCODE_MOV, + brw_writemask(dst, 
~first_mask), + src_swizzle1(brw_src(dst), first_chan)); } - return inst; } @@ -235,11 +93,11 @@ static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, * Special instructions for interpolation and other tasks */ -static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) +static struct ureg_src get_pixel_xy( struct brw_wm_compile *c ) { if (src_is_undef(c->pixel_xy)) { - struct prog_dst_register pixel_xy = get_temp(c); - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); + struct ureg_dst pixel_xy = get_temp(c); + struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); /* Emit the out calculations, and hold onto the results. Use @@ -250,7 +108,6 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) emit_op(c, WM_PIXELXY, dst_mask(pixel_xy, BRW_WRITEMASK_XY), - 0, payload_r0_depth, src_undef(), src_undef()); @@ -261,19 +118,18 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) return c->pixel_xy; } -static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) +static struct ureg_src get_delta_xy( struct brw_wm_compile *c ) { if (src_is_undef(c->delta_xy)) { - struct prog_dst_register delta_xy = get_temp(c); - struct prog_src_register pixel_xy = get_pixel_xy(c); - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); + struct ureg_dst delta_xy = get_temp(c); + struct ureg_src pixel_xy = get_pixel_xy(c); + struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); /* deltas.xy = DELTAXY pixel_xy, payload[0] */ emit_op(c, WM_DELTAXY, dst_mask(delta_xy, BRW_WRITEMASK_XY), - 0, pixel_xy, payload_r0_depth, src_undef()); @@ -284,19 +140,18 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) return c->delta_xy; } -static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) +static struct ureg_src get_pixel_w( struct brw_wm_compile *c ) { if 
(src_is_undef(c->pixel_w)) { - struct prog_dst_register pixel_w = get_temp(c); - struct prog_src_register deltas = get_delta_xy(c); - struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); + struct ureg_dst pixel_w = get_temp(c); + struct ureg_src deltas = get_delta_xy(c); + struct ureg_src interp_wpos = src_reg(TGSI_FILE_PAYLOAD, FRAG_ATTRIB_WPOS); /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ emit_op(c, WM_PIXELW, dst_mask(pixel_w, BRW_WRITEMASK_W), - 0, interp_wpos, deltas, src_undef()); @@ -313,9 +168,9 @@ static void emit_interp( struct brw_wm_compile *c, GLuint semantic_index, GLuint interp_mode ) { - struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - struct prog_src_register deltas = get_delta_xy(c); + struct ureg_dst dst = dst_reg(TGSI_FILE_INPUT, idx); + struct ureg_src interp = src_reg(TGSI_FILE_PAYLOAD, idx); + struct ureg_src deltas = get_delta_xy(c); /* Need to use PINTERP on attributes which have been * multiplied by 1/W in the SF program, and LINTERP on those @@ -325,271 +180,197 @@ static void emit_interp( struct brw_wm_compile *c, case FRAG_ATTRIB_WPOS: /* Have to treat wpos.xy specially: */ - emit_op(c, + emit_op1(c, WM_WPOSXY, dst_mask(dst, BRW_WRITEMASK_XY), - 0, - get_pixel_xy(c), - src_undef(), - src_undef()); + get_pixel_xy(c)); - dst = dst_mask(dst, BRW_WRITEMASK_ZW); - - /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw */ - emit_op(c, - WM_LINTERP, - dst, - 0, - interp, - deltas, - src_undef()); + emit_op2(c, + WM_LINTERP, + dst_mask(dst, BRW_WRITEMASK_ZW), + interp, + deltas); break; case TGSI_SEMANTIC_COLOR: if (c->key.flat_shade) { - emit_op(c, + emit_op1(c, WM_CINTERP, dst, - 0, - interp, - src_undef(), - src_undef()); + interp); + } + else if (interp_mode == TGSI_INTERPOLATE_LINEAR) { + emit_op2(c, + WM_LINTERP, + dst, + 
interp, + deltas); } else { - emit_op(c, - translate_interp_mode(interp_mode), - dst, - 0, - interp, - deltas, - src_undef()); + emit_op3(c, + WM_PINTERP, + dst, + interp, + deltas, + get_pixel_w(c)); } + break; case FRAG_ATTRIB_FOGC: /* Interpolate the fog coordinate */ - emit_op(c, + emit_op3(c, WM_PINTERP, dst_mask(dst, BRW_WRITEMASK_X), - 0, interp, deltas, get_pixel_w(c)); - emit_op(c, + emit_op1(c, TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_YZW), - 0, - src_swizzle(interp, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ONE), - src_undef(), - src_undef()); + dst_mask(dst, BRW_WRITEMASK_YZ), + brw_imm1f(0.0)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + brw_imm1f(1.0)); break; case FRAG_ATTRIB_FACE: /* XXX review/test this case */ - emit_op(c, - WM_FRONTFACING, - dst_mask(dst, BRW_WRITEMASK_X), - 0, - src_undef(), - src_undef(), - src_undef()); + emit_op0(c, + WM_FRONTFACING, + dst_mask(dst, BRW_WRITEMASK_X)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_YZ), + brw_imm1f(0.0)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + brw_imm1f(1.0)); break; case FRAG_ATTRIB_PNTC: /* XXX review/test this case */ - emit_op(c, - WM_PINTERP, - dst_mask(dst, BRW_WRITEMASK_XY), - 0, - interp, - deltas, - get_pixel_w(c)); - - emit_op(c, + emit_op3(c, + WM_PINTERP, + dst_mask(dst, BRW_WRITEMASK_XY), + interp, + deltas, + get_pixel_w(c)); + + emit_op1(c, TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_ZW), - 0, - src_swizzle(interp, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ONE), - src_undef(), - src_undef()); - break; + dst_mask(dst, BRW_WRITEMASK_Z), + brw_imm1f(c->pass_fp, 0.0f)); - default: - emit_op(c, - translate_interp_mode(interp_mode), - dst, - 0, - interp, - deltas, - get_pixel_w(c)); + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + brw_imm1f(c->pass_fp, 1.0f)); break; - } -} - -/*********************************************************************** 
- * Hacks to extend the program parameter and constant lists. - */ - -/* Add the fog parameters to the parameter list of the original - * program, rather than creating a new list. Doesn't really do any - * harm and it's not as if the parameter handling isn't a big hack - * anyway. - */ -static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, - GLint s0, - GLint s1, - GLint s2, - GLint s3, - GLint s4) -{ - struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - gl_state_index tokens[STATE_LENGTH]; - GLuint idx; - tokens[0] = s0; - tokens[1] = s1; - tokens[2] = s2; - tokens[3] = s3; - tokens[4] = s4; - - for (idx = 0; idx < paramList->NumParameters; idx++) { - if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR && - memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0) - return src_reg(PROGRAM_STATE_VAR, idx); - } - - idx = _mesa_add_state_reference( paramList, tokens ); - - return src_reg(PROGRAM_STATE_VAR, idx); -} + default: + switch (interp_mode) { + case TGSI_INTERPOLATE_CONSTANT: + emit_op1(c, + WM_CINTERP, + dst, + interp); + break; -static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, - GLfloat s0, - GLfloat s1, - GLfloat s2, - GLfloat s3) -{ - struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - GLfloat values[4]; - GLuint idx; - GLuint swizzle; - - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; - - /* Have to search, otherwise multiple compilations will each grow - * the parameter list. 
- */ - for (idx = 0; idx < paramList->NumParameters; idx++) { - if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT && - memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0) + case TGSI_INTERPOLATE_LINEAR: + emit_op2(c, + WM_LINTERP, + dst, + interp, + deltas); + break; - /* XXX: this mimics the mesa bug which puts all constants and - * parameters into the "PROGRAM_STATE_VAR" category: - */ - return src_reg(PROGRAM_STATE_VAR, idx); + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_op3(c, + WM_PINTERP, + dst, + interp, + deltas, + get_pixel_w(c)); + break; + } + break; } - - idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); - assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */ - return src_reg(PROGRAM_STATE_VAR, idx); } - /*********************************************************************** * Expand various instructions here to simpler forms. */ static void precalc_dst( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + struct brw_dst dst, + struct brw_src src0, + struct brw_src src1 ) { - struct prog_src_register src0 = inst->SrcReg[0]; - struct prog_src_register src1 = inst->SrcReg[1]; - struct prog_dst_register dst = inst->DstReg; - if (dst.WriteMask & BRW_WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ - emit_op(c, - TGSI_OPCODE_MUL, - dst_mask(dst, BRW_WRITEMASK_Y), - inst->SaturateMode, - src0, - src1, - src_undef()); + emit_op2(c, + TGSI_OPCODE_MUL, + dst_mask(dst, BRW_WRITEMASK_Y), + src0, + src1); } if (dst.WriteMask & BRW_WRITEMASK_XZ) { struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); - /* dst.xz = swz src0.1zzz + /* dst.z = mov src0.zzzz + */ + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_Z), + src_swizzle1(src0, Z)); + + /* dst.x = immf(1.0) */ - swz = emit_op(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_XZ), - inst->SaturateMode, - src_swizzle(src0, SWIZZLE_ONE, z, z, z), - src_undef(), - src_undef()); - /* Avoid letting negation flag of 
src0 affect our 1 constant. */ - swz->SrcReg[0].Negate &= ~NEGATE_X; + emit_op1(c, + TGSI_OPCODE_MOV, + brw_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0), + src_immf(c, 1.0)); } if (dst.WriteMask & BRW_WRITEMASK_W) { /* dst.w = mov src1.w */ - emit_op(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_W), - inst->SaturateMode, - src1, - src_undef(), - src_undef()); + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src1); } } static void precalc_lit( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + struct ureg_dst dst, + struct ureg_src src0 ) { - struct prog_src_register src0 = inst->SrcReg[0]; - struct prog_dst_register dst = inst->DstReg; - if (dst.WriteMask & BRW_WRITEMASK_XW) { - struct prog_instruction *swz; - - /* dst.xw = swz src0.1111 + /* dst.xw = imm(1.0f) */ - swz = emit_op(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_XW), - 0, - src_swizzle1(src0, SWIZZLE_ONE), - src_undef(), - src_undef()); - /* Avoid letting the negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].Negate = NEGATE_NONE; + emit_op1(c, + TGSI_OPCODE_MOV, + brw_saturate(brw_writemask(dst, BRW_WRITEMASK_XW), 0), + brw_imm1f(1.0f)); } if (dst.WriteMask & BRW_WRITEMASK_YZ) { - emit_op(c, - TGSI_OPCODE_LIT, - dst_mask(dst, BRW_WRITEMASK_YZ), - inst->SaturateMode, - src0, - src_undef(), - src_undef()); + emit_op1(c, + TGSI_OPCODE_LIT, + brw_writemask(dst, BRW_WRITEMASK_YZ), + src0); } } @@ -601,99 +382,62 @@ static void precalc_lit( struct brw_wm_compile *c, * instruction itself. 
*/ static void precalc_tex( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + struct brw_dst dst, + unsigned unit, + struct brw_src src0 ) { - struct prog_src_register coord; - struct prog_dst_register tmpcoord; - const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + struct ureg_src coord = src_undef(); + struct ureg_dst tmp = dst_undef(); assert(unit < BRW_MAX_TEX_UNIT); + /* Cubemap: find longest component of coord vector and normalize + * it. + */ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { - struct prog_instruction *out; - struct prog_dst_register tmp0 = get_temp(c); - struct prog_src_register tmp0src = src_reg_from_dst(tmp0); - struct prog_dst_register tmp1 = get_temp(c); - struct prog_src_register tmp1src = src_reg_from_dst(tmp1); - struct prog_src_register src0 = inst->SrcReg[0]; - - /* find longest component of coord vector and normalize it */ - tmpcoord = get_temp(c); - coord = src_reg_from_dst(tmpcoord); - - /* tmpcoord = src0 (i.e.: coord = src0) */ - out = emit_op(c, TGSI_OPCODE_MOV, - tmpcoord, - 0, - src0, - src_undef(), - src_undef()); - out->SrcReg[0].Negate = NEGATE_NONE; - out->SrcReg[0].Abs = 1; - - /* tmp0 = MAX(coord.X, coord.Y) */ - emit_op(c, TGSI_OPCODE_MAX, - tmp0, - 0, - src_swizzle1(coord, X), - src_swizzle1(coord, Y), - src_undef()); - - /* tmp1 = MAX(tmp0, coord.Z) */ - emit_op(c, TGSI_OPCODE_MAX, - tmp1, - 0, - tmp0src, - src_swizzle1(coord, Z), - src_undef()); - - /* tmp0 = 1 / tmp1 */ - emit_op(c, TGSI_OPCODE_RCP, - dst_mask(tmp0, BRW_WRITEMASK_X), - 0, - tmp1src, - src_undef(), - src_undef()); - - /* tmpCoord = src0 * tmp0 */ - emit_op(c, TGSI_OPCODE_MUL, - tmpcoord, - 0, - src0, - src_swizzle1(tmp0src, SWIZZLE_X), - src_undef()); - - release_temp(c, tmp0); - release_temp(c, tmp1); + struct ureg_src tmpsrc; + + tmp = get_temp(c); + tmpsrc = brw_src(tmpcoord) + + /* tmp = abs(src0) */ + emit_op1(c, + TGSI_OPCODE_MOV, + tmp, + brw_abs(src0)); + + /* tmp.X = MAX(tmp.X, tmp.Y) */ + emit_op2(c, 
TGSI_OPCODE_MAX, + brw_writemask(tmp, BRW_WRITEMASK_X), + src_swizzle1(tmpsrc, X), + src_swizzle1(tmpsrc, Y)); + + /* tmp.X = MAX(tmp.X, tmp.Z) */ + emit_op2(c, TGSI_OPCODE_MAX, + brw_writemask(tmp, BRW_WRITEMASK_X), + tmpsrc, + src_swizzle1(tmpsrc, Z)); + + /* tmp.X = 1 / tmp.X */ + emit_op1(c, TGSI_OPCODE_RCP, + dst_mask(tmp, BRW_WRITEMASK_X), + tmpsrc); + + /* tmp = src0 * tmp.xxxx */ + emit_op2(c, TGSI_OPCODE_MUL, + tmp, + src0, + src_swizzle1(tmpsrc, SWIZZLE_X)); + + coord = tmpsrc; } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { - struct prog_src_register scale = - search_or_add_param5( c, - STATE_INTERNAL, - STATE_TEXRECT_SCALE, - unit, - 0,0 ); - - tmpcoord = get_temp(c); - - /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } + /* XXX: need a mechanism for internally generated constants. */ - emit_op(c, - TGSI_OPCODE_MUL, - tmpcoord, - 0, - inst->SrcReg[0], - src_swizzle(scale, - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_ONE, - SWIZZLE_ONE), - src_undef()); - - coord = src_reg_from_dst(tmpcoord); + coord = src0; } else { - coord = inst->SrcReg[0]; + coord = src0; } /* Need to emit YUV texture conversions by hand. Probably need to @@ -704,58 +448,36 @@ static void precalc_tex( struct brw_wm_compile *c, if (c->key.yuvtex_mask & (1 << unit)) { /* convert ycbcr to RGBA */ GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<DstReg; - struct prog_dst_register tmp = get_temp(c); - struct prog_src_register tmpsrc = src_reg_from_dst(tmp); - struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); - struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); + struct ureg_dst dst = inst->DstReg; + struct ureg_dst tmp = get_temp(c); + struct ureg_src tmpsrc = src_reg_from_dst(tmp); + struct ureg_src C0 = ureg_imm4f( c->ureg, -.5, -.0625, -.5, 1.164 ); + struct ureg_src C1 = ureg_imm4f( c->ureg, 1.596, -0.813, 2.018, -.391 ); /* tmp = TEX ... 
*/ emit_tex_op(c, TGSI_OPCODE_TEX, - tmp, - inst->SaturateMode, + brw_saturate(tmp, dst.Saturate), unit, inst->TexSrcTarget, - inst->TexShadow, coord, src_undef(), src_undef()); /* tmp.xyz = ADD TMP, C0 */ - emit_op(c, - TGSI_OPCODE_ADD, - dst_mask(tmp, BRW_WRITEMASK_XYZ), - 0, - tmpsrc, - C0, - src_undef()); + emit_op2(c, TGSI_OPCODE_ADD, + dst_mask(tmp, BRW_WRITEMASK_XYZ), + tmpsrc, + C0); /* YUV.y = MUL YUV.y, C0.w */ - - emit_op(c, - TGSI_OPCODE_MUL, - dst_mask(tmp, BRW_WRITEMASK_Y), - 0, - tmpsrc, - src_swizzle1(C0, W), - src_undef()); + emit_op2(c, TGSI_OPCODE_MUL, + dst_mask(tmp, BRW_WRITEMASK_Y), + tmpsrc, + src_swizzle1(C0, W)); /* * if (UV swaped) @@ -764,23 +486,22 @@ static void precalc_tex( struct brw_wm_compile *c, * RGB.xyz = MAD YUV.xxz, C1, YUV.y */ - emit_op(c, - TGSI_OPCODE_MAD, - dst_mask(dst, BRW_WRITEMASK_XYZ), - 0, - swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), - C1, - src_swizzle1(tmpsrc, Y)); + emit_op3(c, TGSI_OPCODE_MAD, + dst_mask(dst, BRW_WRITEMASK_XYZ), + ( swap_uv ? 
+ src_swizzle(tmpsrc, Z,Z,X,X) : + src_swizzle(tmpsrc, X,X,Z,Z)), + C1, + src_swizzle1(tmpsrc, Y)); /* RGB.y = MAD YUV.z, C1.w, RGB.y */ - emit_op(c, - TGSI_OPCODE_MAD, - dst_mask(dst, BRW_WRITEMASK_Y), - 0, - src_swizzle1(tmpsrc, Z), - src_swizzle1(C1, W), - src_swizzle1(src_reg_from_dst(dst), Y)); + emit_op3(c, + TGSI_OPCODE_MAD, + dst_mask(dst, BRW_WRITEMASK_Y), + src_swizzle1(tmpsrc, Z), + src_swizzle1(C1, W), + src_swizzle1(src_reg_from_dst(dst), Y)); release_temp(c, tmp); } @@ -789,29 +510,20 @@ static void precalc_tex( struct brw_wm_compile *c, emit_tex_op(c, TGSI_OPCODE_TEX, inst->DstReg, - inst->SaturateMode, unit, inst->TexSrcTarget, - inst->TexShadow, coord, src_undef(), src_undef()); } - /* For GL_EXT_texture_swizzle: */ - if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) { - /* swizzle the result of the TEX instruction */ - struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); - emit_op(c, TGSI_OPCODE_MOV, - inst->DstReg, - SATURATE_OFF, /* saturate already done above */ - src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), - src_undef(), - src_undef()); - } + /* XXX: add GL_EXT_texture_swizzle support to gallium -- by + * generating shader varients in mesa state tracker. + */ - if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || - (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) + /* Release this temp if we ended up allocating it: + */ + if (!brw_dst_is_undef(tmpcoord)) release_temp(c, tmpcoord); } @@ -822,7 +534,7 @@ static void precalc_tex( struct brw_wm_compile *c, static GLboolean projtex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { - const struct prog_src_register src = inst->SrcReg[0]; + const struct ureg_src src = inst->SrcReg[0]; GLboolean retVal; assert(inst->Opcode == TGSI_OPCODE_TXP); @@ -836,7 +548,7 @@ static GLboolean projtex( struct brw_wm_compile *c, */ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) retVal = GL_FALSE; /* ut2004 gun rendering !?! 
*/ - else if (src.File == PROGRAM_INPUT && + else if (src.File == TGSI_FILE_INPUT && GET_SWZ(src.Swizzle, W) == W && (c->key.proj_attrib_mask & (1 << src.Index)) == 0) retVal = GL_FALSE; @@ -853,10 +565,10 @@ static GLboolean projtex( struct brw_wm_compile *c, static void precalc_txp( struct brw_wm_compile *c, const struct prog_instruction *inst ) { - struct prog_src_register src0 = inst->SrcReg[0]; + struct ureg_src src0 = inst->SrcReg[0]; if (projtex(c, inst)) { - struct prog_dst_register tmp = get_temp(c); + struct ureg_dst tmp = get_temp(c); struct prog_instruction tmp_inst; /* tmp0.w = RCP inst.arg[0][3] @@ -864,7 +576,6 @@ static void precalc_txp( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_RCP, dst_mask(tmp, BRW_WRITEMASK_W), - 0, src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), src_undef(), src_undef()); @@ -874,7 +585,6 @@ static void precalc_txp( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_MUL, dst_mask(tmp, BRW_WRITEMASK_XYZ), - 0, src0, src_swizzle1(src_reg_from_dst(tmp), W), src_undef()); @@ -899,43 +609,30 @@ static void precalc_txp( struct brw_wm_compile *c, static void emit_fb_write( struct brw_wm_compile *c ) { - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); - struct prog_src_register outcolor; + struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); + struct ureg_src outdepth = src_reg(TGSI_FILE_OUTPUT, FRAG_RESULT_DEPTH); + struct ureg_src outcolor; + struct prog_instruction *inst; GLuint i; - struct prog_instruction *inst, *last_inst; - struct brw_context *brw = c->func.brw; /* The inst->Aux field is used for FB write target and the EOT marker */ - if (brw->state.nr_color_regions > 1) { - for (i = 0 ; i < brw->state.nr_color_regions; i++) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - last_inst = inst = emit_op(c, - WM_FB_WRITE, dst_mask(dst_undef(),0), 0, - outcolor, 
payload_r0_depth, outdepth); - inst->Aux = (i<<1); - if (c->fp_fragcolor_emitted) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = (i<<1); - } - } - last_inst->Aux |= 1; //eot - } - else { - /* if gl_FragData[0] is written, use it, else use gl_FragColor */ - if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); - else - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - - inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = 1|(0<<1); + for (i = 0 ; i < c->key.nr_cbufs; i++) { + outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); + + inst = emit_op(c, WM_FB_WRITE, + dst_mask(dst_undef(), 0), + outcolor, + payload_r0_depth, + outdepth); + + inst->Aux = (i<<1); } + + /* Set EOT flag on last inst: + */ + inst->Aux |= 1; //eot } @@ -952,7 +649,7 @@ static void validate_src_regs( struct brw_wm_compile *c, GLuint i; for (i = 0; i < nr_args; i++) { - if (inst->SrcReg[i].File == PROGRAM_INPUT) { + if (inst->SrcReg[i].File == TGSI_FILE_INPUT) { GLuint idx = inst->SrcReg[i].Index; if (!(c->fp_interp_emitted & (1<DstReg.File == PROGRAM_OUTPUT) { + if (inst->DstReg.File == TGSI_FILE_OUTPUT) { GLuint idx = inst->DstReg.Index; if (idx == FRAG_RESULT_COLOR) - c->fp_fragcolor_emitted = 1; + c->fp_fragcolor_emitted |= inst->DstReg.WriteMask; } } -static void print_insns( const struct prog_instruction *insn, - GLuint nr ) + + +static void emit_insn( struct brw_wm_compile *c, + const struct tgsi_full_instruction *inst ) { - GLuint i; - for (i = 0; i < nr; i++, insn++) { - debug_printf("%3d: ", i); - if (insn->Opcode < MAX_OPCODE) - _mesa_print_instruction(insn); - else if (insn->Opcode < MAX_WM_OPCODE) { - GLuint idx = insn->Opcode - MAX_OPCODE; - - _mesa_print_alu_instruction(insn, - 
wm_opcode_strings[idx], - 3); - } - else - debug_printf("965 Opcode %d\n", insn->Opcode); + + switch (inst->Opcode) { + case TGSI_OPCODE_ABS: + emit_op1(c, TGSI_OPCODE_MOV, + dst, + brw_abs(src[0])); + break; + + case TGSI_OPCODE_SUB: + emit_op2(c, TGSI_OPCODE_ADD, + dst, + src[0], + brw_negate(src[1])); + break; + + case TGSI_OPCODE_SCS: + emit_op1(c, TGSI_OPCODE_SCS, + brw_writemask(dst, BRW_WRITEMASK_XY), + src[0]); + break; + + case TGSI_OPCODE_DST: + precalc_dst(c, inst); + break; + + case TGSI_OPCODE_LIT: + precalc_lit(c, inst); + break; + + case TGSI_OPCODE_TEX: + precalc_tex(c, inst); + break; + + case TGSI_OPCODE_TXP: + precalc_txp(c, inst); + break; + + case TGSI_OPCODE_TXB: + out = emit_insn(c, inst); + out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); + break; + + case TGSI_OPCODE_XPD: + emit_op2(c, TGSI_OPCODE_XPD, + brw_writemask(dst, BRW_WRITEMASK_XYZ), + src[0], + src[1]); + break; + + case TGSI_OPCODE_KIL: + emit_op1(c, TGSI_OPCODE_KIL, + brw_writemask(dst_undef(), 0), + src[0]); + break; + + case TGSI_OPCODE_END: + emit_fb_write(c); + break; + default: + if (brw_wm_is_scalar_result(inst->Opcode)) + emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]); + else + emit_op(c, opcode, dst, src[0], src[1], src[2]); + break; } } - /** * Initial pass for fragment program code generation. * This function is used by both the GLSL and non-GLSL paths. @@ -1004,108 +753,62 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) if (BRW_DEBUG & DEBUG_WM) { debug_printf("pre-fp:\n"); - _mesa_print_program(&fp->program.Base); - debug_printf("\n"); + tgsi_dump(fp->tokens, 0); } - c->pixel_xy = src_undef(); - c->delta_xy = src_undef(); - c->pixel_w = src_undef(); + c->pixel_xy = brw_src_undef(); + c->delta_xy = brw_src_undef(); + c->pixel_w = brw_src_undef(); c->nr_fp_insns = 0; c->fp->tex_units_used = 0x0; - /* Emit preamble instructions. 
This is where special instructions such as - * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to - * compute shader inputs from varying vars. - */ - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { - const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; - validate_src_regs(c, inst); - validate_dst_regs(c, inst); - } /* Loop over all instructions doing assorted simplifications and * transformations. */ - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { - const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; - struct prog_instruction *out; - - /* Check for INPUT values, emit INTERP instructions where - * necessary: - */ - - switch (inst->Opcode) { - case TGSI_OPCODE_ABS: - out = emit_insn(c, inst); - out->Opcode = TGSI_OPCODE_MOV; - out->SrcReg[0].Negate = NEGATE_NONE; - out->SrcReg[0].Abs = 1; - break; - - case TGSI_OPCODE_SUB: - out = emit_insn(c, inst); - out->Opcode = TGSI_OPCODE_ADD; - out->SrcReg[1].Negate ^= NEGATE_XYZW; - break; - - case TGSI_OPCODE_SCS: - out = emit_insn(c, inst); - /* This should probably be done in the parser. + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* If branching shader, emit preamble instructions at decl time, as + * instruction order in the shader does not correspond to the order + * instructions are executed in the wild. + * + * This is where special instructions such as WM_CINTERP, + * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to compute + * shader inputs from varying vars. + * + * XXX: For non-branching shaders, consider deferring variable + * initialization as late as possible to minimize register + * usage. This is how the original BRW driver worked. 
*/ - out->DstReg.WriteMask &= BRW_WRITEMASK_XY; - break; - - case TGSI_OPCODE_DST: - precalc_dst(c, inst); - break; - - case TGSI_OPCODE_LIT: - precalc_lit(c, inst); - break; - - case TGSI_OPCODE_TEX: - precalc_tex(c, inst); - break; - - case TGSI_OPCODE_TXP: - precalc_txp(c, inst); - break; - - case TGSI_OPCODE_TXB: - out = emit_insn(c, inst); - out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); - break; - - case TGSI_OPCODE_XPD: - out = emit_insn(c, inst); - /* This should probably be done in the parser. + validate_src_regs(c, inst); + validate_dst_regs(c, inst); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* Unlike VS programs we can probably manage fine encoding + * immediate values directly into the emitted EU + * instructions, as we probably only need to reference one + * float value per instruction. Just save the data for now + * and use directly later. */ - out->DstReg.WriteMask &= BRW_WRITEMASK_XYZ; break; - case TGSI_OPCODE_KIL: - out = emit_insn(c, inst); - /* This should probably be done in the parser. 
- */ - out->DstReg.WriteMask = 0; - break; - case TGSI_OPCODE_END: - emit_fb_write(c); - break; - default: - if (brw_wm_is_scalar_result(inst->Opcode)) - emit_scalar_insn(c, inst); - else - emit_insn(c, inst); + case TGSI_TOKEN_TYPE_INSTRUCTION: + inst = &parse.FullToken.FullInstruction; + emit_insn( c, inst ); break; } } + c->brw_program = brw_finalize( c->builder ); + if (BRW_DEBUG & DEBUG_WM) { debug_printf("pass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); + brw_print_program( c->brw_program ); debug_printf("\n"); } } diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index 59bc4ef701..cdc10484a6 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -332,7 +332,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); } - if (c->key.vp_outputs_written & (1 << i)) { + if (c->key.nr_vp_outputs > i) { reg_index += 2; } } @@ -1670,7 +1670,7 @@ get_argument_regs(struct brw_wm_compile *c, } } -static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) +static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_compile *c) { #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 @@ -1943,20 +1943,20 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) * Do GPU code generation for shaders that use GLSL features such as * flow control. 
Other shaders will be compiled with the */ -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c) { if (BRW_DEBUG & DEBUG_WM) { - debug_printf("brw_wm_glsl_emit:\n"); + debug_printf("%s:\n", __FUNCTION__); } /* initial instruction translation/simplification */ brw_wm_pass_fp(c); /* actual code generation */ - brw_wm_emit_glsl(brw, c); + brw_wm_emit_branching_shader(brw, c); if (BRW_DEBUG & DEBUG_WM) { - brw_wm_print_program(c, "brw_wm_glsl_emit done"); + brw_wm_print_program(c, "brw_wm_branching_shader_emit done"); } c->prog_data.total_grf = num_grf_used(c); diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 71e4c56835..d8b9028927 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -168,54 +168,20 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, if (!ref) { switch (file) { - case PROGRAM_INPUT: - case PROGRAM_PAYLOAD: - case PROGRAM_TEMPORARY: - case PROGRAM_OUTPUT: - case PROGRAM_VARYING: + case TGSI_FILE_INPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_OUTPUT: + case BRW_FILE_PAYLOAD: + /* should already be done?? */ break; - case PROGRAM_LOCAL_PARAM: - ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]); - break; - - case PROGRAM_ENV_PARAM: + case TGSI_FILE_CONSTANT: ref = get_param_ref(c, &c->env_param[idx][component]); break; - case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - case PROGRAM_CONSTANT: - case PROGRAM_NAMED_PARAM: { - struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; - - /* There's something really hokey about parameters parsed in - * arb programs - they all end up in here, whether they be - * state values, parameters or constants. This duplicates the - * structure above & also seems to subvert the limits set for - * each type of constant/param. 
- */ - switch (plist->Parameters[idx].Type) { - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - /* These are invarient: - */ - ref = get_imm_ref(c, &plist->ParameterValues[idx][component]); - break; - - case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - /* These may change from run to run: - */ - ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); - break; - - default: - assert(0); - break; - } + case TGSI_FILE_IMMEDIATE: + ref = get_imm_ref(c, &plist->ParameterValues[idx][component]); break; - } default: assert(0); @@ -310,17 +276,16 @@ translate_insn(struct brw_wm_compile *c, const struct prog_instruction *inst) { struct brw_wm_instruction *out = get_instruction(c); - GLuint writemask = inst->DstReg.WriteMask; + GLuint writemask = inst->dst.WriteMask; GLuint nr_args = brw_wm_nr_args(inst->Opcode); GLuint i, j; /* Copy some data out of the instruction */ out->opcode = inst->Opcode; - out->saturate = (inst->SaturateMode != SATURATE_OFF); + out->saturate = inst->dst.Saturate; out->tex_unit = inst->TexSrcUnit; - out->tex_idx = inst->TexSrcTarget; - out->tex_shadow = inst->TexShadow; + out->tex_target = inst->TexSrcTarget; out->eot = inst->Aux & 1; out->target = inst->Aux >> 1; @@ -328,7 +293,7 @@ translate_insn(struct brw_wm_compile *c, */ for (i = 0; i < nr_args; i++) { for (j = 0; j < 4; j++) { - out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out); + out->src[i][j] = get_new_ref(c, inst->src[i], j, out); } } @@ -380,15 +345,6 @@ static void pass0_init_payload( struct brw_wm_compile *c ) &c->payload.depth[j] ); } -#if 0 - /* This seems to be an alternative to the INTERP_WPOS stuff I do - * elsewhere: - */ - if (c->key.source_depth_reg) - pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2, - &c->payload.depth[c->key.source_depth_reg/2]); -#endif - for (i = 0; i < FRAG_ATTRIB_MAX; i++) pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, &c->payload.input_interp[i] ); @@ -403,6 +359,9 @@ static void pass0_init_payload( struct brw_wm_compile 
*c ) * the same number. * * Translate away swizzling and eliminate non-saturating moves. + * + * Translate instructions from Mesa's prog_instruction structs to our + * internal brw_wm_instruction representation. */ void brw_wm_pass0( struct brw_wm_compile *c ) { @@ -421,7 +380,7 @@ void brw_wm_pass0( struct brw_wm_compile *c ) */ switch (inst->Opcode) { case OPCODE_MOV: - if (!inst->SaturateMode) { + if (!inst->dst.Saturate) { pass0_precalc_mov(c, inst); } else { diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index 85a3a55ca4..b0356b1bd5 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -90,17 +90,24 @@ static void track_arg(struct brw_wm_compile *c, static GLuint get_texcoord_mask( GLuint tex_idx ) { switch (tex_idx) { - case TEXTURE_1D_INDEX: + case TGSI_TEXTURE_1D: return BRW_WRITEMASK_X; - case TEXTURE_2D_INDEX: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: return BRW_WRITEMASK_XY; - case TEXTURE_3D_INDEX: + case TGSI_TEXTURE_3D: return BRW_WRITEMASK_XYZ; - case TEXTURE_CUBE_INDEX: + case TGSI_TEXTURE_CUBE: return BRW_WRITEMASK_XYZ; - case TEXTURE_RECT_INDEX: - return BRW_WRITEMASK_XY; - default: return 0; + + case TGSI_TEXTURE_SHADOW1D: + return BRW_WRITEMASK_XZ; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + return BRW_WRITEMASK_XYZ; + default: + assert(0); + return 0; } } @@ -217,14 +224,9 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: read0 = get_texcoord_mask(inst->tex_idx); - - if (inst->tex_shadow) - read0 |= BRW_WRITEMASK_Z; break; case TGSI_OPCODE_TXB: - /* Shadow ignored for txb. 
- */ read0 = get_texcoord_mask(inst->tex_idx) | BRW_WRITEMASK_W; break; diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index edabf6ceb6..1898f38cef 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -52,7 +52,7 @@ struct brw_wm_unit_key { unsigned int max_threads; unsigned int nr_surfaces, sampler_count; - GLboolean uses_depth, computes_depth, uses_kill, is_glsl; + GLboolean uses_depth, computes_depth, uses_kill, has_flow_control; GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; GLfloat offset_units, offset_factor; }; @@ -114,10 +114,10 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->is_glsl = bfp->isGLSL; + key->has_flow_control = bfp->has_flow_control; /* temporary sanity check assertion */ - ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); + ASSERT(bfp->has_flow_control == brw_wm_has_flow_control(fp)); /* _NEW_QUERY */ key->stats_wm = (brw->query.stats_wm != 0); @@ -184,7 +184,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.wm5.program_computes_depth = key->computes_depth; wm.wm5.program_uses_killpixel = key->uses_kill; - if (key->is_glsl) + if (key->has_flow_control) wm.wm5.enable_8_pix = 1; else wm.wm5.enable_16_pix = 1; -- cgit v1.2.3 From 5d61b6f1f64ca26dd038af0679873ef0353660dd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 31 Oct 2009 15:05:01 +0000 Subject: i965g: wip on fragment shaders --- src/gallium/drivers/i965/brw_wm.h | 63 ++- src/gallium/drivers/i965/brw_wm_fp.c | 871 ++++++++++++++++++++++++++--------- 2 files changed, 698 insertions(+), 236 deletions(-) (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 2cd5bb7081..8ee99420aa 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ 
b/src/gallium/drivers/i965/brw_wm.h @@ -74,6 +74,7 @@ struct brw_wm_prog_key { GLuint vp_nr_outputs:6; GLuint nr_cbufs:3; + GLuint has_flow_control:1; GLuint program_string_id; }; @@ -176,9 +177,36 @@ struct brw_wm_instruction { #define MAX_WM_OPCODE (MAX_OPCODE + 9) #define BRW_FILE_PAYLOAD (TGSI_FILE_COUNT) -#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) /* ?? */ +#define PAYLOAD_DEPTH (PIPE_MAX_SHADER_INPUTS) /* ?? */ + + +struct brw_fp_src { + unsigned file:4; + unsigned index:16; + unsigned swizzle:8; + unsigned indirect:1; + unsigned negate:1; + unsigned abs:1; +}; + +struct brw_fp_dst { + unsigned file:4; + unsigned index:16; + unsigned writemask:4; + unsigned indirect:1; + unsigned saturate:1; +}; + +struct brw_fp_instruction { + struct brw_fp_dst dst; + struct brw_fp_src src[3]; + unsigned opcode:8; + unsigned tex_unit:4; + unsigned tex_target:4; + unsigned target:10; /* destination surface for FB_WRITE */ + unsigned eot:1; /* mark last instruction (usually FB_WRITE) */ +}; -struct brw_passfp_program; struct brw_wm_compile { struct brw_compile func; @@ -198,9 +226,26 @@ struct brw_wm_compile { * simplifying and adding instructions for interpolation and * framebuffer writes. 
*/ - struct brw_passfp_program *pass_fp; - - + struct { + GLfloat v[4]; + unsigned nr; + } immediate[BRW_WM_MAX_CONST+3]; + GLuint nr_immediates; + + struct brw_fp_instruction fp_instructions[BRW_WM_MAX_INSN]; + GLuint nr_fp_insns; + GLuint fp_temp; + GLuint fp_interp_emitted; + GLuint fp_fragcolor_emitted; + GLuint fp_first_internal_temp; + + struct brw_fp_src fp_pixel_xy; + struct brw_fp_src fp_delta_xy; + struct brw_fp_src fp_pixel_w; + + + /* Subsequent passes using SSA representation: + */ struct brw_wm_value vreg[BRW_WM_MAX_VREG]; GLuint nr_vreg; @@ -213,7 +258,7 @@ struct brw_wm_compile { } payload; - const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4]; + const struct brw_wm_ref *pass0_fp_reg[BRW_FILE_PAYLOAD+1][256][4]; struct brw_wm_ref undef_ref; struct brw_wm_value undef_value; @@ -241,7 +286,7 @@ struct brw_wm_compile { struct { GLboolean inited; struct brw_reg reg; - } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + } wm_regs[BRW_FILE_PAYLOAD+1][256][4]; GLboolean used_grf[BRW_WM_MAX_GRF]; GLuint first_free_grf; @@ -258,13 +303,15 @@ struct brw_wm_compile { GLint index; struct brw_reg reg; } current_const[3]; + + GLuint error; }; GLuint brw_wm_nr_args( GLuint opcode ); GLuint brw_wm_is_scalar_result( GLuint opcode ); -void brw_wm_pass_fp( struct brw_wm_compile *c ); +int brw_wm_pass_fp( struct brw_wm_compile *c ); void brw_wm_pass0( struct brw_wm_compile *c ); void brw_wm_pass1( struct brw_wm_compile *c ); void brw_wm_pass2( struct brw_wm_compile *c ); diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 8ba037cdae..57933afbbe 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -31,15 +31,26 @@ #include "pipe/p_shader_tokens.h" +#include "pipe/p_error.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_util.h" #include "brw_wm.h" #include 
"brw_util.h" +#include "brw_debug.h" #define X 0 #define Y 1 #define Z 2 #define W 3 +#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3) static const char *wm_opcode_strings[] = { @@ -54,7 +65,294 @@ static const char *wm_opcode_strings[] = { "FRONTFACING", }; +/*********************************************************************** + * Source regs + */ + +static struct brw_fp_src src_reg(GLuint file, GLuint idx) +{ + struct brw_fp_src reg; + reg.file = file; + reg.index = idx; + reg.swizzle = BRW_SWIZZLE_XYZW; + reg.indirect = 0; + reg.negate = 0; + reg.abs = 0; + return reg; +} + +static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst) +{ + return src_reg(dst.file, dst.index); +} + +static struct brw_fp_src src_undef( void ) +{ + return src_reg(TGSI_FILE_NULL, 0); +} + +static GLboolean src_is_undef(struct brw_fp_src src) +{ + return src.file == TGSI_FILE_NULL; +} + +static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w ) +{ + unsigned swz = reg.swizzle; + + reg.swizzle = ( GET_SWZ(swz, x) << 0 | + GET_SWZ(swz, y) << 2 | + GET_SWZ(swz, z) << 4 | + GET_SWZ(swz, w) << 6 ); + + return reg; +} + +static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x ) +{ + return src_swizzle(reg, x, x, x, x); +} + +static struct brw_fp_src src_abs( struct brw_fp_src src ) +{ + src.negate = 0; + src.abs = 1; + return src; +} + +static struct brw_fp_src src_negate( struct brw_fp_src src ) +{ + src.negate = 1; + src.abs = 0; + return src; +} + + +static int match_or_expand_immediate( const float *v, + unsigned nr, + float *v2, + unsigned *nr2, + unsigned *swizzle ) +{ + unsigned i, j; + + *swizzle = 0; + + for (i = 0; i < nr; i++) { + boolean found = FALSE; + + for (j = 0; j < *nr2 && !found; j++) { + if (v[i] == v2[j]) { + *swizzle |= j << (i * 2); + found = TRUE; + } + } + + if (!found) { + if (*nr2 >= 4) + return FALSE; + + v2[*nr2] = v[i]; + *swizzle |= *nr2 << (i * 2); + (*nr2)++; + } + } + + return TRUE; +} + + + +/* 
Internally generated immediates: overkill... + */ +static struct brw_fp_src src_imm( struct brw_wm_compile *c, + const GLfloat *v, + unsigned nr) +{ + unsigned i, j; + unsigned swizzle; + + /* Could do a first pass where we examine all existing immediates + * without expanding. + */ + + for (i = 0; i < c->nr_immediates; i++) { + if (match_or_expand_immediate( v, + nr, + c->immediate[i].v, + &c->immediate[i].nr, + &swizzle )) + goto out; + } + + if (c->nr_immediates < Elements(c->immediate)) { + i = c->nr_immediates++; + if (match_or_expand_immediate( v, + nr, + c->immediate[i].v, + &c->immediate[i].nr, + &swizzle )) + goto out; + } + + c->error = 1; + return src_undef(); + +out: + /* Make sure that all referenced elements are from this immediate. + * Has the effect of making size-one immediates into scalars. + */ + for (j = nr; j < 4; j++) + swizzle |= (swizzle & 0x3) << (j * 2); + + return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ), + GET_SWZ(swizzle, X), + GET_SWZ(swizzle, Y), + GET_SWZ(swizzle, Z), + GET_SWZ(swizzle, W) ); +} + + + +static struct brw_fp_src src_imm1f( struct brw_wm_compile *c, + GLfloat f ) +{ + return src_imm(c, &f, 1); +} + +static struct brw_fp_src src_imm4f( struct brw_wm_compile *c, + GLfloat x, + GLfloat y, + GLfloat z, + GLfloat w) +{ + GLfloat f[4] = {x,y,z,w}; + return src_imm(c, f, 4); +} + + + +/*********************************************************************** + * Dest regs + */ + +static struct brw_fp_dst dst_reg(GLuint file, GLuint idx) +{ + struct brw_fp_dst reg; + reg.file = file; + reg.index = idx; + reg.writemask = BRW_WRITEMASK_XYZW; + reg.indirect = 0; + return reg; +} + +static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask ) +{ + reg.writemask &= mask; + return reg; +} + +static struct brw_fp_dst dst_undef( void ) +{ + return dst_reg(TGSI_FILE_NULL, 0); +} + +static boolean dst_is_undef( struct brw_fp_dst dst ) +{ + return dst.file == TGSI_FILE_NULL; +} + +static struct brw_fp_dst dst_saturate( struct 
brw_fp_dst reg, boolean flag ) +{ + reg.saturate = flag; + return reg; +} + +static struct brw_fp_dst get_temp( struct brw_wm_compile *c ) +{ + int bit = ffs( ~c->fp_temp ); + + if (!bit) { + debug_printf("%s: out of temporaries\n", __FILE__); + } + + c->fp_temp |= 1<<(bit-1); + return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1)); +} + + +static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp ) +{ + c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp)); +} + + +/*********************************************************************** + * Instructions + */ + +static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c) +{ + return &c->fp_instructions[c->nr_fp_insns++]; +} + +static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + GLuint tex_src_unit, + GLuint tex_src_target, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + struct brw_fp_instruction *inst = get_fp_inst(c); + + inst->opcode = op; + inst->dst = dest; + inst->tex_unit = tex_src_unit; + inst->tex_target = tex_src_target; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + return inst; +} + + +static INLINE void emit_op3(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + emit_tex_op(c, op, dest, 0, 0, src0, src1, src2); +} + + +static INLINE void emit_op2(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0, + struct brw_fp_src src1) +{ + emit_tex_op(c, op, dest, 0, 0, src0, src1, src_undef()); +} + +static INLINE void emit_op1(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0) +{ + emit_tex_op(c, op, dest, 0, 0, src0, src_undef(), src_undef()); +} + +static INLINE void emit_op0(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest) +{ + emit_tex_op(c, op, 
dest, 0, 0, src_undef(), src_undef(), src_undef()); +} @@ -66,10 +364,10 @@ static const char *wm_opcode_strings[] = { */ static void emit_scalar_insn(struct brw_wm_compile *c, unsigned opcode, - struct brw_dst dst, - struct brw_src src0, - struct brw_src src1, - struct brw_src src2 ) + struct brw_fp_dst dst, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) { unsigned first_chan = ffs(dst.writemask) - 1; unsigned first_mask = 1 << first_chan; @@ -77,14 +375,14 @@ static void emit_scalar_insn(struct brw_wm_compile *c, if (dst.writemask == 0) return; - emit_op( c, opcode, - brw_writemask(dst, first_mask), - src0, src1, src2 ); + emit_op3( c, opcode, + dst_mask(dst, first_mask), + src0, src1, src2 ); if (dst.writemask != first_mask) { emit_op1(c, TGSI_OPCODE_MOV, - brw_writemask(dst, ~first_mask), - src_swizzle1(brw_src(dst), first_chan)); + dst_mask(dst, ~first_mask), + src_scalar(src_reg_from_dst(dst), first_chan)); } } @@ -93,11 +391,11 @@ static void emit_scalar_insn(struct brw_wm_compile *c, * Special instructions for interpolation and other tasks */ -static struct ureg_src get_pixel_xy( struct brw_wm_compile *c ) +static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c ) { - if (src_is_undef(c->pixel_xy)) { - struct ureg_dst pixel_xy = get_temp(c); - struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); + if (src_is_undef(c->fp_pixel_xy)) { + struct brw_fp_dst pixel_xy = get_temp(c); + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); /* Emit the out calculations, and hold onto the results. 
Use @@ -105,79 +403,85 @@ static struct ureg_src get_pixel_xy( struct brw_wm_compile *c ) */ /* pixel_xy.xy = PIXELXY payload[0]; */ - emit_op(c, - WM_PIXELXY, - dst_mask(pixel_xy, BRW_WRITEMASK_XY), - payload_r0_depth, - src_undef(), - src_undef()); + emit_op1(c, + WM_PIXELXY, + dst_mask(pixel_xy, BRW_WRITEMASK_XY), + payload_r0_depth); - c->pixel_xy = src_reg_from_dst(pixel_xy); + c->fp_pixel_xy = src_reg_from_dst(pixel_xy); } - return c->pixel_xy; + return c->fp_pixel_xy; } -static struct ureg_src get_delta_xy( struct brw_wm_compile *c ) +static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c ) { - if (src_is_undef(c->delta_xy)) { - struct ureg_dst delta_xy = get_temp(c); - struct ureg_src pixel_xy = get_pixel_xy(c); - struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); + if (src_is_undef(c->fp_delta_xy)) { + struct brw_fp_dst delta_xy = get_temp(c); + struct brw_fp_src pixel_xy = get_pixel_xy(c); + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); /* deltas.xy = DELTAXY pixel_xy, payload[0] */ - emit_op(c, + emit_op3(c, WM_DELTAXY, dst_mask(delta_xy, BRW_WRITEMASK_XY), pixel_xy, payload_r0_depth, src_undef()); - c->delta_xy = src_reg_from_dst(delta_xy); + c->fp_delta_xy = src_reg_from_dst(delta_xy); } - return c->delta_xy; + return c->fp_delta_xy; } -static struct ureg_src get_pixel_w( struct brw_wm_compile *c ) +static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c ) { - if (src_is_undef(c->pixel_w)) { - struct ureg_dst pixel_w = get_temp(c); - struct ureg_src deltas = get_delta_xy(c); - struct ureg_src interp_wpos = src_reg(TGSI_FILE_PAYLOAD, FRAG_ATTRIB_WPOS); + if (src_is_undef(c->fp_pixel_w)) { + struct brw_fp_dst pixel_w = get_temp(c); + struct brw_fp_src deltas = get_delta_xy(c); + + /* XXX: assuming position is always first -- valid? 
+ */ + struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0); /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ - emit_op(c, - WM_PIXELW, - dst_mask(pixel_w, BRW_WRITEMASK_W), - interp_wpos, - deltas, - src_undef()); + emit_op3(c, + WM_PIXELW, + dst_mask(pixel_w, BRW_WRITEMASK_W), + interp_wpos, + deltas, + src_undef()); - c->pixel_w = src_reg_from_dst(pixel_w); + c->fp_pixel_w = src_reg_from_dst(pixel_w); } - return c->pixel_w; + return c->fp_pixel_w; } + +/*********************************************************************** + * Emit INTERP instructions ahead of first use of each attrib. + */ + static void emit_interp( struct brw_wm_compile *c, + GLuint idx, GLuint semantic, - GLuint semantic_index, GLuint interp_mode ) { - struct ureg_dst dst = dst_reg(TGSI_FILE_INPUT, idx); - struct ureg_src interp = src_reg(TGSI_FILE_PAYLOAD, idx); - struct ureg_src deltas = get_delta_xy(c); + struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx); + struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx); + struct brw_fp_src deltas = get_delta_xy(c); /* Need to use PINTERP on attributes which have been * multiplied by 1/W in the SF program, and LINTERP on those * which have not: */ switch (semantic) { - case FRAG_ATTRIB_WPOS: + case TGSI_SEMANTIC_POSITION: /* Have to treat wpos.xy specially: */ emit_op1(c, @@ -218,7 +522,8 @@ static void emit_interp( struct brw_wm_compile *c, } break; - case FRAG_ATTRIB_FOGC: + + case TGSI_SEMANTIC_FOG: /* Interpolate the fog coordinate */ emit_op3(c, WM_PINTERP, @@ -228,17 +533,17 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); emit_op1(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_YZ), - brw_imm1f(0.0)); + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_YZ), + src_imm1f(c, 0.0)); emit_op1(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_W), - brw_imm1f(1.0)); + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src_imm1f(c, 1.0)); break; - case FRAG_ATTRIB_FACE: + case TGSI_SEMANTIC_FACE: 
/* XXX review/test this case */ emit_op0(c, WM_FRONTFACING, @@ -247,15 +552,15 @@ static void emit_interp( struct brw_wm_compile *c, emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_YZ), - brw_imm1f(0.0)); + src_imm1f(c, 0.0)); emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_W), - brw_imm1f(1.0)); + src_imm1f(c, 1.0)); break; - case FRAG_ATTRIB_PNTC: + case TGSI_SEMANTIC_PSIZE: /* XXX review/test this case */ emit_op3(c, WM_PINTERP, @@ -267,12 +572,12 @@ static void emit_interp( struct brw_wm_compile *c, emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_Z), - brw_imm1f(c->pass_fp, 0.0f)); + src_imm1f(c, 0.0f)); emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_W), - brw_imm1f(c->pass_fp, 1.0f)); + src_imm1f(c, 1.0f)); break; default: @@ -310,11 +615,11 @@ static void emit_interp( struct brw_wm_compile *c, * Expand various instructions here to simpler forms. */ static void precalc_dst( struct brw_wm_compile *c, - struct brw_dst dst, - struct brw_src src0, - struct brw_src src1 ) + struct brw_fp_dst dst, + struct brw_fp_src src0, + struct brw_fp_src src1 ) { - if (dst.WriteMask & BRW_WRITEMASK_Y) { + if (dst.writemask & BRW_WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ emit_op2(c, @@ -324,25 +629,22 @@ static void precalc_dst( struct brw_wm_compile *c, src1); } - if (dst.WriteMask & BRW_WRITEMASK_XZ) { - struct prog_instruction *swz; - GLuint z = GET_SWZ(src0.Swizzle, Z); - + if (dst.writemask & BRW_WRITEMASK_XZ) { /* dst.z = mov src0.zzzz */ emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_Z), - src_swizzle1(src0, Z)); + src_scalar(src0, Z)); - /* dst.x = immf(1.0) + /* dst.x = imm1f(1.0) */ emit_op1(c, TGSI_OPCODE_MOV, - brw_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0), - src_immf(c, 1.0)); + dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0), + src_imm1f(c, 1.0)); } - if (dst.WriteMask & BRW_WRITEMASK_W) { + if (dst.writemask & BRW_WRITEMASK_W) { /* dst.w = mov src1.w */ emit_op1(c, @@ -354,22 +656,22 @@ static void 
precalc_dst( struct brw_wm_compile *c, static void precalc_lit( struct brw_wm_compile *c, - struct ureg_dst dst, - struct ureg_src src0 ) + struct brw_fp_dst dst, + struct brw_fp_src src0 ) { - if (dst.WriteMask & BRW_WRITEMASK_XW) { + if (dst.writemask & BRW_WRITEMASK_XW) { /* dst.xw = imm(1.0f) */ emit_op1(c, TGSI_OPCODE_MOV, - brw_saturate(brw_writemask(dst, BRW_WRITEMASK_XW), 0), - brw_imm1f(1.0f)); + dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0), + src_imm1f(c, 1.0f)); } - if (dst.WriteMask & BRW_WRITEMASK_YZ) { + if (dst.writemask & BRW_WRITEMASK_YZ) { emit_op1(c, TGSI_OPCODE_LIT, - brw_writemask(dst, BRW_WRITEMASK_YZ), + dst_mask(dst, BRW_WRITEMASK_YZ), src0); } } @@ -382,41 +684,42 @@ static void precalc_lit( struct brw_wm_compile *c, * instruction itself. */ static void precalc_tex( struct brw_wm_compile *c, - struct brw_dst dst, + struct brw_fp_dst dst, + unsigned target, unsigned unit, - struct brw_src src0 ) + struct brw_fp_src src0 ) { - struct ureg_src coord = src_undef(); - struct ureg_dst tmp = dst_undef(); + struct brw_fp_src coord = src_undef(); + struct brw_fp_dst tmp = dst_undef(); assert(unit < BRW_MAX_TEX_UNIT); /* Cubemap: find longest component of coord vector and normalize * it. 
*/ - if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { - struct ureg_src tmpsrc; + if (target == TGSI_TEXTURE_CUBE) { + struct brw_fp_src tmpsrc; tmp = get_temp(c); - tmpsrc = brw_src(tmpcoord) + tmpsrc = src_reg_from_dst(tmp); /* tmp = abs(src0) */ emit_op1(c, TGSI_OPCODE_MOV, tmp, - brw_abs(src0)); + src_abs(src0)); /* tmp.X = MAX(tmp.X, tmp.Y) */ emit_op2(c, TGSI_OPCODE_MAX, - brw_writemask(tmp, BRW_WRITEMASK_X), - src_swizzle1(tmpsrc, X), - src_swizzle1(tmpsrc, Y)); + dst_mask(tmp, BRW_WRITEMASK_X), + src_scalar(tmpsrc, X), + src_scalar(tmpsrc, Y)); /* tmp.X = MAX(tmp.X, tmp.Z) */ emit_op2(c, TGSI_OPCODE_MAX, - brw_writemask(tmp, BRW_WRITEMASK_X), + dst_mask(tmp, BRW_WRITEMASK_X), tmpsrc, - src_swizzle1(tmpsrc, Z)); + src_scalar(tmpsrc, Z)); /* tmp.X = 1 / tmp.X */ emit_op1(c, TGSI_OPCODE_RCP, @@ -427,11 +730,12 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op2(c, TGSI_OPCODE_MUL, tmp, src0, - src_swizzle1(tmpsrc, SWIZZLE_X)); + src_scalar(tmpsrc, X)); coord = tmpsrc; } - else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { + else if (target == TGSI_TEXTURE_RECT || + target == TGSI_TEXTURE_SHADOWRECT) { /* XXX: need a mechanism for internally generated constants. */ coord = src0; @@ -448,19 +752,18 @@ static void precalc_tex( struct brw_wm_compile *c, if (c->key.yuvtex_mask & (1 << unit)) { /* convert ycbcr to RGBA */ GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<DstReg; - struct ureg_dst tmp = get_temp(c); - struct ureg_src tmpsrc = src_reg_from_dst(tmp); - struct ureg_src C0 = ureg_imm4f( c->ureg, -.5, -.0625, -.5, 1.164 ); - struct ureg_src C1 = ureg_imm4f( c->ureg, 1.596, -0.813, 2.018, -.391 ); + struct brw_fp_dst tmp = get_temp(c); + struct brw_fp_src tmpsrc = src_reg_from_dst(tmp); + struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 ); + struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 ); /* tmp = TEX ... 
*/ emit_tex_op(c, TGSI_OPCODE_TEX, - brw_saturate(tmp, dst.Saturate), + dst_saturate(tmp, dst.saturate), unit, - inst->TexSrcTarget, + target, coord, src_undef(), src_undef()); @@ -477,7 +780,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op2(c, TGSI_OPCODE_MUL, dst_mask(tmp, BRW_WRITEMASK_Y), tmpsrc, - src_swizzle1(C0, W)); + src_scalar(C0, W)); /* * if (UV swaped) @@ -492,16 +795,16 @@ static void precalc_tex( struct brw_wm_compile *c, src_swizzle(tmpsrc, Z,Z,X,X) : src_swizzle(tmpsrc, X,X,Z,Z)), C1, - src_swizzle1(tmpsrc, Y)); + src_scalar(tmpsrc, Y)); /* RGB.y = MAD YUV.z, C1.w, RGB.y */ emit_op3(c, TGSI_OPCODE_MAD, dst_mask(dst, BRW_WRITEMASK_Y), - src_swizzle1(tmpsrc, Z), - src_swizzle1(C1, W), - src_swizzle1(src_reg_from_dst(dst), Y)); + src_scalar(tmpsrc, Z), + src_scalar(C1, W), + src_scalar(src_reg_from_dst(dst), Y)); release_temp(c, tmp); } @@ -509,9 +812,9 @@ static void precalc_tex( struct brw_wm_compile *c, /* ordinary RGBA tex instruction */ emit_tex_op(c, TGSI_OPCODE_TEX, - inst->DstReg, + dst, unit, - inst->TexSrcTarget, + target, coord, src_undef(), src_undef()); @@ -523,8 +826,8 @@ static void precalc_tex( struct brw_wm_compile *c, /* Release this temp if we ended up allocating it: */ - if (!brw_dst_is_undef(tmpcoord)) - release_temp(c, tmpcoord); + if (!dst_is_undef(tmp)) + release_temp(c, tmp); } @@ -532,13 +835,9 @@ static void precalc_tex( struct brw_wm_compile *c, * Check if the given TXP instruction really needs the divide-by-W step. */ static GLboolean projtex( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + unsigned target, + struct brw_fp_src src ) { - const struct ureg_src src = inst->SrcReg[0]; - GLboolean retVal; - - assert(inst->Opcode == TGSI_OPCODE_TXP); - /* Only try to detect the simplest cases. Could detect (later) * cases where we are trying to emit code like RCP {1.0}, MUL x, * {1.0}, and so on. 
@@ -546,16 +845,15 @@ static GLboolean projtex( struct brw_wm_compile *c, * More complex cases than this typically only arise from * user-provided fragment programs anyway: */ - if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) - retVal = GL_FALSE; /* ut2004 gun rendering !?! */ - else if (src.File == TGSI_FILE_INPUT && - GET_SWZ(src.Swizzle, W) == W && - (c->key.proj_attrib_mask & (1 << src.Index)) == 0) - retVal = GL_FALSE; - else - retVal = GL_TRUE; - - return retVal; + if (target == TGSI_TEXTURE_CUBE) + return GL_FALSE; /* ut2004 gun rendering !?! */ + + if (src.file == TGSI_FILE_INPUT && + GET_SWZ(src.swizzle, W) == W && + (c->key.proj_attrib_mask & (1 << src.index)) == 0) + return GL_FALSE; + + return GL_TRUE; } @@ -563,110 +861,168 @@ static GLboolean projtex( struct brw_wm_compile *c, * Emit code for TXP. */ static void precalc_txp( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + struct brw_fp_dst dst, + unsigned target, + unsigned unit, + struct brw_fp_src src0 ) { - struct ureg_src src0 = inst->SrcReg[0]; - - if (projtex(c, inst)) { - struct ureg_dst tmp = get_temp(c); - struct prog_instruction tmp_inst; + if (projtex(c, target, src0)) { + struct brw_fp_dst tmp = get_temp(c); /* tmp0.w = RCP inst.arg[0][3] */ - emit_op(c, + emit_op1(c, TGSI_OPCODE_RCP, dst_mask(tmp, BRW_WRITEMASK_W), - src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), - src_undef(), - src_undef()); + src_scalar(src0, W)); /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww */ - emit_op(c, - TGSI_OPCODE_MUL, - dst_mask(tmp, BRW_WRITEMASK_XYZ), - src0, - src_swizzle1(src_reg_from_dst(tmp), W), - src_undef()); + emit_op2(c, + TGSI_OPCODE_MUL, + dst_mask(tmp, BRW_WRITEMASK_XYZ), + src0, + src_scalar(src_reg_from_dst(tmp), W)); - /* dst = precalc(TEX tmp0) + /* dst = TEX tmp0 */ - tmp_inst = *inst; - tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); - precalc_tex(c, &tmp_inst); + precalc_tex(c, + dst, + target, + unit, + src_reg_from_dst(tmp)); release_temp(c, tmp); } else { - /* dst = precalc(TEX 
src0) + /* dst = TEX src0 */ - precalc_tex(c, inst); + precalc_tex(c, dst, target, unit, src0); } } +/* XXX: note this returns a src_reg. + */ +static struct brw_fp_src +find_output_by_semantic( struct brw_wm_compile *c, + unsigned semantic, + unsigned index ) +{ + const struct tgsi_shader_info *info = &c->fp->info; + unsigned i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return src_reg( TGSI_FILE_OUTPUT, i ); + + /* If not found, return some arbitrary immediate value: + */ + return src_imm1f(c, 1.0); +} + static void emit_fb_write( struct brw_wm_compile *c ) { - struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); - struct ureg_src outdepth = src_reg(TGSI_FILE_OUTPUT, FRAG_RESULT_DEPTH); - struct ureg_src outcolor; - struct prog_instruction *inst; + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); + struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0); GLuint i; - /* The inst->Aux field is used for FB write target and the EOT marker */ + outdepth = src_scalar(outdepth, Z); for (i = 0 ; i < c->key.nr_cbufs; i++) { - outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); + struct brw_fp_src outcolor; + unsigned target = 1<key.nr_cbufs - 1) + target |= 1; + + outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); - inst->Aux = (i<<1); + /* Use emit_tex_op so that we can specify the inst->tex_target + * field, which is abused to contain the FB write target and the + * EOT marker + */ + emit_tex_op(c, WM_FB_WRITE, + dst_undef(), + target, + 0, + outcolor, + payload_r0_depth, + outdepth); } - - /* Set EOT flag on last inst: - */ - inst->Aux |= 1; //eot } +static struct brw_fp_dst translate_dst( struct brw_wm_compile *c, + const struct tgsi_full_dst_register *dst, + unsigned saturate ) +{ + struct brw_fp_dst out; + + out.file = dst->DstRegister.File; + out.index = 
dst->DstRegister.Index; + out.writemask = dst->DstRegister.WriteMask; + out.indirect = dst->DstRegister.Indirect; + out.saturate = (saturate == TGSI_SAT_ZERO_ONE); + + if (out.indirect) { + assert(dst->DstRegisterInd.File == TGSI_FILE_ADDRESS); + assert(dst->DstRegisterInd.Index == 0); + } + + return out; +} -/*********************************************************************** - * Emit INTERP instructions ahead of first use of each attrib. - */ - -static void validate_src_regs( struct brw_wm_compile *c, - const struct prog_instruction *inst ) +static struct brw_fp_src translate_src( struct brw_wm_compile *c, + const struct tgsi_full_src_register *src ) { - GLuint nr_args = brw_wm_nr_args( inst->Opcode ); - GLuint i; + struct brw_fp_src out; + + out.file = src->SrcRegister.File; + out.index = src->SrcRegister.Index; + out.indirect = src->SrcRegister.Indirect; + + out.swizzle = ((src->SrcRegister.SwizzleX << 0) | + (src->SrcRegister.SwizzleY << 2) | + (src->SrcRegister.SwizzleZ << 4) | + (src->SrcRegister.SwizzleW << 6)); + + switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) { + case TGSI_UTIL_SIGN_CLEAR: + out.abs = 1; + out.negate = 0; + break; - for (i = 0; i < nr_args; i++) { - if (inst->SrcReg[i].File == TGSI_FILE_INPUT) { - GLuint idx = inst->SrcReg[i].Index; - if (!(c->fp_interp_emitted & (1<fp_interp_emitted |= 1<DstReg.File == TGSI_FILE_OUTPUT) { - GLuint idx = inst->DstReg.Index; - if (idx == FRAG_RESULT_COLOR) - c->fp_fragcolor_emitted |= inst->DstReg.WriteMask; + + if (out.indirect) { + assert(src->SrcRegisterInd.File == TGSI_FILE_ADDRESS); + assert(src->SrcRegisterInd.Index == 0); } + + return out; } @@ -674,59 +1030,78 @@ static void validate_dst_regs( struct brw_wm_compile *c, static void emit_insn( struct brw_wm_compile *c, const struct tgsi_full_instruction *inst ) { - - switch (inst->Opcode) { + unsigned opcode = inst->Instruction.Opcode; + struct brw_fp_dst dst; + struct brw_fp_src src[3]; + int i; + + dst = translate_dst( c, 
&inst->FullDstRegisters[0], + inst->Instruction.Saturate ); + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) + src[i] = translate_src( c, &inst->FullSrcRegisters[0] ); + + switch (opcode) { case TGSI_OPCODE_ABS: emit_op1(c, TGSI_OPCODE_MOV, dst, - brw_abs(src[0])); + src_abs(src[0])); break; case TGSI_OPCODE_SUB: emit_op2(c, TGSI_OPCODE_ADD, dst, src[0], - brw_negate(src[1])); + src_negate(src[1])); break; case TGSI_OPCODE_SCS: emit_op1(c, TGSI_OPCODE_SCS, - brw_writemask(dst, BRW_WRITEMASK_XY), + dst_mask(dst, BRW_WRITEMASK_XY), src[0]); break; case TGSI_OPCODE_DST: - precalc_dst(c, inst); + precalc_dst(c, dst, src[0], src[1]); break; case TGSI_OPCODE_LIT: - precalc_lit(c, inst); + precalc_lit(c, dst, src[0]); break; case TGSI_OPCODE_TEX: - precalc_tex(c, inst); + precalc_tex(c, dst, + inst->InstructionExtTexture.Texture, + src[0].file, /* sampler unit */ + src[1] ); break; case TGSI_OPCODE_TXP: - precalc_txp(c, inst); + precalc_txp(c, dst, + inst->InstructionExtTexture.Texture, + src[0].file, /* sampler unit */ + src[1] ); break; case TGSI_OPCODE_TXB: - out = emit_insn(c, inst); - out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); + /* XXX: TXB not done + */ + precalc_tex(c, dst, + inst->InstructionExtTexture.Texture, + src[0].file, /* sampler unit */ + src[1] ); break; case TGSI_OPCODE_XPD: emit_op2(c, TGSI_OPCODE_XPD, - brw_writemask(dst, BRW_WRITEMASK_XYZ), + dst_mask(dst, BRW_WRITEMASK_XYZ), src[0], src[1]); break; case TGSI_OPCODE_KIL: emit_op1(c, TGSI_OPCODE_KIL, - brw_writemask(dst_undef(), 0), + dst_mask(dst_undef(), 0), src[0]); break; @@ -734,10 +1109,11 @@ static void emit_insn( struct brw_wm_compile *c, emit_fb_write(c); break; default: - if (brw_wm_is_scalar_result(inst->Opcode)) + if (!c->key.has_flow_control && + brw_wm_is_scalar_result(opcode)) emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]); else - emit_op(c, opcode, dst, src[0], src[1], src[2]); + emit_op3(c, opcode, 
dst, src[0], src[1], src[2]); break; } } @@ -746,46 +1122,70 @@ static void emit_insn( struct brw_wm_compile *c, * Initial pass for fragment program code generation. * This function is used by both the GLSL and non-GLSL paths. */ -void brw_wm_pass_fp( struct brw_wm_compile *c ) +int brw_wm_pass_fp( struct brw_wm_compile *c ) { - struct brw_fragment_program *fp = c->fp; - GLuint insn; + struct brw_fragment_shader *fs = c->fp; + struct tgsi_parse_context parse; + struct tgsi_full_instruction *inst; + struct tgsi_full_declaration *decl; + const float *imm; + GLuint size; + GLuint i; if (BRW_DEBUG & DEBUG_WM) { debug_printf("pre-fp:\n"); - tgsi_dump(fp->tokens, 0); + tgsi_dump(fs->tokens, 0); } - c->pixel_xy = brw_src_undef(); - c->delta_xy = brw_src_undef(); - c->pixel_w = brw_src_undef(); + c->fp_pixel_xy = src_undef(); + c->fp_delta_xy = src_undef(); + c->fp_pixel_w = src_undef(); c->nr_fp_insns = 0; - c->fp->tex_units_used = 0x0; + c->nr_immediates = 0; /* Loop over all instructions doing assorted simplifications and * transformations. */ - tgsi_parse_init( &parse, tokens ); + tgsi_parse_init( &parse, fs->tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - /* If branching shader, emit preamble instructions at decl time, as - * instruction order in the shader does not correspond to the order - * instructions are executed in the wild. - * - * This is where special instructions such as WM_CINTERP, - * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to compute - * shader inputs from varying vars. + /* Turn intput declarations into special WM_* instructions. * * XXX: For non-branching shaders, consider deferring variable * initialization as late as possible to minimize register * usage. This is how the original BRW driver worked. 
+ * + * In a branching shader, must preamble instructions at decl + * time, as instruction order in the shader does not + * correspond to the order instructions are executed in the + * wild. + * + * This is where special instructions such as WM_CINTERP, + * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to + * compute shader inputs from the payload registers and pixel + * position. */ - validate_src_regs(c, inst); - validate_dst_regs(c, inst); + decl = &parse.FullToken.FullDeclaration; + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + unsigned attrib; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + for (attrib = first; attrib <= last; attrib++) { + emit_interp(c, + attrib, + decl->Semantic.SemanticName, + decl->Declaration.Interpolate ); + } + } + break; case TGSI_TOKEN_TYPE_IMMEDIATE: @@ -795,21 +1195,36 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) * float value per instruction. Just save the data for now * and use directly later. 
*/ + i = c->nr_immediates++; + imm = &parse.FullToken.FullImmediate.u[i].Float; + size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + + if (c->nr_immediates >= BRW_WM_MAX_CONST) + return PIPE_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < size; i++) + c->immediate[c->nr_immediates].v[i] = imm[i]; + + for (; i < 4; i++) + c->immediate[c->nr_immediates].v[i] = 0.0; + + c->immediate[c->nr_immediates].nr = size; + c->nr_immediates++; break; case TGSI_TOKEN_TYPE_INSTRUCTION: inst = &parse.FullToken.FullInstruction; - emit_insn( c, inst ); + emit_insn(c, inst); break; } } - c->brw_program = brw_finalize( c->builder ); - if (BRW_DEBUG & DEBUG_WM) { debug_printf("pass_fp:\n"); - brw_print_program( c->brw_program ); + //brw_print_program( c->fp_brw_program ); debug_printf("\n"); } + + return c->error; } -- cgit v1.2.3 From f202a34cb1eca41cf5d12bd72016f284bc81ccf8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 31 Oct 2009 18:23:14 +0000 Subject: i965g: non-glsl fragment shader path is compiling Disabled glsl code for now, probably want to clean this up somehow. 
--- src/gallium/drivers/i965/Makefile | 1 - src/gallium/drivers/i965/brw_wm.c | 14 +- src/gallium/drivers/i965/brw_wm.h | 10 +- src/gallium/drivers/i965/brw_wm_fp.c | 7 +- src/gallium/drivers/i965/brw_wm_glsl.c | 268 ++++++++++++++++++++------------ src/gallium/drivers/i965/brw_wm_pass0.c | 87 +++++------ src/gallium/drivers/i965/brw_wm_pass1.c | 8 +- src/gallium/drivers/i965/brw_wm_pass2.c | 27 +--- 8 files changed, 230 insertions(+), 192 deletions(-) (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index c3dbad72ae..896cb234a6 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -47,7 +47,6 @@ C_SOURCES = \ brw_wm_debug.c \ brw_wm_emit.c \ brw_wm_fp.c \ - brw_wm_glsl.c \ brw_wm_iz.c \ brw_wm_pass0.c \ brw_wm_pass1.c \ diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 33602b59c1..4fbf9de9bb 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -177,7 +177,10 @@ static int do_wm_prog( struct brw_context *brw, */ if (fp->has_flow_control) { c->dispatch_width = 8; - brw_wm_branching_shader_emit(brw, c); + /* XXX: GLSL support + */ + exit(1); + //brw_wm_branching_shader_emit(brw, c); } else { c->dispatch_width = 16; @@ -239,18 +242,9 @@ static void brw_wm_populate_key( struct brw_context *brw, brw->curr.fragment_shader->uses_depth, key); - /* Revisit this, figure out if it's really useful, and either push - * it into the state tracker so that everyone benefits (use to - * create fs varients with TEX rather than TXP), or discard. - */ - key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/ - /* PIPE_NEW_RAST */ key->flat_shade = brw->curr.rast->templ.flatshade; - /* This can be determined by looking at the INTERP mode each input decl. 
- */ - key->linear_attrib_mask = 0; /* PIPE_NEW_BOUND_TEXTURES */ for (i = 0; i < brw->curr.num_textures; i++) { diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 8ee99420aa..48dac39756 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -56,9 +56,6 @@ #define AA_ALWAYS 2 struct brw_wm_prog_key { - unsigned proj_attrib_mask; /**< one bit per fragment program attribute */ - unsigned linear_attrib_mask; /**< linear interpolation vs perspective interp */ - GLuint source_depth_reg:3; GLuint aa_dest_stencil_reg:3; GLuint dest_depth_reg:3; @@ -73,6 +70,7 @@ struct brw_wm_prog_key { GLuint yuvtex_swap_mask:16; /* UV swaped */ GLuint vp_nr_outputs:6; + GLuint nr_inputs:6; GLuint nr_cbufs:3; GLuint has_flow_control:1; @@ -179,6 +177,12 @@ struct brw_wm_instruction { #define BRW_FILE_PAYLOAD (TGSI_FILE_COUNT) #define PAYLOAD_DEPTH (PIPE_MAX_SHADER_INPUTS) /* ?? */ +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 +#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3) + struct brw_fp_src { unsigned file:4; diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 57933afbbe..58f1d35b7d 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -46,11 +46,6 @@ #include "brw_debug.h" -#define X 0 -#define Y 1 -#define Z 2 -#define W 3 -#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3) static const char *wm_opcode_strings[] = { @@ -850,7 +845,7 @@ static GLboolean projtex( struct brw_wm_compile *c, if (src.file == TGSI_FILE_INPUT && GET_SWZ(src.swizzle, W) == W && - (c->key.proj_attrib_mask & (1 << src.index)) == 0) + c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE) return GL_FALSE; return GL_TRUE; diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index cdc10484a6..a06b0a446e 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ 
b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -1,10 +1,13 @@ +#include "util/u_math.h" + + #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint component); @@ -63,7 +66,7 @@ alloc_grf(struct brw_wm_compile *c) /* really, no free GRF regs found */ if (!c->out_of_regs) { /* print warning once per compilation */ - _mesa_warning(NULL, "i965: ran out of registers for fragment program"); + debug_printf("%s: ran out of registers for fragment program", __FUNCTION__); c->out_of_regs = GL_TRUE; } @@ -154,20 +157,18 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, { struct brw_reg reg; switch (file) { - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - file = PROGRAM_STATE_VAR; - break; - case PROGRAM_UNDEFINED: + case TGSI_FILE_NULL: return brw_null_reg(); - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - case PROGRAM_OUTPUT: - case PROGRAM_PAYLOAD: + + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + case BRW_FILE_PAYLOAD: break; + default: - debug_printf("Unexpected file in get_reg()"); + debug_printf("%s: Unexpected file type\n", __FUNCTION__); return brw_null_reg(); } @@ -204,6 +205,76 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, + +/** + * Find first/last instruction that references each temporary register. 
+ */ +GLboolean +_mesa_find_temp_intervals(const struct prog_instruction *instructions, + GLuint numInstructions, + GLint intBegin[MAX_PROGRAM_TEMPS], + GLint intEnd[MAX_PROGRAM_TEMPS]) +{ + struct loop_info + { + GLuint Start, End; /**< Start, end instructions of loop */ + }; + struct loop_info loopStack[MAX_LOOP_NESTING]; + GLuint loopStackDepth = 0; + GLuint i; + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + intBegin[i] = intEnd[i] = -1; + } + + /* Scan instructions looking for temporary registers */ + for (i = 0; i < numInstructions; i++) { + const struct prog_instruction *inst = instructions + i; + if (inst->Opcode == OPCODE_BGNLOOP) { + loopStack[loopStackDepth].Start = i; + loopStack[loopStackDepth].End = inst->BranchTarget; + loopStackDepth++; + } + else if (inst->Opcode == OPCODE_ENDLOOP) { + loopStackDepth--; + } + else if (inst->Opcode == OPCODE_CAL) { + return GL_FALSE; + } + else { + const GLuint numSrc = 3; + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + if (inst->SrcReg[j].RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + if (inst->DstReg.RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + } + + return GL_TRUE; +} + + /** * This is called if we run out of GRF registers. Examine the live intervals * of temp regs in the program and free those which won't be used again. 
@@ -211,29 +282,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, static void reclaim_temps(struct brw_wm_compile *c) { - GLint intBegin[MAX_PROGRAM_TEMPS]; - GLint intEnd[MAX_PROGRAM_TEMPS]; + GLint intBegin[BRW_WM_MAX_TEMPS]; + GLint intEnd[BRW_WM_MAX_TEMPS]; int index; /*printf("Reclaim temps:\n");*/ - _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, + _mesa_find_temp_intervals(c->fp_instructions, c->nr_fp_insns, intBegin, intEnd); - for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { + for (index = 0; index < BRW_WM_MAX_TEMPS; index++) { if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { /* program temp[i] can be freed */ int component; /*printf(" temp[%d] is dead\n", index);*/ for (component = 0; component < 4; component++) { - if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { - int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; + if (c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited) { + int r = c->wm_regs[TGSI_FILE_TEMPORARY][index][component].reg.nr; release_grf(c, r); /* printf(" Reclaim temp %d, reg %d at inst %d\n", index, r, c->cur_inst); */ - c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; + c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited = GL_FALSE; } } } @@ -264,7 +335,7 @@ static void prealloc_reg(struct brw_wm_compile *c) reg = brw_vec8_grf(i * 2, 0); else reg = brw_vec8_grf(0, 0); - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); + set_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, i, reg); } reg_index += 2 * c->key.nr_depth_regs; @@ -306,7 +377,7 @@ static void prealloc_reg(struct brw_wm_compile *c) * Constants will be copied in prepare_constant_buffer() */ c->prog_data.param[index] = &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + set_reg(c, TGSI_FILE_STATE_VAR, i, j, reg); } } /* number of constant regs used (each reg is float[8]) */ @@ -330,7 +401,7 @@ static void prealloc_reg(struct brw_wm_compile *c) 
urb_read_length = reg_index; reg = brw_vec8_grf(reg_index, 0); for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); + set_reg(c, TGSI_FILE_PAYLOAD, fp_input, j, reg); } if (c->key.nr_vp_outputs > i) { reg_index += 2; @@ -354,7 +425,7 @@ static void prealloc_reg(struct brw_wm_compile *c) prealloc_grf(c, 127); for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; + const struct brw_fp_instruction *inst = &c->fp_instructions[i]; struct brw_reg dst[4]; switch (inst->Opcode) { @@ -397,7 +468,7 @@ static void prealloc_reg(struct brw_wm_compile *c) * the three GRF slots. */ static void fetch_constants(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint i; @@ -405,9 +476,8 @@ static void fetch_constants(struct brw_wm_compile *c, /* loop over instruction src regs */ for (i = 0; i < 3; i++) { const struct prog_src_register *src = &inst->SrcReg[i]; - if (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM) { + if (src->File == TGSI_FILE_IMMEDIATE || + src->File == TGSI_FILE_CONSTANT) { c->current_const[i].index = src->Index; #if 0 @@ -431,7 +501,7 @@ static void fetch_constants(struct brw_wm_compile *c, * Convert Mesa dst register to brw register. 
*/ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint component) { const int nr = 1; @@ -442,7 +512,7 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, static struct brw_reg get_src_reg_const(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint srcRegIndex, GLuint component) { /* We should have already fetched the constant from the constant @@ -462,7 +532,7 @@ get_src_reg_const(struct brw_wm_compile *c, const_reg = stride(const_reg, 0, 1, 0); const_reg.subnr = component * 4; - if (src->Negate & (1 << component)) + if (src->Negate) const_reg = negate(const_reg); if (src->Abs) const_reg = brw_abs(const_reg); @@ -483,7 +553,7 @@ get_src_reg_const(struct brw_wm_compile *c, * Convert Mesa src register to brw register. */ static struct brw_reg get_src_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint srcRegIndex, GLuint channel) { const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; @@ -499,9 +569,9 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, } if (c->fp->use_const_buffer && - (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM)) { + (src->File == TGSI_FILE_STATE_VAR || + src->File == TGSI_FILE_CONSTANT || + src->File == TGSI_FILE_UNIFORM)) { return get_src_reg_const(c, inst, srcRegIndex, component); } else { @@ -513,26 +583,26 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, /** - * Same as \sa get_src_reg() but if the register is a literal, emit - * a brw_reg encoding the literal. - * Note that a brw instruction only allows one src operand to be a literal. + * Same as \sa get_src_reg() but if the register is a immediate, emit + * a brw_reg encoding the immediate. + * Note that a brw instruction only allows one src operand to be a immediate. 
* For instructions with more than one operand, only the second can be a - * literal. This means that we treat some literals as constants/uniforms - * (which why PROGRAM_CONSTANT is checked in fetch_constants()). + * immediate. This means that we treat some immediates as constants + * (which why TGSI_FILE_IMMEDIATE is checked in fetch_constants()). * */ static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint srcRegIndex, GLuint channel) { const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - if (src->File == PROGRAM_CONSTANT) { - /* a literal */ + if (src->File == TGSI_FILE_IMMEDIATE) { + /* an immediate */ const int component = GET_SWZ(src->Swizzle, channel); const GLfloat *param = c->fp->program.Base.Parameters->ParameterValues[src->Index]; GLfloat value = param[component]; - if (src->Negate & (1 << channel)) + if (src->Negate) value = -value; if (src->Abs) value = FABSF(value); @@ -612,7 +682,7 @@ static void invoke_subroutine( struct brw_wm_compile *c, } static void emit_trunc( struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -630,7 +700,7 @@ static void emit_trunc( struct brw_wm_compile *c, } static void emit_mov( struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -650,7 +720,7 @@ static void emit_mov( struct brw_wm_compile *c, } static void emit_pixel_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -680,7 +750,7 @@ static void emit_pixel_xy(struct brw_wm_compile *c, } static void emit_delta_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction 
*inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; @@ -740,7 +810,7 @@ static void fire_fb_write( struct brw_wm_compile *c, } static void emit_fb_write(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; int nr = 2; @@ -808,7 +878,7 @@ static void emit_fb_write(struct brw_wm_compile *c, } static void emit_pixel_w( struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -838,7 +908,7 @@ static void emit_pixel_w( struct brw_wm_compile *c, } static void emit_linterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -867,7 +937,7 @@ static void emit_linterp(struct brw_wm_compile *c, } static void emit_cinterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -893,7 +963,7 @@ static void emit_cinterp(struct brw_wm_compile *c, } static void emit_pinterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -927,7 +997,7 @@ static void emit_pinterp(struct brw_wm_compile *c, /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. 
*/ static void emit_frontfacing(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); @@ -956,7 +1026,7 @@ static void emit_frontfacing(struct brw_wm_compile *c, } static void emit_xpd(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -981,13 +1051,13 @@ static void emit_xpd(struct brw_wm_compile *c, } static void emit_dp3(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_reg src0[3], src1[3], dst; int i; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; @@ -1008,13 +1078,13 @@ static void emit_dp3(struct brw_wm_compile *c, } static void emit_dp4(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; @@ -1035,13 +1105,13 @@ static void emit_dp4(struct brw_wm_compile *c, } static void emit_dph(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; @@ -1067,12 +1137,12 @@ static void emit_dph(struct brw_wm_compile *c, * register's X, Y, Z and W channels (subject to 
writemasking of course). */ static void emit_math1(struct brw_wm_compile *c, - const struct prog_instruction *inst, GLuint func) + const struct brw_fp_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; @@ -1095,43 +1165,43 @@ static void emit_math1(struct brw_wm_compile *c, } static void emit_rcp(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_INV); } static void emit_rsq(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); } static void emit_sin(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); } static void emit_cos(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_COS); } static void emit_ex2(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); } static void emit_lg2(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); } static void emit_add(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1150,7 +1220,7 @@ static void emit_add(struct brw_wm_compile *c, } static void emit_arl(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, 
addr_reg; @@ -1164,7 +1234,7 @@ static void emit_arl(struct brw_wm_compile *c, static void emit_mul(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1183,7 +1253,7 @@ static void emit_mul(struct brw_wm_compile *c, } static void emit_frc(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -1202,7 +1272,7 @@ static void emit_frc(struct brw_wm_compile *c, } static void emit_flr(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -1221,7 +1291,7 @@ static void emit_flr(struct brw_wm_compile *c, static void emit_min_max(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; const GLuint mask = inst->DstReg.WriteMask; @@ -1269,12 +1339,12 @@ static void emit_min_max(struct brw_wm_compile *c, } static void emit_pow(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; @@ -1299,7 +1369,7 @@ static void emit_pow(struct brw_wm_compile *c, } static void emit_lrp(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1352,7 +1422,7 @@ static void emit_kil(struct brw_wm_compile *c) } static void emit_mad(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { 
struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1375,7 +1445,7 @@ static void emit_mad(struct brw_wm_compile *c, } static void emit_sop(struct brw_wm_compile *c, - const struct prog_instruction *inst, GLuint cond) + const struct brw_fp_instruction *inst, GLuint cond) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1399,37 +1469,37 @@ static void emit_sop(struct brw_wm_compile *c, } static void emit_slt(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_L); } static void emit_sle(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_LE); } static void emit_sgt(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_G); } static void emit_sge(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_GE); } static void emit_seq(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_EQ); } static void emit_sne(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_NEQ); } @@ -1459,7 +1529,7 @@ static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) static void emit_wpos_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1494,25 +1564,25 @@ static void emit_wpos_xy(struct brw_wm_compile *c, BIAS on SIMD8 not working yet... 
*/ static void emit_txb(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; + /* Note: tex_unit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->tex_unit; GLuint i; GLuint msg_type; assert(unit < BRW_MAX_TEX_UNIT); - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) src[i] = get_src_reg(c, inst, 0, i); - switch (inst->TexSrcTarget) { + switch (inst->tex_target) { case TEXTURE_1D_INDEX: brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ @@ -1561,12 +1631,12 @@ static void emit_txb(struct brw_wm_compile *c, static void emit_tex(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; + /* Note: tex_unit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->tex_unit; GLuint msg_len; GLuint i, nr; GLuint emit; @@ -1575,14 +1645,14 @@ static void emit_tex(struct brw_wm_compile *c, assert(unit < BRW_MAX_TEX_UNIT); - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) src[i] = get_src_reg(c, inst, 0, i); - switch (inst->TexSrcTarget) { + switch (inst->tex_target) { case TEXTURE_1D_INDEX: emit = WRITEMASK_X; nr = 1; @@ -1657,7 +1727,7 
@@ static void post_wm_emit( struct brw_wm_compile *c ) static void get_argument_regs(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, int index, struct brw_reg *regs, int mask) @@ -1686,7 +1756,7 @@ static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_ brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; + const struct brw_fp_instruction *inst = &c->fp_instructions[i]; int dst_flags; struct brw_reg args[3][4], dst[4]; int j; diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index d8b9028927..7b18335dec 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -28,9 +28,10 @@ * Authors: * Keith Whitwell */ - -#include "brw_context.h" +#include "util/u_memory.h" + +#include "brw_debug.h" #include "brw_wm.h" @@ -133,19 +134,19 @@ static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c, /* Search for an existing const value matching the request: */ for (i = 0; i < c->nr_imm_refs; i++) { - if (c->imm_ref[i].imm_val == *imm1f) + if (c->imm_ref[i].imm1f == *imm1f) return c->imm_ref[i].ref; } /* Else try to add a new one: */ - if (c->nr_imm_refs < BRW_WM_MAX_IMM) { + if (c->nr_imm_refs < Elements(c->imm_ref)) { GLuint i = c->nr_imm_refs++; /* An immediate is a special type of parameter: */ - c->imm_ref[i].imm_val = *imm_val; - c->imm_ref[i].ref = get_param_ref(c, imm_val); + c->imm_ref[i].imm1f = *imm1f; + c->imm_ref[i].ref = get_param_ref(c, imm1f); return c->imm_ref[i].ref; } @@ -180,7 +181,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, break; case TGSI_FILE_IMMEDIATE: - ref = get_imm_ref(c, &plist->ParameterValues[idx][component]); + ref = get_imm_ref(c, &c->immediate[idx].v[component]); break; default: @@ -205,16 +206,16 @@ static const 
struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, static void pass0_set_dst( struct brw_wm_compile *c, struct brw_wm_instruction *out, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint writemask ) { - const struct prog_dst_register *dst = &inst->DstReg; + const struct brw_fp_dst dst = inst->dst; GLuint i; for (i = 0; i < 4; i++) { if (writemask & (1<<i)) { out->dst[i] = get_value(c); - pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]); + pass0_set_fpreg_value(c, dst.file, dst.index, i, out->dst[i]); } } @@ -223,27 +224,15 @@ static void pass0_set_dst( struct brw_wm_compile *c, static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, - struct prog_src_register src, + struct brw_fp_src src, GLuint i ) { - GLuint component = GET_SWZ(src.Swizzle,i); - const struct brw_wm_ref *src_ref; - static const GLfloat const_zero = 0.0; - static const GLfloat const_one = 1.0; - - if (component == SWIZZLE_ZERO) - src_ref = get_imm_ref(c, &const_zero); - else if (component == SWIZZLE_ONE) - src_ref = get_imm_ref(c, &const_one); - else - src_ref = pass0_get_reg(c, src.File, src.Index, component); - - return src_ref; + return pass0_get_reg(c, src.file, src.index, GET_SWZ(src.swizzle,i)); } static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, - struct prog_src_register src, + struct brw_fp_src src, GLuint i, struct brw_wm_instruction *insn) { @@ -259,10 +248,10 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, newref->value->lastuse = newref; } - if (src.Negate & (1 << i)) + if (src.negate) newref->hw_reg.negate ^= 1; - if (src.Abs) { + if (src.abs) { newref->hw_reg.negate = 0; newref->hw_reg.abs = 1; } @@ -273,21 +262,21 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, static void translate_insn(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_wm_instruction *out = get_instruction(c); - GLuint
writemask = inst->dst.WriteMask; - GLuint nr_args = brw_wm_nr_args(inst->Opcode); + GLuint writemask = inst->dst.writemask; + GLuint nr_args = brw_wm_nr_args(inst->opcode); GLuint i, j; /* Copy some data out of the instruction */ - out->opcode = inst->Opcode; - out->saturate = inst->dst.Saturate; - out->tex_unit = inst->TexSrcUnit; - out->tex_target = inst->TexSrcTarget; - out->eot = inst->Aux & 1; - out->target = inst->Aux >> 1; + out->opcode = inst->opcode; + out->saturate = inst->dst.saturate; + out->tex_unit = inst->tex_unit; + out->tex_target = inst->tex_target; + out->eot = inst->eot; //inst->Aux & 1; + out->target = inst->target; //inst->Aux >> 1; /* Args: */ @@ -308,10 +297,10 @@ translate_insn(struct brw_wm_compile *c, * Optimize moves and swizzles away: */ static void pass0_precalc_mov( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + const struct brw_fp_instruction *inst ) { - const struct prog_dst_register *dst = &inst->DstReg; - GLuint writemask = inst->DstReg.WriteMask; + const struct brw_fp_dst dst = inst->dst; + GLuint writemask = dst.writemask; struct brw_wm_ref *refs[4]; GLuint i; @@ -323,11 +312,11 @@ static void pass0_precalc_mov( struct brw_wm_compile *c, * one loop and the above case was incorrectly handled. */ for (i = 0; i < 4; i++) { - refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL); + refs[i] = get_new_ref(c, inst->src[0], i, NULL); } for (i = 0; i < 4; i++) { if (writemask & (1 << i)) { - pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]); + pass0_set_fpreg_ref( c, dst.file, dst.index, i, refs[i]); } } } @@ -341,12 +330,12 @@ static void pass0_init_payload( struct brw_wm_compile *c ) for (i = 0; i < 4; i++) { GLuint j = i >= c->key.nr_depth_regs ? 
0 : i; - pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, + pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, PAYLOAD_DEPTH, i, &c->payload.depth[j] ); } - for (i = 0; i < FRAG_ATTRIB_MAX; i++) - pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, + for (i = 0; i < c->key.nr_inputs; i++) + pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, i, 0, &c->payload.input_interp[i] ); } @@ -360,7 +349,7 @@ static void pass0_init_payload( struct brw_wm_compile *c ) * * Translate away swizzling and eliminate non-saturating moves. * - * Translate instructions from Mesa's prog_instruction structs to our + * Translate instructions from our fp_instruction structs to our * internal brw_wm_instruction representation. */ void brw_wm_pass0( struct brw_wm_compile *c ) @@ -374,13 +363,13 @@ void brw_wm_pass0( struct brw_wm_compile *c ) pass0_init_payload(c); for (insn = 0; insn < c->nr_fp_insns; insn++) { - const struct prog_instruction *inst = &c->prog_instructions[insn]; + const struct brw_fp_instruction *inst = &c->fp_instructions[insn]; /* Optimize away moves, otherwise emit translated instruction: */ - switch (inst->Opcode) { - case OPCODE_MOV: - if (!inst->dst.Saturate) { + switch (inst->opcode) { + case TGSI_OPCODE_MOV: + if (!inst->dst.saturate) { pass0_precalc_mov(c, inst); } else { diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index b0356b1bd5..09ad2b8f5b 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -30,8 +30,8 @@ */ -#include "brw_context.h" #include "brw_wm.h" +#include "brw_debug.h" static GLuint get_tracked_mask(struct brw_wm_compile *c, @@ -223,11 +223,11 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: - read0 = get_texcoord_mask(inst->tex_idx); + read0 = get_texcoord_mask(inst->tex_target); break; case TGSI_OPCODE_TXB: - read0 = get_texcoord_mask(inst->tex_idx) | BRW_WRITEMASK_W; + read0 = get_texcoord_mask(inst->tex_target) 
| BRW_WRITEMASK_W; break; case WM_WPOSXY: @@ -276,7 +276,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case TGSI_OPCODE_DST: case WM_FRONTFACING: - case TGSI_OPCODE_KIL_NV: + case TGSI_OPCODE_KILP: default: break; } diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c index a19ca62328..d3d678a5e6 100644 --- a/src/gallium/drivers/i965/brw_wm_pass2.c +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -30,7 +30,7 @@ */ -#include "brw_context.h" +#include "brw_debug.h" #include "brw_wm.h" @@ -82,27 +82,14 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < FRAG_ATTRIB_MAX; j++) { - if (c->key.vp_outputs_written & (1<<j)) { - int fp_index; - - if (j >= VERT_RESULT_VAR0) - fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); - else if (j <= VERT_RESULT_TEX7) - fp_index = j; - else - fp_index = -1; - - nr_interp_regs++; - if (fp_index >= 0) - prealloc_reg(c, &c->payload.input_interp[fp_index], i++); - } + for (j = 0; j < c->key.vp_nr_outputs; j++) { + prealloc_reg(c, &c->payload.input_interp[j], i++); } assert(nr_interp_regs >= 1); c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.urb_read_length = c->key.vp_nr_outputs * 2; c->prog_data.curb_read_length = c->nr_creg * 2; c->max_wm_grf = i * 2; @@ -308,9 +295,9 @@ void brw_wm_pass2( struct brw_wm_compile *c ) /* Allocate registers to hold results: */ switch (inst->opcode) { - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXP: alloc_contiguous_dest(c, inst->dst, 4, insn); break; -- cgit v1.2.3 From 212fb8adbd0e5e28a5d20b0cc03cde46df2831f4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 10:24:19 +0000 Subject: i965g: don't set up vs stack register for non-branching shaders --- src/gallium/drivers/i965/brw_context.h | 2 ++
src/gallium/drivers/i965/brw_pipe_shader.c | 20 ++++++++++---------- src/gallium/drivers/i965/brw_vs_emit.c | 11 ++++++++--- src/gallium/drivers/i965/brw_wm.c | 3 --- src/gallium/drivers/i965/brw_wm.h | 1 - 5 files changed, 20 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 34799d5211..b81dff0aa0 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -154,6 +154,8 @@ struct brw_vertex_shader { const struct tgsi_token *tokens; struct tgsi_shader_info info; + unsigned has_flow_control:1; + unsigned id; struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 662c43c3e5..44f9ad6f9c 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -43,15 +43,15 @@ * Determine if the given shader uses complex features such as flow * conditionals, loops, subroutines. 
*/ -GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp) +static GLboolean has_flow_control(const struct tgsi_shader_info *info) { - return (fp->info.opcode_count[TGSI_OPCODE_ARL] > 0 || - fp->info.opcode_count[TGSI_OPCODE_IF] > 0 || - fp->info.opcode_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */ - fp->info.opcode_count[TGSI_OPCODE_CAL] > 0 || - fp->info.opcode_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */ - fp->info.opcode_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */ - fp->info.opcode_count[TGSI_OPCODE_BGNLOOP] > 0); + return (info->opcode_count[TGSI_OPCODE_ARL] > 0 || + info->opcode_count[TGSI_OPCODE_IF] > 0 || + info->opcode_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */ + info->opcode_count[TGSI_OPCODE_CAL] > 0 || + info->opcode_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */ + info->opcode_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */ + info->opcode_count[TGSI_OPCODE_BGNLOOP] > 0); } @@ -88,7 +88,7 @@ static void *brw_create_fs_state( struct pipe_context *pipe, /* Duplicate tokens, scan shader */ fs->id = brw->program_id++; - fs->has_flow_control = brw_wm_has_flow_control(fs); + fs->has_flow_control = has_flow_control(&fs->info); fs->tokens = tgsi_dup_tokens(shader->tokens); if (fs->tokens == NULL) @@ -126,7 +126,7 @@ static void *brw_create_vs_state( struct pipe_context *pipe, /* Duplicate tokens, scan shader */ vs->id = brw->program_id++; - //vs->has_flow_control = brw_wm_has_flow_control(vs); + vs->has_flow_control = has_flow_control(&vs->info); vs->tokens = tgsi_dup_tokens(shader->tokens); if (vs->tokens == NULL) diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 25aea87b8f..e0fadc8dce 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -252,8 +252,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } #endif - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); - reg += 2; + if 
(c->vp->has_flow_control) { + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; + } /* Some opcodes need an internal temporary: */ @@ -1592,7 +1594,10 @@ void brw_vs_emit(struct brw_vs_compile *c) /* Static register allocation */ brw_vs_alloc_regs(c); - brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + + if (c->vp->has_flow_control) { + brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + } /* Instructions */ diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 93f90bf329..7f2cb15256 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -162,9 +162,6 @@ static enum pipe_error do_wm_prog( struct brw_context *brw, brw_init_compile(brw, &c->func); - /* temporary sanity check assertion */ - assert(fp->has_flow_control == brw_wm_has_flow_control(c->fp)); - /* * Shader which use GLSL features such as flow control are handled * differently from "simple" shaders. diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 48dac39756..28d216260e 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -338,7 +338,6 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); -GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp); void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c); void emit_ddxy(struct brw_compile *p, -- cgit v1.2.3 From a485341455bb270001aad8b39c7b9fa36ac74478 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 11:56:52 +0000 Subject: i965g: add dumping for our new pass_fp output --- src/gallium/drivers/i965/brw_screen.c | 2 +- src/gallium/drivers/i965/brw_wm.h | 4 +- src/gallium/drivers/i965/brw_wm_debug.c | 163 ++++++++++++++++++++++++-------- src/gallium/drivers/i965/brw_wm_fp.c | 35 ++----- src/gallium/drivers/i965/brw_wm_glsl.c | 4 +- 
src/gallium/drivers/i965/brw_wm_pass0.c | 2 +- 6 files changed, 139 insertions(+), 71 deletions(-) (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 9d8066442b..575a418b7d 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -293,7 +293,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) #ifdef DEBUG BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); - BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB; + BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB | DEBUG_WM; #endif memset(&chipset, 0, sizeof chipset); diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 28d216260e..7d044ff6ec 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -181,7 +181,6 @@ struct brw_wm_instruction { #define Y 1 #define Z 2 #define W 3 -#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3) struct brw_fp_src { @@ -333,6 +332,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c, void brw_wm_print_program( struct brw_wm_compile *c, const char *stage ); +void brw_wm_print_fp_program( struct brw_wm_compile *c, + const char *stage ); + void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, GLboolean ps_uses_depth, diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c index 65d7626eea..3d11fa074c 100644 --- a/src/gallium/drivers/i965/brw_wm_debug.c +++ b/src/gallium/drivers/i965/brw_wm_debug.c @@ -34,6 +34,62 @@ #include "brw_context.h" #include "brw_wm.h" +static void print_writemask( unsigned writemask ) +{ + if (writemask != BRW_WRITEMASK_XYZW) + debug_printf(".%s%s%s%s", + (writemask & BRW_WRITEMASK_X) ? "x" : "", + (writemask & BRW_WRITEMASK_Y) ? "y" : "", + (writemask & BRW_WRITEMASK_Z) ? "z" : "", + (writemask & BRW_WRITEMASK_W) ? 
"w" : ""); +} + +static void print_swizzle( unsigned swizzle ) +{ + char *swz = "xyzw"; + if (swizzle != BRW_SWIZZLE_XYZW) + debug_printf(".%c%c%c%c", + swz[BRW_GET_SWZ(swizzle, X)], + swz[BRW_GET_SWZ(swizzle, Y)], + swz[BRW_GET_SWZ(swizzle, Z)], + swz[BRW_GET_SWZ(swizzle, W)]); +} + +static void print_opcode( unsigned opcode ) +{ + switch (opcode) { + case WM_PIXELXY: + debug_printf("PIXELXY"); + break; + case WM_DELTAXY: + debug_printf("DELTAXY"); + break; + case WM_PIXELW: + debug_printf("PIXELW"); + break; + case WM_WPOSXY: + debug_printf("WPOSXY"); + break; + case WM_PINTERP: + debug_printf("PINTERP"); + break; + case WM_LINTERP: + debug_printf("LINTERP"); + break; + case WM_CINTERP: + debug_printf("CINTERP"); + break; + case WM_FB_WRITE: + debug_printf("FB_WRITE"); + break; + case WM_FRONTFACING: + debug_printf("FRONTFACING"); + break; + default: + debug_printf("%s", tgsi_get_opcode_info(opcode)->mnemonic); + break; + } +} void brw_wm_print_value( struct brw_wm_compile *c, struct brw_wm_value *value ) @@ -98,47 +154,11 @@ void brw_wm_print_insn( struct brw_wm_compile *c, debug_printf(","); } debug_printf("]"); - - if (inst->writemask != BRW_WRITEMASK_XYZW) - debug_printf(".%s%s%s%s", - (inst->writemask & BRW_WRITEMASK_X) ? "x" : "", - (inst->writemask & BRW_WRITEMASK_Y) ? "y" : "", - (inst->writemask & BRW_WRITEMASK_Z) ? "z" : "", - (inst->writemask & BRW_WRITEMASK_W) ? 
"w" : ""); - - switch (inst->opcode) { - case WM_PIXELXY: - debug_printf(" = PIXELXY"); - break; - case WM_DELTAXY: - debug_printf(" = DELTAXY"); - break; - case WM_PIXELW: - debug_printf(" = PIXELW"); - break; - case WM_WPOSXY: - debug_printf(" = WPOSXY"); - break; - case WM_PINTERP: - debug_printf(" = PINTERP"); - break; - case WM_LINTERP: - debug_printf(" = LINTERP"); - break; - case WM_CINTERP: - debug_printf(" = CINTERP"); - break; - case WM_FB_WRITE: - debug_printf(" = FB_WRITE"); - break; - case WM_FRONTFACING: - debug_printf(" = FRONTFACING"); - break; - default: - debug_printf(" = %s", tgsi_get_opcode_info(inst->opcode)->mnemonic); - break; - } - + print_writemask(inst->writemask); + + debug_printf(" = "); + print_opcode(inst->opcode); + if (inst->saturate) debug_printf("_SAT"); @@ -173,3 +193,64 @@ void brw_wm_print_program( struct brw_wm_compile *c, debug_printf("\n"); } +static const char *file_strings[TGSI_FILE_COUNT+1] = { + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMPLER", + "ADDR", + "IMM", + "LOOP", + "PAYLOAD" +}; + +static void brw_wm_print_fp_insn( struct brw_wm_compile *c, + struct brw_fp_instruction *inst ) +{ + GLuint i; + GLuint nr_args = brw_wm_nr_args(inst->opcode); + + print_opcode(inst->opcode); + if (inst->dst.saturate) + debug_printf("_SAT"); + debug_printf(" "); + + if (inst->dst.indirect) + debug_printf("["); + + debug_printf("%s[%d]", + file_strings[inst->dst.file], + inst->dst.index ); + print_writemask(inst->dst.writemask); + + if (inst->dst.indirect) + debug_printf("]"); + + debug_printf(nr_args ? ", " : "\n"); + + for (i = 0; i < nr_args; i++) { + debug_printf("%s%s%s[%d]%s", + inst->src[i].negate ? "-" : "", + inst->src[i].abs ? "ABS(" : "", + file_strings[inst->src[i].file], + inst->src[i].index, + inst->src[i].abs ? ")" : ""); + print_swizzle(inst->src[i].swizzle); + debug_printf("%s", i == nr_args - 1 ? 
"\n" : ", "); + } +} + + +void brw_wm_print_fp_program( struct brw_wm_compile *c, + const char *stage ) +{ + GLuint insn; + + debug_printf("%s:\n", stage); + for (insn = 0; insn < c->nr_fp_insns; insn++) + brw_wm_print_fp_insn(c, &c->fp_instructions[insn]); + debug_printf("\n"); +} + diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index bba448815b..74aa02f198 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -45,20 +45,6 @@ #include "brw_debug.h" - - -static const char *wm_opcode_strings[] = { - "PIXELXY", - "DELTAXY", - "PIXELW", - "LINTERP", - "PINTERP", - "CINTERP", - "WPOSXY", - "FB_WRITE", - "FRONTFACING", -}; - /*********************************************************************** * Source regs */ @@ -94,10 +80,10 @@ static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z { unsigned swz = reg.swizzle; - reg.swizzle = ( GET_SWZ(swz, x) << 0 | - GET_SWZ(swz, y) << 2 | - GET_SWZ(swz, z) << 4 | - GET_SWZ(swz, w) << 6 ); + reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 | + BRW_GET_SWZ(swz, y) << 2 | + BRW_GET_SWZ(swz, z) << 4 | + BRW_GET_SWZ(swz, w) << 6 ); return reg; } @@ -200,10 +186,10 @@ out: swizzle |= (swizzle & 0x3) << (j * 2); return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ), - GET_SWZ(swizzle, X), - GET_SWZ(swizzle, Y), - GET_SWZ(swizzle, Z), - GET_SWZ(swizzle, W) ); + BRW_GET_SWZ(swizzle, X), + BRW_GET_SWZ(swizzle, Y), + BRW_GET_SWZ(swizzle, Z), + BRW_GET_SWZ(swizzle, W) ); } @@ -843,7 +829,7 @@ static GLboolean projtex( struct brw_wm_compile *c, return GL_FALSE; /* ut2004 gun rendering !?! 
*/ if (src.file == TGSI_FILE_INPUT && - GET_SWZ(src.swizzle, W) == W && + BRW_GET_SWZ(src.swizzle, W) == W && c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE) return GL_FALSE; @@ -1214,8 +1200,7 @@ int brw_wm_pass_fp( struct brw_wm_compile *c ) } if (BRW_DEBUG & DEBUG_WM) { - debug_printf("pass_fp:\n"); - //brw_print_program( c->fp_brw_program ); + brw_wm_print_fp_program( c, "pass_fp" ); debug_printf("\n"); } diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index 284f819bf8..3b3afc39d3 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -558,7 +558,7 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, { const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; const GLuint nr = 1; - const GLuint component = GET_SWZ(src->Swizzle, channel); + const GLuint component = BRW_GET_SWZ(src->Swizzle, channel); /* Extended swizzle terms */ if (component == SWIZZLE_ZERO) { @@ -598,7 +598,7 @@ static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; if (src->File == TGSI_FILE_IMMEDIATE) { /* an immediate */ - const int component = GET_SWZ(src->Swizzle, channel); + const int component = BRW_GET_SWZ(src->Swizzle, channel); const GLfloat *param = c->fp->program.Base.Parameters->ParameterValues[src->Index]; GLfloat value = param[component]; diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 7b18335dec..53232325d2 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -227,7 +227,7 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, struct brw_fp_src src, GLuint i ) { - return pass0_get_reg(c, src.file, src.index, GET_SWZ(src.swizzle,i)); + return pass0_get_reg(c, src.file, src.index, BRW_GET_SWZ(src.swizzle,i)); } -- cgit v1.2.3 From 
eacd13bcc809e1e877a48c2942eb6285aa21f6be Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 13:09:12 +0000 Subject: i965g: plumb through fb_write target and eot data --- src/gallium/drivers/i965/brw_wm.h | 10 +++++----- src/gallium/drivers/i965/brw_wm_emit.c | 4 ++-- src/gallium/drivers/i965/brw_wm_fp.c | 26 +++++++++++++------------- src/gallium/drivers/i965/brw_wm_pass0.c | 10 +++++++--- src/gallium/drivers/i965/brw_wm_pass1.c | 4 ++-- 5 files changed, 29 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 7d044ff6ec..f85a8af878 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -142,9 +142,10 @@ struct brw_wm_instruction { GLuint saturate:1; GLuint writemask:4; GLuint tex_unit:4; /* texture/sampler unit for texture instructions */ - GLuint tex_target:4; /* TGSI_TEXTURE_x for texture instructions*/ + GLuint target:4; /* TGSI_TEXTURE_x for texture instructions, + * target binding table index for FB_WRITE + */ GLuint eot:1; /* End of thread indicator for FB_WRITE*/ - GLuint target:10; /* target binding table index for FB_WRITE*/ }; @@ -204,10 +205,9 @@ struct brw_fp_instruction { struct brw_fp_dst dst; struct brw_fp_src src[3]; unsigned opcode:8; + unsigned target:8; /* XXX: special usage for FB_WRITE */ unsigned tex_unit:4; - unsigned tex_target:4; - unsigned target:10; /* destination surface for FB_WRITE */ - unsigned eot:1; /* mark last instruction (usually FB_WRITE) */ + unsigned pad:12; }; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 1c38f80cda..a14e12f35b 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -803,7 +803,7 @@ static void emit_tex( struct brw_wm_compile *c, /* How many input regs are there? 
*/ - switch (inst->tex_target) { + switch (inst->target) { case TGSI_TEXTURE_1D: emit = BRW_WRITEMASK_X; nr = 1; @@ -885,7 +885,7 @@ static void emit_txb( struct brw_wm_compile *c, GLuint msg_type; /* Shadow ignored for txb. */ - switch (inst->tex_target) { + switch (inst->target) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: brw_MOV(p, brw_message_reg(2), arg[0]); diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index d27a768a0c..2a207958eb 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -280,18 +280,24 @@ static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c) static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c, GLuint op, struct brw_fp_dst dest, - GLuint tex_src_unit, - GLuint tex_src_target, + GLuint tex_unit, + GLuint target, struct brw_fp_src src0, struct brw_fp_src src1, struct brw_fp_src src2 ) { struct brw_fp_instruction *inst = get_fp_inst(c); + if (tex_unit || target) + assert(op == TGSI_OPCODE_TXP || + op == TGSI_OPCODE_TXB || + op == TGSI_OPCODE_TEX || + op == WM_FB_WRITE); + inst->opcode = op; inst->dst = dest; - inst->tex_unit = tex_src_unit; - inst->tex_target = tex_src_target; + inst->tex_unit = tex_unit; + inst->target = target; inst->src[0] = src0; inst->src[1] = src1; inst->src[2] = src2; @@ -916,23 +922,17 @@ static void emit_fb_write( struct brw_wm_compile *c ) for (i = 0 ; i < c->key.nr_cbufs; i++) { struct brw_fp_src outcolor; - unsigned target = 1<<i; - if (i == c->key.nr_cbufs - 1) - target |= 1; outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); - /* Use emit_tex_op so that we can specify the inst->tex_target + /* Use emit_tex_op so that we can specify the inst->target * field, which is abused to contain the FB write target and the * EOT marker */ emit_tex_op(c, WM_FB_WRITE, dst_undef(), - target, - 0, + (i == c->key.nr_cbufs - 1), /* EOT */ + i, outcolor, payload_r0_depth, outdepth); diff --git
a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 53232325d2..7bb341e2c2 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -274,9 +274,13 @@ translate_insn(struct brw_wm_compile *c, out->opcode = inst->opcode; out->saturate = inst->dst.saturate; out->tex_unit = inst->tex_unit; - out->tex_target = inst->tex_target; - out->eot = inst->eot; //inst->Aux & 1; - out->target = inst->target; //inst->Aux >> 1; + out->target = inst->target; + + /* Nasty hack: + */ + out->eot = (inst->opcode == WM_FB_WRITE && + inst->tex_unit != 0); + /* Args: */ diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index 09ad2b8f5b..005747f00b 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -223,11 +223,11 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: - read0 = get_texcoord_mask(inst->tex_target); + read0 = get_texcoord_mask(inst->target); break; case TGSI_OPCODE_TXB: - read0 = get_texcoord_mask(inst->tex_target) | BRW_WRITEMASK_W; + read0 = get_texcoord_mask(inst->target) | BRW_WRITEMASK_W; break; case WM_WPOSXY: -- cgit v1.2.3 From 9507a6c206627b3ae76e2ae8398fff518e39941a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 20:02:42 -0800 Subject: i965g: fragment shader immediates working --- src/gallium/drivers/i965/brw_curbe.c | 30 ++++++++++++++++----- src/gallium/drivers/i965/brw_wm.h | 9 ------- src/gallium/drivers/i965/brw_wm_pass0.c | 48 ++++++++------------------------- 3 files changed, 34 insertions(+), 53 deletions(-) (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 3e821d5afe..3f031577d5 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -182,16 +182,32 @@ static enum pipe_error 
prepare_curbe_buffer(struct brw_context *brw) /* fragment shader constants */ if (brw->curbe.wm_size) { + const struct brw_fragment_shader *fs = brw->curr.fragment_shader; GLuint offset = brw->curbe.wm_start * 16; - unsigned nr = brw->wm.prog_data->nr_params; + GLuint nr_immediate, nr_const; + + nr_immediate = fs->immediates.nr; + if (nr_immediate) { + memcpy(&buf[offset], + fs->immediates.data, + nr_immediate * 4 * sizeof(float)); - const GLfloat *value = screen->buffer_map( screen, - brw->curr.fragment_constants, - PIPE_BUFFER_USAGE_CPU_READ); + offset += nr_immediate * 4; + } - memcpy(&buf[offset], value, nr * 4 * sizeof(float)); + nr_const = fs->info.file_max[TGSI_FILE_CONSTANT] + 1; +/* nr_const = brw->wm.prog_data->nr_params; */ + if (nr_const) { + const GLfloat *value = screen->buffer_map( screen, + brw->curr.fragment_constants, + PIPE_BUFFER_USAGE_CPU_READ); - screen->buffer_unmap( screen, brw->curr.fragment_constants ); + memcpy(&buf[offset], value, + nr_const * 4 * sizeof(float)); + + screen->buffer_unmap( screen, + brw->curr.fragment_constants ); + } } @@ -226,7 +242,7 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - struct brw_vertex_shader *vs = brw->curr.vertex_shader; + const struct brw_vertex_shader *vs = brw->curr.vertex_shader; GLuint nr_immediate, nr_const; nr_immediate = vs->immediates.nr; diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index f85a8af878..b7d807dcb3 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -129,12 +129,6 @@ struct brw_wm_ref { GLuint insn:24; }; -struct brw_wm_imm_ref { - const struct brw_wm_ref *ref; - GLfloat imm1f; -}; - - struct brw_wm_instruction { struct brw_wm_value *dst[4]; struct brw_wm_ref *src[3][4]; @@ -272,9 +266,6 @@ struct brw_wm_compile { struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; GLuint nr_insns; 
- struct brw_wm_imm_ref imm_ref[BRW_WM_MAX_CONST]; - GLuint nr_imm_refs; - struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2]; GLuint grf_limit; diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 7bb341e2c2..0bacad2b0f 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -30,6 +30,7 @@ */ #include "util/u_memory.h" +#include "util/u_math.h" #include "brw_debug.h" #include "brw_wm.h" @@ -97,9 +98,10 @@ static void pass0_set_fpreg_ref( struct brw_wm_compile *c, } static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, - const GLfloat *param_ptr ) + unsigned idx, + unsigned component) { - GLuint i = c->prog_data.nr_params++; + GLuint i = idx * 4 + component; if (i >= BRW_WM_MAX_PARAM) { debug_printf("%s: out of params\n", __FUNCTION__); @@ -109,8 +111,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, else { struct brw_wm_ref *ref = get_ref(c); - c->prog_data.param[i] = param_ptr; - c->nr_creg = (i+16)/16; + c->nr_creg = MAX2(c->nr_creg, (i+16)/16); /* Push the offsets into hw_reg. These will be added to the * real register numbers once one is allocated in pass2. 
@@ -125,37 +126,6 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, } -/** Return a ref to an immediate value */ -static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c, - const GLfloat *imm1f ) -{ - GLuint i; - - /* Search for an existing const value matching the request: - */ - for (i = 0; i < c->nr_imm_refs; i++) { - if (c->imm_ref[i].imm1f == *imm1f) - return c->imm_ref[i].ref; - } - - /* Else try to add a new one: - */ - if (c->nr_imm_refs < Elements(c->imm_ref)) { - GLuint i = c->nr_imm_refs++; - - /* An immediate is a special type of parameter: - */ - c->imm_ref[i].imm1f = *imm1f; - c->imm_ref[i].ref = get_param_ref(c, imm1f); - - return c->imm_ref[i].ref; - } - else { - debug_printf("%s: out of imm_refs\n", __FUNCTION__); - c->prog_data.error = 1; - return NULL; - } -} /* Lookup our internal registers @@ -177,11 +147,15 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, break; case TGSI_FILE_CONSTANT: - ref = get_param_ref(c, &c->env_param[idx][component]); + ref = get_param_ref(c, + c->fp->info.immediate_count + idx, + component); break; case TGSI_FILE_IMMEDIATE: - ref = get_imm_ref(c, &c->immediate[idx].v[component]); + ref = get_param_ref(c, + idx, + component); break; default: -- cgit v1.2.3 From 8bf75f28de161173d1cdaad8c74bcac074e1211e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 21 Nov 2009 01:52:22 +0000 Subject: i965g: get basic texturing working again Revert to fixed-layout surface binding table -- it's probably the best way to do this. Pass sampler and texture numbers separately even though we're always keeping them the same at present. 
--- src/gallium/drivers/i965/brw_context.h | 13 +++-- src/gallium/drivers/i965/brw_pipe_fb.c | 4 +- src/gallium/drivers/i965/brw_pipe_sampler.c | 3 +- src/gallium/drivers/i965/brw_sf.c | 3 +- src/gallium/drivers/i965/brw_wm.c | 9 +++ src/gallium/drivers/i965/brw_wm.h | 4 +- src/gallium/drivers/i965/brw_wm_emit.c | 34 ++++++------ src/gallium/drivers/i965/brw_wm_fp.c | 39 ++++++++----- src/gallium/drivers/i965/brw_wm_surface_state.c | 74 ++++++++++++++++--------- 9 files changed, 117 insertions(+), 66 deletions(-) (limited to 'src/gallium/drivers/i965/brw_wm.h') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 096c8cf12b..598e747fe0 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -209,9 +209,9 @@ struct brw_fragment_shader { struct brw_sampler { - float border_color[4]; struct brw_ss0 ss0; struct brw_ss1 ss1; + float border_color[4]; struct brw_ss3 ss3; }; @@ -355,20 +355,23 @@ struct brw_vs_ouput_sizes { /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 +/** Max number of render targets in a shader */ +#define BRW_MAX_DRAW_BUFFERS 4 + /** * Size of our surface binding table for the WM. * This contains pointers to the drawing surfaces and current texture * objects and shader constant buffers (+2). */ -#define BRW_WM_MAX_SURF (PIPE_MAX_COLOR_BUFS + BRW_MAX_TEX_UNIT + 1) +#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) /** * Helpers to convert drawing buffers, textures and constant buffers * to surface binding table indexes, for WM. */ -#define SURF_INDEX_DRAW(d) (d) -#define SURF_INDEX_FRAG_CONST_BUFFER (PIPE_MAX_COLOR_BUFS) -#define SURF_INDEX_TEXTURE(t) (PIPE_MAX_COLOR_BUFS + 1 + (t)) +#define BTI_COLOR_BUF(d) (d) +#define BTI_FRAGMENT_CONSTANTS (BRW_MAX_DRAW_BUFFERS) +#define BTI_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t)) /** * Size of surface binding table for the VS. 
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index 1511220447..6b03094f50 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -31,7 +31,7 @@ static void brw_set_framebuffer_state( struct pipe_context *pipe, /* Color buffers: */ - for (i = 0; i < MAX2(fb->nr_cbufs, brw->curr.fb.nr_cbufs); i++) { + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { if (brw->curr.fb.cbufs[i] != fb->cbufs[i]) { brw->state.dirty.mesa |= PIPE_NEW_COLOR_BUFFERS; pipe_surface_reference(&brw->curr.fb.cbufs[i], fb->cbufs[i]); @@ -39,7 +39,7 @@ static void brw_set_framebuffer_state( struct pipe_context *pipe, } if (brw->curr.fb.nr_cbufs != fb->nr_cbufs) { - brw->curr.fb.nr_cbufs = fb->nr_cbufs; + brw->curr.fb.nr_cbufs = MIN2(BRW_MAX_DRAW_BUFFERS, fb->nr_cbufs); brw->state.dirty.mesa |= PIPE_NEW_NR_CBUFS; } } diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index f0a765ecf5..5cd38a43a6 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -107,7 +107,7 @@ static void * brw_create_sampler_state( struct pipe_context *pipe, const struct pipe_sampler_state *template ) { - struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state); + struct brw_sampler *sampler = CALLOC_STRUCT(brw_sampler); sampler->ss0.min_filter = translate_img_filter( template->min_img_filter ); sampler->ss0.mag_filter = translate_img_filter( template->mag_img_filter ); @@ -214,7 +214,6 @@ void brw_pipe_sampler_init( struct brw_context *brw ) brw->base.set_sampler_textures = brw_set_sampler_textures; } - void brw_pipe_sampler_cleanup( struct brw_context *brw ) { } diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index e1986a9dbb..a28fb71589 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -153,9 +153,10 @@ static enum pipe_error upload_sf_prog(struct 
brw_context *brw) case TGSI_INTERPOLATE_CONSTANT: break; case TGSI_INTERPOLATE_LINEAR: + case TGSI_INTERPOLATE_PERSPECTIVE: key.linear_attrs |= 1 << (i+1); break; - case TGSI_INTERPOLATE_PERSPECTIVE: +// case TGSI_INTERPOLATE_PERSPECTIVE: key.persp_attrs |= 1 << (i+1); break; } diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 3c5a2dab7a..2c9d3e5e87 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -56,6 +56,15 @@ GLuint brw_wm_nr_args( GLuint opcode ) case WM_FB_WRITE: case WM_PINTERP: return 3; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXD: + /* sampler arg is held as a field in the instruction, not in an + * actual register: + */ + return tgsi_get_opcode_info(opcode)->num_src - 1; + default: assert(opcode < MAX_OPCODE); return tgsi_get_opcode_info(opcode)->num_src; diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index b7d807dcb3..f1ca9f6369 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -135,6 +135,7 @@ struct brw_wm_instruction { GLuint opcode:8; GLuint saturate:1; GLuint writemask:4; + GLuint sampler:4; GLuint tex_unit:4; /* texture/sampler unit for texture instructions */ GLuint target:4; /* TGSI_TEXTURE_x for texture instructions, * target binding table index for FB_WRITE @@ -201,7 +202,8 @@ struct brw_fp_instruction { unsigned opcode:8; unsigned target:8; /* XXX: special usage for FB_WRITE */ unsigned tex_unit:4; - unsigned pad:12; + unsigned sampler:4; + unsigned pad:8; }; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index a14e12f35b..3250db1848 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -792,7 +792,8 @@ static void emit_tex( struct brw_wm_compile *c, const struct brw_wm_instruction *inst, struct brw_reg *dst, GLuint dst_flags, - struct brw_reg *arg ) 
+ struct brw_reg *coord, + GLuint sampler) { struct brw_compile *p = &c->func; GLuint msgLength, responseLength; @@ -838,7 +839,7 @@ static void emit_tex( struct brw_wm_compile *c, for (i = 0; i < nr; i++) { static const GLuint swz[4] = {0,1,2,2}; if (emit & (1<payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ + BTI_TEXTURE(inst->tex_unit), + sampler, /* sampler index */ inst->writemask, msg_type, responseLength, @@ -878,7 +879,8 @@ static void emit_txb( struct brw_wm_compile *c, const struct brw_wm_instruction *inst, struct brw_reg *dst, GLuint dst_flags, - struct brw_reg *arg ) + struct brw_reg *coord, + GLuint sampler ) { struct brw_compile *p = &c->func; GLuint msgLength; @@ -888,7 +890,7 @@ static void emit_txb( struct brw_wm_compile *c, switch (inst->target) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: - brw_MOV(p, brw_message_reg(2), arg[0]); + brw_MOV(p, brw_message_reg(2), coord[0]); brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); break; @@ -896,22 +898,22 @@ static void emit_txb( struct brw_wm_compile *c, case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOWRECT: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); + brw_MOV(p, brw_message_reg(2), coord[0]); + brw_MOV(p, brw_message_reg(4), coord[1]); brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); break; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), arg[2]); + brw_MOV(p, brw_message_reg(2), coord[0]); + brw_MOV(p, brw_message_reg(4), coord[1]); + brw_MOV(p, brw_message_reg(6), coord[2]); break; default: /* unexpected target */ abort(); } - brw_MOV(p, brw_message_reg(8), arg[3]); + brw_MOV(p, brw_message_reg(8), coord[3]); msgLength = 9; if (BRW_IS_IGDNG(p->brw)) @@ -923,8 +925,8 @@ static void emit_txb( struct 
brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ + BTI_TEXTURE(inst->tex_unit), + sampler, /* sampler index */ inst->writemask, msg_type, 8, /* responseLength */ @@ -1483,11 +1485,11 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Texturing operations: */ case TGSI_OPCODE_TEX: - emit_tex(c, inst, dst, dst_flags, args[0]); + emit_tex(c, inst, dst, dst_flags, args[0], inst->sampler); break; case TGSI_OPCODE_TXB: - emit_txb(c, inst, dst, dst_flags, args[0]); + emit_txb(c, inst, dst, dst_flags, args[0], inst->sampler); break; case TGSI_OPCODE_KIL: diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 174486a101..a8b5e15f36 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -282,6 +282,7 @@ static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c, struct brw_fp_dst dest, GLuint tex_unit, GLuint target, + GLuint sampler, struct brw_fp_src src0, struct brw_fp_src src1, struct brw_fp_src src2 ) @@ -298,6 +299,7 @@ static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c, inst->dst = dest; inst->tex_unit = tex_unit; inst->target = target; + inst->sampler = sampler; inst->src[0] = src0; inst->src[1] = src1; inst->src[2] = src2; @@ -313,7 +315,7 @@ static INLINE void emit_op3(struct brw_wm_compile *c, struct brw_fp_src src1, struct brw_fp_src src2 ) { - emit_tex_op(c, op, dest, 0, 0, src0, src1, src2); + emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src2); } @@ -323,7 +325,7 @@ static INLINE void emit_op2(struct brw_wm_compile *c, struct brw_fp_src src0, struct brw_fp_src src1) { - emit_tex_op(c, op, dest, 0, 0, src0, src1, src_undef()); + emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src_undef()); } static INLINE void emit_op1(struct brw_wm_compile *c, @@ -331,14 +333,14 @@ static INLINE void emit_op1(struct 
brw_wm_compile *c, struct brw_fp_dst dest, struct brw_fp_src src0) { - emit_tex_op(c, op, dest, 0, 0, src0, src_undef(), src_undef()); + emit_tex_op(c, op, dest, 0, 0, 0, src0, src_undef(), src_undef()); } static INLINE void emit_op0(struct brw_wm_compile *c, GLuint op, struct brw_fp_dst dest) { - emit_tex_op(c, op, dest, 0, 0, src_undef(), src_undef(), src_undef()); + emit_tex_op(c, op, dest, 0, 0, 0, src_undef(), src_undef(), src_undef()); } @@ -674,7 +676,8 @@ static void precalc_tex( struct brw_wm_compile *c, struct brw_fp_dst dst, unsigned target, unsigned unit, - struct brw_fp_src src0 ) + struct brw_fp_src src0, + struct brw_fp_src sampler ) { struct brw_fp_src coord = src_undef(); struct brw_fp_dst tmp = dst_undef(); @@ -751,6 +754,7 @@ static void precalc_tex( struct brw_wm_compile *c, dst_saturate(tmp, dst.saturate), unit, target, + sampler.index, coord, src_undef(), src_undef()); @@ -802,6 +806,7 @@ static void precalc_tex( struct brw_wm_compile *c, dst, unit, target, + sampler.index, coord, src_undef(), src_undef()); @@ -851,7 +856,8 @@ static void precalc_txp( struct brw_wm_compile *c, struct brw_fp_dst dst, unsigned target, unsigned unit, - struct brw_fp_src src0 ) + struct brw_fp_src src0, + struct brw_fp_src sampler ) { if (projtex(c, target, src0)) { struct brw_fp_dst tmp = get_temp(c); @@ -877,7 +883,8 @@ static void precalc_txp( struct brw_wm_compile *c, dst, target, unit, - src_reg_from_dst(tmp)); + src_reg_from_dst(tmp), + sampler ); release_temp(c, tmp); } @@ -885,7 +892,7 @@ static void precalc_txp( struct brw_wm_compile *c, { /* dst = TEX src0 */ - precalc_tex(c, dst, target, unit, src0); + precalc_tex(c, dst, target, unit, src0, sampler); } } @@ -936,6 +943,7 @@ static void emit_fb_write( struct brw_wm_compile *c ) dst_undef(), (i == c->key.nr_cbufs - 1), /* EOT */ i, + 0, /* no sampler */ outcolor, payload_r0_depth, outdepth); @@ -1056,15 +1064,17 @@ static void emit_insn( struct brw_wm_compile *c, case TGSI_OPCODE_TEX: precalc_tex(c, dst, 
inst->InstructionExtTexture.Texture, - src[0].file, /* sampler unit */ - src[1] ); + src[1].index, /* use sampler unit for tex idx */ + src[0], /* coord */ + src[1]); /* sampler */ break; case TGSI_OPCODE_TXP: precalc_txp(c, dst, inst->InstructionExtTexture.Texture, - src[0].file, /* sampler unit */ - src[1] ); + src[1].index, /* use sampler unit for tex idx */ + src[0], /* coord */ + src[1]); /* sampler */ break; case TGSI_OPCODE_TXB: @@ -1072,8 +1082,9 @@ static void emit_insn( struct brw_wm_compile *c, */ precalc_tex(c, dst, inst->InstructionExtTexture.Texture, - src[0].file, /* sampler unit */ - src[1] ); + src[1].index, /* use sampler unit for tex idx*/ + src[0], + src[1]); break; case TGSI_OPCODE_XPD: diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index f882331433..f92b8198ed 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -149,19 +149,23 @@ brw_wm_get_binding_table(struct brw_context *brw, enum pipe_error ret; struct brw_winsys_reloc reloc[BRW_WM_MAX_SURF]; uint32_t data[BRW_WM_MAX_SURF]; + GLuint nr_relocs = 0; GLuint data_size = brw->wm.nr_surfaces * sizeof data[0]; int i; assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); assert(brw->wm.nr_surfaces > 0); - /* Emit binding table relocations to surface state */ + /* Emit binding table relocations to surface state + */ for (i = 0; i < brw->wm.nr_surfaces; i++) { - make_reloc(&reloc[i], - BRW_USAGE_STATE, - 0, - i * sizeof(GLuint), - brw->wm.surf_bo[i]); + if (brw->wm.surf_bo[i]) { + make_reloc(&reloc[nr_relocs++], + BRW_USAGE_STATE, + 0, + i * sizeof(GLuint), + brw->wm.surf_bo[i]); + } } /* Note there is no key for this search beyond the values in the @@ -169,7 +173,7 @@ brw_wm_get_binding_table(struct brw_context *brw, */ if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, - reloc, brw->wm.nr_surfaces, + reloc, nr_relocs, NULL, bo_out)) return PIPE_OK; @@ -182,7 
+186,7 @@ brw_wm_get_binding_table(struct brw_context *brw, ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, - reloc, brw->wm.nr_surfaces, + reloc, nr_relocs, data, data_size, NULL, NULL, bo_out); @@ -208,40 +212,60 @@ static enum pipe_error prepare_wm_surfaces(struct brw_context *brw ) for (i = 0; i < brw->curr.fb.nr_cbufs; i++) { ret = brw_update_render_surface(brw, brw_surface(brw->curr.fb.cbufs[i]), - &brw->wm.surf_bo[nr_surfaces++]); + &brw->wm.surf_bo[BTI_COLOR_BUF(i)]); if (ret) return ret; + + nr_surfaces = BTI_COLOR_BUF(i) + 1; + } + + + + /* PIPE_NEW_FRAGMENT_CONSTANTS + */ +#if 0 + if (brw->curr.fragment_constants) { + ret = brw_update_fragment_constant_surface( + brw, + brw->curr.fragment_constants, + &brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS]); + + if (ret) + return ret; + + nr_surfaces = BTI_FRAGMENT_CONSTANTS + 1; } + else { + bo_reference(&brw->wm.surf_bo[SURF_FRAG_CONSTANTS], NULL); + } +#endif + /* PIPE_NEW_TEXTURE */ for (i = 0; i < brw->curr.num_textures; i++) { ret = brw_update_texture_surface(brw, brw_texture(brw->curr.texture[i]), - &brw->wm.surf_bo[nr_surfaces++]); + &brw->wm.surf_bo[BTI_TEXTURE(i)]); if (ret) return ret; + + nr_surfaces = BTI_TEXTURE(i) + 1; } - /* PIPE_NEW_FRAGMENT_CONSTANTS + /* Clear any inactive entries: */ -#if 0 - if (brw->curr.fragment_constants) { - ret = brw_update_fragment_constant_surface(brw, - brw->curr.fragment_constants, - &brw->wm.surf_bo[nr_surfaces++]); - if (ret) - return ret; - } -#endif + for (i = brw->curr.fb.nr_cbufs; i < BRW_MAX_DRAW_BUFFERS; i++) + bo_reference(&brw->wm.surf_bo[BTI_COLOR_BUF(i)], NULL); - if (brw->wm.nr_surfaces != nr_surfaces) { + if (!brw->curr.fragment_constants) + bo_reference(&brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS], NULL); - /* Unreference any left-over old buffers - */ - for (i = nr_surfaces; i < brw->wm.nr_surfaces; i++) - bo_reference(&brw->wm.surf_bo[i], NULL); + /* XXX: no pipe_max_textures define?? 
*/ + for (i = brw->curr.num_textures; i < PIPE_MAX_SAMPLERS; i++) + bo_reference(&brw->wm.surf_bo[BTI_TEXTURE(i)], NULL); + if (brw->wm.nr_surfaces != nr_surfaces) { brw->wm.nr_surfaces = nr_surfaces; brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; } -- cgit v1.2.3