From 2f5f7c07732577f60666e3cee69c75c9b035c145 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 23 Oct 2009 16:55:02 +0100 Subject: i965g: re-starting from the dri driver --- src/gallium/drivers/i965/Makefile | 104 + src/gallium/drivers/i965/brw_cc.c | 297 +++ src/gallium/drivers/i965/brw_clip.c | 273 ++ src/gallium/drivers/i965/brw_clip.h | 179 ++ src/gallium/drivers/i965/brw_clip_line.c | 276 ++ src/gallium/drivers/i965/brw_clip_point.c | 56 + src/gallium/drivers/i965/brw_clip_state.c | 184 ++ src/gallium/drivers/i965/brw_clip_tri.c | 603 +++++ src/gallium/drivers/i965/brw_clip_unfilled.c | 505 ++++ src/gallium/drivers/i965/brw_clip_util.c | 396 +++ src/gallium/drivers/i965/brw_context.c | 173 ++ src/gallium/drivers/i965/brw_context.h | 767 ++++++ src/gallium/drivers/i965/brw_curbe.c | 376 +++ src/gallium/drivers/i965/brw_defines.h | 851 +++++++ src/gallium/drivers/i965/brw_disasm.c | 903 +++++++ src/gallium/drivers/i965/brw_draw.c | 493 ++++ src/gallium/drivers/i965/brw_draw.h | 54 + src/gallium/drivers/i965/brw_draw_upload.c | 742 ++++++ src/gallium/drivers/i965/brw_eu.c | 254 ++ src/gallium/drivers/i965/brw_eu.h | 968 +++++++ src/gallium/drivers/i965/brw_eu_debug.c | 95 + src/gallium/drivers/i965/brw_eu_emit.c | 1425 +++++++++++ src/gallium/drivers/i965/brw_eu_util.c | 126 + src/gallium/drivers/i965/brw_gs.c | 201 ++ src/gallium/drivers/i965/brw_gs.h | 76 + src/gallium/drivers/i965/brw_gs_emit.c | 186 ++ src/gallium/drivers/i965/brw_gs_state.c | 149 ++ src/gallium/drivers/i965/brw_misc_state.c | 545 ++++ src/gallium/drivers/i965/brw_program.c | 166 ++ src/gallium/drivers/i965/brw_queryobj.c | 254 ++ src/gallium/drivers/i965/brw_sf.c | 200 ++ src/gallium/drivers/i965/brw_sf.h | 113 + src/gallium/drivers/i965/brw_sf_emit.c | 739 ++++++ src/gallium/drivers/i965/brw_sf_state.c | 365 +++ src/gallium/drivers/i965/brw_state.h | 173 ++ src/gallium/drivers/i965/brw_state_batch.c | 99 + src/gallium/drivers/i965/brw_state_cache.c | 597 +++++ 
src/gallium/drivers/i965/brw_state_dump.c | 224 ++ src/gallium/drivers/i965/brw_state_upload.c | 416 ++++ src/gallium/drivers/i965/brw_structs.h | 1575 ++++++++++++ src/gallium/drivers/i965/brw_tex.c | 59 + src/gallium/drivers/i965/brw_tex_layout.c | 222 ++ src/gallium/drivers/i965/brw_urb.c | 250 ++ src/gallium/drivers/i965/brw_util.c | 104 + src/gallium/drivers/i965/brw_util.h | 45 + src/gallium/drivers/i965/brw_vs.c | 124 + src/gallium/drivers/i965/brw_vs.h | 88 + src/gallium/drivers/i965/brw_vs_emit.c | 1667 +++++++++++++ src/gallium/drivers/i965/brw_vs_state.c | 185 ++ src/gallium/drivers/i965/brw_vs_surface_state.c | 226 ++ src/gallium/drivers/i965/brw_wm.c | 375 +++ src/gallium/drivers/i965/brw_wm.h | 309 +++ src/gallium/drivers/i965/brw_wm_debug.c | 174 ++ src/gallium/drivers/i965/brw_wm_emit.c | 1509 +++++++++++ src/gallium/drivers/i965/brw_wm_fp.c | 1177 +++++++++ src/gallium/drivers/i965/brw_wm_glsl.c | 3046 +++++++++++++++++++++++ src/gallium/drivers/i965/brw_wm_iz.c | 157 ++ src/gallium/drivers/i965/brw_wm_pass0.c | 442 ++++ src/gallium/drivers/i965/brw_wm_pass1.c | 291 +++ src/gallium/drivers/i965/brw_wm_pass2.c | 343 +++ src/gallium/drivers/i965/brw_wm_sampler_state.c | 369 +++ src/gallium/drivers/i965/brw_wm_state.c | 317 +++ src/gallium/drivers/i965/brw_wm_surface_state.c | 752 ++++++ src/gallium/drivers/i965/intel_batchbuffer.h | 184 ++ src/gallium/drivers/i965/intel_chipset.h | 118 + src/gallium/drivers/i965/intel_structs.h | 132 + src/gallium/drivers/i965/intel_tex_format.c | 225 ++ src/gallium/drivers/i965/intel_tex_layout.c | 140 ++ 68 files changed, 29208 insertions(+) create mode 100644 src/gallium/drivers/i965/Makefile create mode 100644 src/gallium/drivers/i965/brw_cc.c create mode 100644 src/gallium/drivers/i965/brw_clip.c create mode 100644 src/gallium/drivers/i965/brw_clip.h create mode 100644 src/gallium/drivers/i965/brw_clip_line.c create mode 100644 src/gallium/drivers/i965/brw_clip_point.c create mode 100644 
src/gallium/drivers/i965/brw_clip_state.c create mode 100644 src/gallium/drivers/i965/brw_clip_tri.c create mode 100644 src/gallium/drivers/i965/brw_clip_unfilled.c create mode 100644 src/gallium/drivers/i965/brw_clip_util.c create mode 100644 src/gallium/drivers/i965/brw_context.c create mode 100644 src/gallium/drivers/i965/brw_context.h create mode 100644 src/gallium/drivers/i965/brw_curbe.c create mode 100644 src/gallium/drivers/i965/brw_defines.h create mode 100644 src/gallium/drivers/i965/brw_disasm.c create mode 100644 src/gallium/drivers/i965/brw_draw.c create mode 100644 src/gallium/drivers/i965/brw_draw.h create mode 100644 src/gallium/drivers/i965/brw_draw_upload.c create mode 100644 src/gallium/drivers/i965/brw_eu.c create mode 100644 src/gallium/drivers/i965/brw_eu.h create mode 100644 src/gallium/drivers/i965/brw_eu_debug.c create mode 100644 src/gallium/drivers/i965/brw_eu_emit.c create mode 100644 src/gallium/drivers/i965/brw_eu_util.c create mode 100644 src/gallium/drivers/i965/brw_gs.c create mode 100644 src/gallium/drivers/i965/brw_gs.h create mode 100644 src/gallium/drivers/i965/brw_gs_emit.c create mode 100644 src/gallium/drivers/i965/brw_gs_state.c create mode 100644 src/gallium/drivers/i965/brw_misc_state.c create mode 100644 src/gallium/drivers/i965/brw_program.c create mode 100644 src/gallium/drivers/i965/brw_queryobj.c create mode 100644 src/gallium/drivers/i965/brw_sf.c create mode 100644 src/gallium/drivers/i965/brw_sf.h create mode 100644 src/gallium/drivers/i965/brw_sf_emit.c create mode 100644 src/gallium/drivers/i965/brw_sf_state.c create mode 100644 src/gallium/drivers/i965/brw_state.h create mode 100644 src/gallium/drivers/i965/brw_state_batch.c create mode 100644 src/gallium/drivers/i965/brw_state_cache.c create mode 100644 src/gallium/drivers/i965/brw_state_dump.c create mode 100644 src/gallium/drivers/i965/brw_state_upload.c create mode 100644 src/gallium/drivers/i965/brw_structs.h create mode 100644 
src/gallium/drivers/i965/brw_tex.c create mode 100644 src/gallium/drivers/i965/brw_tex_layout.c create mode 100644 src/gallium/drivers/i965/brw_urb.c create mode 100644 src/gallium/drivers/i965/brw_util.c create mode 100644 src/gallium/drivers/i965/brw_util.h create mode 100644 src/gallium/drivers/i965/brw_vs.c create mode 100644 src/gallium/drivers/i965/brw_vs.h create mode 100644 src/gallium/drivers/i965/brw_vs_emit.c create mode 100644 src/gallium/drivers/i965/brw_vs_state.c create mode 100644 src/gallium/drivers/i965/brw_vs_surface_state.c create mode 100644 src/gallium/drivers/i965/brw_wm.c create mode 100644 src/gallium/drivers/i965/brw_wm.h create mode 100644 src/gallium/drivers/i965/brw_wm_debug.c create mode 100644 src/gallium/drivers/i965/brw_wm_emit.c create mode 100644 src/gallium/drivers/i965/brw_wm_fp.c create mode 100644 src/gallium/drivers/i965/brw_wm_glsl.c create mode 100644 src/gallium/drivers/i965/brw_wm_iz.c create mode 100644 src/gallium/drivers/i965/brw_wm_pass0.c create mode 100644 src/gallium/drivers/i965/brw_wm_pass1.c create mode 100644 src/gallium/drivers/i965/brw_wm_pass2.c create mode 100644 src/gallium/drivers/i965/brw_wm_sampler_state.c create mode 100644 src/gallium/drivers/i965/brw_wm_state.c create mode 100644 src/gallium/drivers/i965/brw_wm_surface_state.c create mode 100644 src/gallium/drivers/i965/intel_batchbuffer.h create mode 100644 src/gallium/drivers/i965/intel_chipset.h create mode 100644 src/gallium/drivers/i965/intel_structs.h create mode 100644 src/gallium/drivers/i965/intel_tex_format.c create mode 100644 src/gallium/drivers/i965/intel_tex_layout.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile new file mode 100644 index 0000000000..7a55333e89 --- /dev/null +++ b/src/gallium/drivers/i965/Makefile @@ -0,0 +1,104 @@ + +TOP = ../../../../.. 
+include $(TOP)/configs/current + +LIBNAME = i965_dri.so +# NOTE(review): several entries below (most intel_*.c, brw_fallback.c, brw_vs_constval.c, brw_vtbl.c) are not among the files this patch creates - confirm the list before building. +DRIVER_SOURCES = \ + intel_batchbuffer.c \ + intel_blit.c \ + intel_buffer_objects.c \ + intel_buffers.c \ + intel_clear.c \ + intel_context.c \ + intel_decode.c \ + intel_extensions.c \ + intel_fbo.c \ + intel_mipmap_tree.c \ + intel_regions.c \ + intel_screen.c \ + intel_span.c \ + intel_pixel.c \ + intel_pixel_bitmap.c \ + intel_pixel_copy.c \ + intel_pixel_draw.c \ + intel_pixel_read.c \ + intel_state.c \ + intel_swapbuffers.c \ + intel_syncobj.c \ + intel_tex.c \ + intel_tex_copy.c \ + intel_tex_format.c \ + intel_tex_image.c \ + intel_tex_layout.c \ + intel_tex_subimage.c \ + intel_tex_validate.c \ + brw_cc.c \ + brw_clip.c \ + brw_clip_line.c \ + brw_clip_point.c \ + brw_clip_state.c \ + brw_clip_tri.c \ + brw_clip_unfilled.c \ + brw_clip_util.c \ + brw_context.c \ + brw_curbe.c \ + brw_disasm.c \ + brw_draw.c \ + brw_draw_upload.c \ + brw_eu.c \ + brw_eu_debug.c \ + brw_eu_emit.c \ + brw_eu_util.c \ + brw_fallback.c \ + brw_gs.c \ + brw_gs_emit.c \ + brw_gs_state.c \ + brw_misc_state.c \ + brw_program.c \ + brw_queryobj.c \ + brw_sf.c \ + brw_sf_emit.c \ + brw_sf_state.c \ + brw_state_batch.c \ + brw_state_cache.c \ + brw_state_dump.c \ + brw_state_upload.c \ + brw_tex.c \ + brw_tex_layout.c \ + brw_urb.c \ + brw_util.c \ + brw_vs.c \ + brw_vs_constval.c \ + brw_vs_emit.c \ + brw_vs_state.c \ + brw_vs_surface_state.c \ + brw_vtbl.c \ + brw_wm.c \ + brw_wm_debug.c \ + brw_wm_emit.c \ + brw_wm_fp.c \ + brw_wm_iz.c \ + brw_wm_glsl.c \ + brw_wm_pass0.c \ + brw_wm_pass1.c \ + brw_wm_pass2.c \ + brw_wm_sampler_state.c \ + brw_wm_state.c \ + brw_wm_surface_state.c + +C_SOURCES = \ + $(COMMON_SOURCES) \ + $(MINIGLX_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +DRIVER_DEFINES = -I../intel -I../intel/server + +DRI_LIB_DEPS += -ldrm_intel + +include ../Makefile.template + +intel_decode.o: ../intel/intel_decode.c +intel_tex_layout.o: ../intel/intel_tex_layout.c diff --git 
a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c new file mode 100644 index 0000000000..1088a7a607 --- /dev/null +++ b/src/gallium/drivers/i965/brw_cc.c @@ -0,0 +1,297 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" +#include "main/macros.h" +#include "main/enums.h" +/* Build the CC viewport (depth range from ctx->Viewport.Near/Far) and store the buffer returned by brw_cache_data() in brw->cc.vp_bo. */ +static void prepare_cc_vp( struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_cc_viewport ccv; + + memset(&ccv, 0, sizeof(ccv)); + + /* _NEW_VIEWPORT */ + ccv.min_depth = ctx->Viewport.Near; + ccv.max_depth = ctx->Viewport.Far; + + dri_bo_unreference(brw->cc.vp_bo); + brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); +} +/* State atom pairing prepare_cc_vp with its dirty flags (_NEW_VIEWPORT, BRW_NEW_CONTEXT). */ +const struct brw_tracked_state brw_cc_vp = { + .dirty = { + .mesa = _NEW_VIEWPORT, + .brw = BRW_NEW_CONTEXT, + .cache = 0 + }, + .prepare = prepare_cc_vp +}; +/* GL state captured for one CC unit; passed as the key to brw_search_cache() / brw_upload_cache(). */ +struct brw_cc_unit_key { + GLboolean stencil, stencil_two_side, color_blend, alpha_enabled; + + GLenum stencil_func[2], stencil_fail_op[2]; + GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; + GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2]; + GLenum logic_op; + + GLenum blend_eq_rgb, blend_eq_a; + GLenum blend_src_rgb, blend_src_a; + GLenum blend_dst_rgb, blend_dst_a; + + GLenum alpha_func; + GLclampf alpha_ref; + + GLboolean dither; + + GLboolean depth_test, depth_write; + GLenum depth_func; +}; +/* Fill *key from the current GL context. Per-feature fields are only written when that feature is enabled; otherwise they keep the zero from the memset. */ +static void +cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + const unsigned back = ctx->Stencil._BackFace; + + memset(key, 0, sizeof(*key)); + + key->stencil = ctx->Stencil._Enabled; + key->stencil_two_side = ctx->Stencil._TestTwoSide; + + if (key->stencil) { + key->stencil_func[0] = ctx->Stencil.Function[0]; + key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; + key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; + key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; + key->stencil_ref[0] = ctx->Stencil.Ref[0]; + key->stencil_write_mask[0] = 
ctx->Stencil.WriteMask[0]; + key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; + } + if (key->stencil_two_side) { + key->stencil_func[1] = ctx->Stencil.Function[back]; + key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; + key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; + key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; + key->stencil_ref[1] = ctx->Stencil.Ref[back]; + key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; + key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; + } + /* GL_COPY stands for "logic op disabled" in the key. */ + if (ctx->Color._LogicOpEnabled) + key->logic_op = ctx->Color.LogicOp; + else + key->logic_op = GL_COPY; + + key->color_blend = ctx->Color.BlendEnabled; + if (key->color_blend) { + key->blend_eq_rgb = ctx->Color.BlendEquationRGB; + key->blend_eq_a = ctx->Color.BlendEquationA; + key->blend_src_rgb = ctx->Color.BlendSrcRGB; + key->blend_dst_rgb = ctx->Color.BlendDstRGB; + key->blend_src_a = ctx->Color.BlendSrcA; + key->blend_dst_a = ctx->Color.BlendDstA; + } + + key->alpha_enabled = ctx->Color.AlphaEnabled; + if (key->alpha_enabled) { + key->alpha_func = ctx->Color.AlphaFunc; + key->alpha_ref = ctx->Color.AlphaRef; + } + + key->dither = ctx->Color.DitherFlag; + + key->depth_test = ctx->Depth.Test; + if (key->depth_test) { + key->depth_func = ctx->Depth.Func; + key->depth_write = ctx->Depth.Mask; + } +} + +/** + * Creates the state cache entry for the given CC unit key. 
+ */ +static dri_bo * +cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) +{ + struct brw_cc_unit_state cc; + dri_bo *bo; + + memset(&cc, 0, sizeof(cc)); + + /* _NEW_STENCIL */ + if (key->stencil) { + cc.cc0.stencil_enable = 1; + cc.cc0.stencil_func = + intel_translate_compare_func(key->stencil_func[0]); + cc.cc0.stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[0]); + cc.cc0.stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + cc.cc0.stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); + cc.cc1.stencil_ref = key->stencil_ref[0]; + cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; + cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; + + if (key->stencil_two_side) { + cc.cc0.bf_stencil_enable = 1; + cc.cc0.bf_stencil_func = + intel_translate_compare_func(key->stencil_func[1]); + cc.cc0.bf_stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); + cc.cc1.bf_stencil_ref = key->stencil_ref[1]; + cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; + cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; + } + + /* Not really sure about this: + */ + if (key->stencil_write_mask[0] || + (key->stencil_two_side && key->stencil_write_mask[1])) + cc.cc0.stencil_write_enable = 1; + } + /* Logic op and blending are mutually exclusive here: blend state is only set when logic_op is GL_COPY. */ + /* _NEW_COLOR */ + if (key->logic_op != GL_COPY) { + cc.cc2.logicop_enable = 1; + cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + /* MIN/MAX equations get both blend factors forced to GL_ONE. */ + if (eqRGB == GL_MIN || eqRGB == 
GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); + cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); + + cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); + /* Independent alpha blending is enabled only when the alpha settings differ from the RGB ones. */ + cc.cc3.blend_enable = 1; + cc.cc3.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + /* Alpha test: the reference value is converted to an unsigned byte (UNORM8 format). */ + if (key->alpha_enabled) { + cc.cc3.alpha_test = 1; + cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); + cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); + } + + if (key->dither) { + cc.cc5.dither_enable = 1; + cc.cc6.y_dither_offset = 0; + cc.cc6.x_dither_offset = 0; + } + + /* _NEW_DEPTH */ + if (key->depth_test) { + cc.cc2.depth_test = 1; + cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); + cc.cc2.depth_write_enable = key->depth_write; + } + + /* CACHE_NEW_CC_VP */ + cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ + + if (INTEL_DEBUG & DEBUG_STATS) + cc.cc5.statistics_enable = 1; + + bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, + key, sizeof(*key), + &brw->cc.vp_bo, 1, + &cc, sizeof(cc), + NULL, NULL); + + /* Emit CC viewport relocation */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, + 0, + offsetof(struct brw_cc_unit_state, cc4), + brw->cc.vp_bo); + + return bo; +} +/* Select the CC unit state buffer for the current GL state: cache lookup first, create and upload on a miss. */ +static void prepare_cc_unit( struct brw_context *brw ) +{ + struct brw_cc_unit_key key; + + cc_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->cc.state_bo); + brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT, + &key, sizeof(key), + &brw->cc.vp_bo, 1, + NULL); + + if (brw->cc.state_bo == NULL) + 
brw->cc.state_bo = cc_unit_create_from_key(brw, &key); +} +/* State atom pairing prepare_cc_unit with its dirty flags (stencil/color/depth GL state and CACHE_NEW_CC_VP). */ +const struct brw_tracked_state brw_cc_unit = { + .dirty = { + .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH, + .brw = 0, + .cache = CACHE_NEW_CC_VP + }, + .prepare = prepare_cc_unit, +}; + + + diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c new file mode 100644 index 0000000000..20a927cf38 --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip.c @@ -0,0 +1,273 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" + +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_clip.h" + + +#define FRONT_UNFILLED_BIT 0x1 +#define BACK_UNFILLED_BIT 0x2 + +/* Compile the clip program for *key (triangle, unfilled-triangle, line or point variant) and upload it to the program cache, storing the buffer in brw->clip.prog_bo. */ +static void compile_clip_prog( struct brw_context *brw, + struct brw_clip_prog_key *key ) +{ + struct brw_clip_compile c; + const GLuint *program; + GLuint program_size; + GLuint delta; + GLuint i; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(brw, &c.func); + + c.func.single_program_flow = 1; + + c.key = *key; + c.need_ff_sync = BRW_IS_IGDNG(brw); + + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.header_position_offset = ATTR_SIZE; + + if (BRW_IS_IGDNG(brw)) + delta = 3 * REG_SIZE; + else + delta = REG_SIZE; + + for (i = 0; i < VERT_RESULT_MAX; i++) + if (c.key.attrs & (1<<i)) { + c.offset[i] = delta; + delta += ATTR_SIZE; + } + /* NOTE(review): from the loop body above down to the switch header the text was lost in extraction and is restored from the upstream Mesa DRI driver - confirm against the original commit. */ + c.nr_attrs = brw_count_bits(c.key.attrs); + + if (BRW_IS_IGDNG(brw)) + c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ + else + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + + c.nr_bytes = c.nr_regs * REG_SIZE; + + c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Would ideally have the option of producing a program which could + * do all three: + */ + switch (key->primitive) { + case GL_TRIANGLES: + if (key->do_unfilled) + brw_emit_unfilled_clip( &c ); + else + brw_emit_tri_clip( &c ); + break; + case GL_LINES: + brw_emit_line_clip( &c ); + break; + case GL_POINTS: + brw_emit_point_clip( &c ); + break; + default: + assert(0); + return; + } + + + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + dri_bo_unreference(brw->clip.prog_bo); + brw->clip.prog_bo = brw_upload_cache( &brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->clip.prog_data ); +} + +/* Build the clip program key from the current state and fetch the matching clip program from the cache, compiling it on a miss. 
+ */ +static void upload_clip_prog(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_clip_prog_key key; + + memset(&key, 0, sizeof(key)); + + /* Populate the key: + */ + /* BRW_NEW_REDUCED_PRIMITIVE */ + key.primitive = brw->intel.reduced_primitive; + /* CACHE_NEW_VS_PROG */ + key.attrs = brw->vs.prog_data->outputs_written; + /* _NEW_LIGHT */ + key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); + /* _NEW_TRANSFORM */ + key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + + if (BRW_IS_IGDNG(brw)) + key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; + else + key.clip_mode = BRW_CLIPMODE_NORMAL; + + /* _NEW_POLYGON */ + if (key.primitive == GL_TRIANGLES) { + if (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) + key.clip_mode = BRW_CLIPMODE_REJECT_ALL; + else { + GLuint fill_front = CLIP_CULL; + GLuint fill_back = CLIP_CULL; + GLuint offset_front = 0; + GLuint offset_back = 0; + /* Front faces are not culled: take fill mode and offset enable from FrontMode. */ + if (!ctx->Polygon.CullFlag || + ctx->Polygon.CullFaceMode != GL_FRONT) { + switch (ctx->Polygon.FrontMode) { + case GL_FILL: + fill_front = CLIP_FILL; + offset_front = 0; + break; + case GL_LINE: + fill_front = CLIP_LINE; + offset_front = ctx->Polygon.OffsetLine; + break; + case GL_POINT: + fill_front = CLIP_POINT; + offset_front = ctx->Polygon.OffsetPoint; + break; + } + } + /* Same for back faces, from BackMode. */ + if (!ctx->Polygon.CullFlag || + ctx->Polygon.CullFaceMode != GL_BACK) { + switch (ctx->Polygon.BackMode) { + case GL_FILL: + fill_back = CLIP_FILL; + offset_back = 0; + break; + case GL_LINE: + fill_back = CLIP_LINE; + offset_back = ctx->Polygon.OffsetLine; + break; + case GL_POINT: + fill_back = CLIP_POINT; + offset_back = ctx->Polygon.OffsetPoint; + break; + } + } + + if (ctx->Polygon.BackMode != GL_FILL || + ctx->Polygon.FrontMode != GL_FILL) { + key.do_unfilled = 1; + + /* Most cases the fixed function units will handle. 
Cases where + * one or more polygon faces are unfilled will require help: + */ + key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + + if (offset_back || offset_front) { + /* _NEW_POLYGON, _NEW_BUFFERS */ + key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale; + key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD; + } + /* Assign the front/back choices to cw/ccw winding according to FrontFace. */ + switch (ctx->Polygon.FrontFace) { + case GL_CCW: + key.fill_ccw = fill_front; + key.fill_cw = fill_back; + key.offset_ccw = offset_front; + key.offset_cw = offset_back; + if (ctx->Light.Model.TwoSide && + key.fill_cw != CLIP_CULL) + key.copy_bfc_cw = 1; + break; + case GL_CW: + key.fill_cw = fill_front; + key.fill_ccw = fill_back; + key.offset_cw = offset_front; + key.offset_ccw = offset_back; + if (ctx->Light.Model.TwoSide && + key.fill_ccw != CLIP_CULL) + key.copy_bfc_ccw = 1; + break; + } + } + } + } + /* Release the previous program, then use the cached one for this key or compile it. */ + dri_bo_unreference(brw->clip.prog_bo); + brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, + &key, sizeof(key), + NULL, 0, + &brw->clip.prog_data); + if (brw->clip.prog_bo == NULL) + compile_clip_prog( brw, &key ); +} + +/* State atom pairing upload_clip_prog with the dirty flags of the state it reads. */ +const struct brw_tracked_state brw_clip_prog = { + .dirty = { + .mesa = (_NEW_LIGHT | + _NEW_TRANSFORM | + _NEW_POLYGON | + _NEW_BUFFERS), + .brw = (BRW_NEW_REDUCED_PRIMITIVE), + .cache = CACHE_NEW_VS_PROG + }, + .prepare = upload_clip_prog +}; diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h new file mode 100644 index 0000000000..957df441ab --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip.h @@ -0,0 +1,179 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#ifndef BRW_CLIP_H +#define BRW_CLIP_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_VERTS (3+6+6) + +/* Note that if unfilled primitives are being emitted, we have to fix + * up polygon offset and flatshading at this point: + */ +struct brw_clip_prog_key { + GLuint attrs:32; + GLuint primitive:4; + GLuint nr_userclip:3; + GLuint do_flat_shading:1; + GLuint do_unfilled:1; + GLuint fill_cw:2; /* includes cull information */ + GLuint fill_ccw:2; /* includes cull information */ + GLuint offset_cw:1; + GLuint offset_ccw:1; + GLuint pad0:17; + /* pad0 brings the bitfields after attrs to 32 bits; the next group is padded to 32 bits by pad1. */ + GLuint copy_bfc_cw:1; + GLuint copy_bfc_ccw:1; + GLuint clip_mode:3; + GLuint pad1:27; + + GLfloat offset_factor; + GLfloat offset_units; +}; + +/* Per-winding fill modes stored in fill_cw / fill_ccw. */ +#define CLIP_LINE 0 +#define CLIP_POINT 1 +#define CLIP_FILL 2 +#define CLIP_CULL 3 + + +#define PRIM_MASK (0x1f) +/* Working state for compiling one clip program: EU compile context, key, output prog_data and register assignments. */ +struct brw_clip_compile { + struct brw_compile func; + struct brw_clip_prog_key key; + struct brw_clip_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_VERTS]; + + struct brw_reg t; + struct brw_reg t0, t1; + struct brw_reg dp0, dp1; + + struct brw_reg dpPrev; + struct brw_reg dp; + struct brw_reg loopcount; + struct brw_reg nr_verts; + struct brw_reg planemask; + + struct brw_reg inlist; + struct brw_reg outlist; + struct brw_reg freelist; + + struct brw_reg dir; + struct brw_reg tmp0, tmp1; + struct brw_reg offset; + + struct brw_reg fixed_planes; + struct brw_reg plane_equation; + + struct brw_reg ff_sync; + } reg; + + /* 3 different ways of expressing vertex size: + */ + GLuint nr_attrs; + GLuint nr_regs; + GLuint nr_bytes; + + GLuint first_tmp; + GLuint last_tmp; + + GLboolean need_direction; + + GLuint last_mrf; + + GLuint header_position_offset; + GLuint offset[VERT_ATTRIB_MAX]; + GLboolean need_ff_sync; +}; + +#define ATTR_SIZE (4*4) + +/* Points are only culled, so no need for a clip 
routine, however it + * works out easier to have a dummy one. + */ +void brw_emit_unfilled_clip( struct brw_clip_compile *c ); +void brw_emit_tri_clip( struct brw_clip_compile *c ); +void brw_emit_line_clip( struct brw_clip_compile *c ); +void brw_emit_point_clip( struct brw_clip_compile *c ); + +/* brw_clip_tri.c, for use by the unfilled clip routine: + */ +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ); +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ); +void brw_clip_tri( struct brw_clip_compile *c ); +void brw_clip_tri_emit_polygon( struct brw_clip_compile *c ); +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + GLuint nr_verts ); + + +/* Utils: + */ +/* NOTE(review): presumably writes the vertex interpolated between v0 and v1 at t0 to dest - confirm in brw_clip_util.c */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + GLboolean force_edgeflag ); + +void brw_clip_init_planes( struct brw_clip_compile *c ); + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + GLboolean allocate, + GLboolean eot, + GLuint header); + +void brw_clip_kill_thread(struct brw_clip_compile *c); + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ); +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ); + +void brw_clip_copy_colors( struct brw_clip_compile *c, + GLuint to, GLuint from ); + +void brw_clip_init_clipmask( struct brw_clip_compile *c ); + +struct brw_reg get_tmp( struct brw_clip_compile *c ); + +void brw_clip_project_position(struct brw_clip_compile *c, + struct brw_reg pos ); +void brw_clip_ff_sync(struct brw_clip_compile *c); +void brw_clip_init_ff_sync(struct brw_clip_compile *c); +#endif /* BRW_CLIP_H */ diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c new file mode 100644 index 0000000000..048ca620fa --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -0,0 +1,276 @@ +/* + Copyright (C) Intel Corp. 2006. 
All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+/* Lay out the fixed registers used by the line-clip program and record
+ * the resulting sizes in c->prog_data.
+ *
+ * Registers are handed out in increasing GRF index order:
+ *   - R0;
+ *   - with user clip planes enabled, (6 + nr_userclip + 1) / 2 registers
+ *     starting at fixed_planes, also stored as curb_read_length
+ *     (otherwise curb_read_length is 0);
+ *   - four vertex slots of c->nr_regs registers each;
+ *   - one register shared by t, t0, t1, planemask (elements 0..3) and
+ *     plane_equation (elements 4..7);
+ *   - one register shared by dp0 (element 0) and dp1 (element 4);
+ *   - without user clip planes, one register for fixed_planes;
+ *   - with need_ff_sync set, one register for ff_sync.
+ *
+ * first_tmp and last_tmp are both set to the first unused register
+ * (presumably the pool that get_tmp() draws from; it is defined in
+ * another file).
+ */
+static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
+{
+   GLuint i = 0,j;
+
+   /* Register usage is static, precompute here:
+    */
+   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+   if (c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec4_grf(i, 0);
+      i += (6 + c->key.nr_userclip + 1) / 2;
+
+      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+   }
+   else
+      c->prog_data.curb_read_length = 0;
+
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (j = 0; j < 4; j++) {
+      c->reg.vertex[j] = brw_vec4_grf(i, 0);
+      i += c->nr_regs;
+   }
+
+   c->reg.t = brw_vec1_grf(i, 0);
+   c->reg.t0 = brw_vec1_grf(i, 1);
+   c->reg.t1 = brw_vec1_grf(i, 2);
+   c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+   c->reg.plane_equation = brw_vec4_grf(i, 4);
+   i++;
+
+   c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+   c->reg.dp1 = brw_vec1_grf(i, 4);
+   i++;
+
+   if (!c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec8_grf(i, 0);
+      i++;
+   }
+
+   if (c->need_ff_sync) {
+      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+      i++;
+   }
+
+   c->first_tmp = i;
+   c->last_tmp = i;
+
+   c->prog_data.urb_read_length = c->nr_regs; /* ? */
+   c->prog_data.total_grf = i;
+}
+
+
+
+/* Line clipping, more or less following the following algorithm:
+ *
+ *  for (p=0;p<MAX_PLANES;p++) {
+ *     if (clipmask & (1 << p)) {
+ *        GLfloat dp0 = DP4( vtx0->ClipPos, plane[p] );
+ *        GLfloat dp1 = DP4( vtx1->ClipPos, plane[p] );
+ *
+ *        if (IS_NEGATIVE(dp1)) {
+ *           GLfloat t = dp1 / (dp1 - dp0);
+ *           if (t > t1) t1 = t;
+ *        } else {
+ *           GLfloat t = dp0 / (dp0 - dp1);
+ *           if (t > t0) t0 = t;
+ *        }
+ *
+ *        if (t0 + t1 >= 1.0)
+ *           return;
+ *     }
+ *  }
+ *
+ *  interp( ctx, newvtx0, vtx0, vtx1, t0 );
+ *  interp( ctx, newvtx1, vtx1, vtx0, t1 );
+ *
+ * Differences between the pseudocode and the program emitted below:
+ *   - the plane loop shifts planemask right by one each iteration and
+ *     stops once it becomes zero;
+ *   - the "t0 + t1 < 1.0" test is done once, after the loop, and guards
+ *     the two interpolations and the two vertex emits;
+ *   - under BRW_IS_965() extra tests on dp0 are emitted: the thread is
+ *     killed when dp1 < 0 and dp0 <= 0, and t0 is only updated when
+ *     dp0 < 0.
+ *
+ * The program always finishes with brw_clip_kill_thread().
+ */
+static void clip_and_emit_line( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_indirect vtx0 = brw_indirect(0, 0);
+   struct brw_indirect vtx1 = brw_indirect(1, 0);
+   struct brw_indirect newvtx0 = brw_indirect(2, 0);
+   struct brw_indirect newvtx1 = brw_indirect(3, 0);
+   struct brw_indirect plane_ptr = brw_indirect(4, 0);
+   struct brw_instruction *plane_loop;
+   struct brw_instruction *plane_active;
+   struct brw_instruction *is_negative;
+   struct brw_instruction *is_neg2 = NULL;
+   struct brw_instruction *not_culled;
+   struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+   /* Point the five indirect handles at the two incoming vertices
+    * (vertex[0], vertex[1]), the two output slots (vertex[2], vertex[3])
+    * and the first clip plane.
+    */
+   brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0]));
+   brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1]));
+   brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2]));
+   brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3]));
+   brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+
+   /* Note: init t0, t1 together:
+    *
+    * brw_clip_line_alloc_regs() places t0 and t1 in adjacent elements
+    * (1 and 2) of the same register, so one vec2 MOV clears both.
+    */
+   brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
+
+   brw_clip_init_planes(c);
+   brw_clip_init_clipmask(c);
+
+   /* -ve rhw workaround
+    *
+    * Tests bit 20 of R0 element 2 and ORs 0x3f (the six low bits) into
+    * planemask.  Presumably the OR only takes effect when the tested bit
+    * is set, via the conditional modifier on the AND -- confirm against
+    * the brw_eu emitter.  Predication is reset right after.
+    */
+   if (BRW_IS_965(p->brw)) {
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+              brw_imm_ud(1<<20));
+      brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+   }
+
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      /* if (planemask & 1)
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
+
+      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      {
+         if (c->key.nr_userclip)
+            brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+         else
+            brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+         /* dp = DP4(vtx->position, plane)
+          */
+         brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+
+         /* if (IS_NEGATIVE(dp1))
+          */
+         brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+         brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+         is_negative = brw_IF(p, BRW_EXECUTE_1);
+         {
+            /*
+             * Both can be negative on GM965/G965 due to RHW workaround
+             * if so, this object should be rejected.
+             */
+            if (BRW_IS_965(p->brw)) {
+               brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
+               is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+               {
+                  brw_clip_kill_thread(c);
+               }
+               brw_ENDIF(p, is_neg2);
+            }
+
+            brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
+            brw_math_invert(p, c->reg.t, c->reg.t);
+            brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+
+            brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+            brw_MOV(p, c->reg.t1, c->reg.t);
+            brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         }
+         is_negative = brw_ELSE(p, is_negative);
+         {
+            /* Coming back in.  We know that both cannot be negative
+             * because the line would have been culled in that case.
+             */
+
+            /* If both are positive, do nothing */
+            /* Only on GM965/G965 */
+            if (BRW_IS_965(p->brw)) {
+               brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+               is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+            }
+
+            {
+               brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+               brw_math_invert(p, c->reg.t, c->reg.t);
+               brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+
+               brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+               brw_MOV(p, c->reg.t0, c->reg.t);
+               brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+            }
+
+            if (BRW_IS_965(p->brw)) {
+               brw_ENDIF(p, is_neg2);
+            }
+         }
+         brw_ENDIF(p, is_negative);
+      }
+      brw_ENDIF(p, plane_active);
+
+      /* plane_ptr++;
+       */
+      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+      /* while (planemask>>=1) != 0
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+   }
+   brw_WHILE(p, plane_loop);
+   /* if (t0 + t1 < 1.0): interpolate both end points and emit them as a
+    * two-vertex line strip; the second emit passes eot = 1.
+    */
+   brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+   not_culled = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
+      brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, GL_FALSE);
+
+      brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+      brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+   }
+   brw_ENDIF(p, not_culled);
+   brw_clip_kill_thread(c);
+}
+
+
+/* Build the complete line-clip program: allocate registers, run the
+ * ff_sync setup, copy colours with brw_clip_copy_colors(c, 0, 1) (to = 0,
+ * from = 1) when flat shading is enabled, then emit the clip-and-emit
+ * sequence.
+ */
+void brw_emit_line_clip( struct brw_clip_compile *c )
+{
+   brw_clip_line_alloc_regs(c);
+   brw_clip_init_ff_sync(c);
+
+   if (c->key.do_flat_shading)
+      brw_clip_copy_colors(c, 0, 1);
+
+   clip_and_emit_line(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c
new file mode 100644
index 0000000000..8458f61c5a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_point.c
@@ -0,0 +1,56 @@
+/*
+ Copyright (C) Intel Corp. 2006.
All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" + +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + +/* Point clipping, nothing to do? 
+ *
+ * The emitted program does no clipping work: it sets up the common
+ * register layout with zero vertex slots, runs the ff_sync setup and
+ * then ends the thread.
+ */
+void brw_emit_point_clip( struct brw_clip_compile *c )
+{
+   /* Send an empty message to kill the thread:
+    *
+    * brw_clip_tri_alloc_regs() is reused here with nr_verts = 0, so no
+    * vertex registers are assigned by its allocation loop.
+    */
+   brw_clip_tri_alloc_regs(c, 0);
+   brw_clip_init_ff_sync(c);
+
+   brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
new file mode 100644
index 0000000000..234b3744bf
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -0,0 +1,184 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "main/macros.h" + +struct brw_clip_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int clip_mode; + + unsigned int curbe_offset; + + unsigned int nr_urb_entries, urb_size; + + GLboolean depth_clamp; +}; + +static void +clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_CLIP_PROG */ + key->total_grf = brw->clip.prog_data->total_grf; + key->urb_entry_read_length = brw->clip.prog_data->urb_read_length; + key->curb_entry_read_length = brw->clip.prog_data->curb_read_length; + key->clip_mode = brw->clip.prog_data->clip_mode; + + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.clip_start; + + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_clip_entries; + key->urb_size = brw->urb.vsize; + + /* _NEW_TRANSOFORM */ + key->depth_clamp = ctx->Transform.DepthClamp; +} + +static dri_bo * +clip_unit_create_from_key(struct brw_context *brw, + struct brw_clip_unit_key *key) +{ + struct brw_clip_unit_state clip; + dri_bo *bo; + + memset(&clip, 0, sizeof(clip)); + + clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + /* reloc */ + clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; + + clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + clip.thread1.single_program_flow = 1; + + clip.thread3.urb_entry_read_length = key->urb_entry_read_length; + clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + clip.thread3.dispatch_grf_start_reg = 1; + clip.thread3.urb_entry_read_offset = 0; + + clip.thread4.nr_urb_entries = key->nr_urb_entries; + 
clip.thread4.urb_entry_allocation_size = key->urb_size - 1; + /* If we have enough clip URB entries to run two threads, do so. + */ + if (key->nr_urb_entries >= 10) { + /* Half of the URB entries go to each thread, and it has to be an + * even number. + */ + assert(key->nr_urb_entries % 2 == 0); + + /* Although up to 16 concurrent Clip threads are allowed on IGDNG, + * only 2 threads can output VUEs at a time. + */ + if (BRW_IS_IGDNG(brw)) + clip.thread4.max_threads = 16 - 1; + else + clip.thread4.max_threads = 2 - 1; + } else { + assert(key->nr_urb_entries >= 5); + clip.thread4.max_threads = 1 - 1; + } + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + clip.thread4.max_threads = 0; + + if (INTEL_DEBUG & DEBUG_STATS) + clip.thread4.stats_enable = 1; + + clip.clip5.userclip_enable_flags = 0x7f; + clip.clip5.userclip_must_clip = 1; + clip.clip5.guard_band_enable = 0; + if (!key->depth_clamp) + clip.clip5.viewport_z_clip_enable = 1; + clip.clip5.viewport_xy_clip_enable = 1; + clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; + clip.clip5.api_mode = BRW_CLIP_API_OGL; + clip.clip5.clip_mode = key->clip_mode; + + if (BRW_IS_G4X(brw)) + clip.clip5.negative_w_clip_test = 1; + + clip.clip6.clipper_viewport_state_ptr = 0; + clip.viewport_xmin = -1; + clip.viewport_xmax = 1; + clip.viewport_ymin = -1; + clip.viewport_ymax = 1; + + bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, + key, sizeof(*key), + &brw->clip.prog_bo, 1, + &clip, sizeof(clip), + NULL, NULL); + + /* Emit clip program relocation */ + assert(brw->clip.prog_bo); + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, + clip.thread0.grf_reg_count << 1, + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_bo); + + return bo; +} + +static void upload_clip_unit( struct brw_context *brw ) +{ + struct brw_clip_unit_key key; + + clip_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->clip.state_bo); + brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT, + &key, sizeof(key), + 
&brw->clip.prog_bo, 1, + NULL); + if (brw->clip.state_bo == NULL) { + brw->clip.state_bo = clip_unit_create_from_key(brw, &key); + } +} + +const struct brw_tracked_state brw_clip_unit = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_CLIP_PROG + }, + .prepare = upload_clip_unit, +}; diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c new file mode 100644 index 0000000000..0efd77225e --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -0,0 +1,603 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+/* Reset last_tmp back to first_tmp (presumably releasing every temporary
+ * obtained through get_tmp(), which is defined in another file).
+ */
+static void release_tmps( struct brw_clip_compile *c )
+{
+   c->last_tmp = c->first_tmp;
+}
+
+/* Lay out the fixed registers shared by the triangle, unfilled and point
+ * clip programs and record the resulting sizes in c->prog_data.
+ *
+ * nr_verts is the number of vertex slots to reserve; each slot takes
+ * c->nr_regs registers.
+ *
+ * Registers are handed out in increasing GRF index order: R0, the
+ * optional user-clip plane constants, the vertex slots, one register
+ * shared by t / loopcount / nr_verts / planemask / plane_equation, one
+ * shared by dpPrev / dp, then inlist, outlist, freelist, the optional
+ * fixed_planes, dir / offset / tmp0 / tmp1 (only with do_unfilled) and
+ * the optional ff_sync register.
+ *
+ * Besides assigning registers this also emits MOV instructions: when
+ * c->nr_attrs is odd, a zero is written at a byte offset past the
+ * attributes of vertex slots 0..2.
+ *
+ * NOTE(review): that zero-fill loop always covers three slots, even when
+ * nr_verts is smaller (brw_emit_point_clip() passes 0), in which case
+ * c->reg.vertex[j] has not been assigned by this function -- confirm the
+ * compile structure is zero-initialised by the caller.
+ */
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
+                              GLuint nr_verts )
+{
+   GLuint i = 0,j;
+
+   /* Register usage is static, precompute here:
+    */
+   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+   if (c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec4_grf(i, 0);
+      i += (6 + c->key.nr_userclip + 1) / 2;
+
+      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+   }
+   else
+      c->prog_data.curb_read_length = 0;
+
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (j = 0; j < nr_verts; j++) {
+      c->reg.vertex[j] = brw_vec4_grf(i, 0);
+      i += c->nr_regs;
+   }
+
+   if (c->nr_attrs & 1) {
+      for (j = 0; j < 3; j++) {
+         GLuint delta = c->nr_attrs*16 + 32;
+
+         if (BRW_IS_IGDNG(c->func.brw))
+            delta = c->nr_attrs * 16 + 32 * 3;
+
+         brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
+      }
+   }
+
+   c->reg.t = brw_vec1_grf(i, 0);
+   c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
+   c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
+   c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+   c->reg.plane_equation = brw_vec4_grf(i, 4);
+   i++;
+
+   c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+   c->reg.dp = brw_vec1_grf(i, 4);
+   i++;
+
+   c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   c->reg.freelist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   if (!c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec8_grf(i, 0);
+      i++;
+   }
+
+   if (c->key.do_unfilled) {
+      c->reg.dir = brw_vec4_grf(i, 0);
+      c->reg.offset = brw_vec4_grf(i, 4);
+      i++;
+      c->reg.tmp0 = brw_vec4_grf(i, 0);
+      c->reg.tmp1 = brw_vec4_grf(i, 4);
+      i++;
+   }
+
+   if (c->need_ff_sync) {
+      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+      i++;
+   }
+
+   c->first_tmp = i;
+   c->last_tmp = i;
+
+   c->prog_data.urb_read_length = c->nr_regs; /* ? */
+   c->prog_data.total_grf = i;
+}
+
+
+/* Emit code that fills inlist with the addresses of the three incoming
+ * vertices, clears the outlist register and sets nr_verts to 3.
+ *
+ * The primitive type is taken from R0 element 2 masked with PRIM_MASK.
+ * For _3DPRIM_TRISTRIP_REVERSE the first two entries are swapped
+ * (vertex[1], vertex[0]); when c->need_direction is set, c->reg.dir is
+ * additionally loaded with -1 in that case and +1 otherwise.
+ *
+ * NOTE(review): c->reg.dir is only assigned by brw_clip_tri_alloc_regs()
+ * when key.do_unfilled is set, while the write here is guarded by
+ * need_direction -- confirm the two flags are always set together.
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+   struct brw_instruction *is_rev;
+
+   /* Initial list of indices for incoming vertexes:
+    */
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+   brw_CMP(p,
+           vec1(brw_null_reg()),
+           BRW_CONDITIONAL_EQ,
+           tmp0,
+           brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
+
+   /* XXX: Is there an easier way to do this?  Need to reverse every
+    * second tristrip element: Can ignore sometimes?
+    */
+   is_rev = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) );
+      brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) );
+      if (c->need_direction)
+         brw_MOV(p, c->reg.dir, brw_imm_f(-1));
+   }
+   is_rev = brw_ELSE(p, is_rev);
+   {
+      brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) );
+      brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) );
+      if (c->need_direction)
+         brw_MOV(p, c->reg.dir, brw_imm_f(1));
+   }
+   brw_ENDIF(p, is_rev);
+
+   brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) );
+   brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
+   brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
+}
+
+
+/* Emit code that copies colours between the three incoming vertices for
+ * flat shading.  brw_clip_copy_colors() takes (to, from): for
+ * _3DPRIM_POLYGON vertex 0 is copied to vertices 1 and 2, for every other
+ * primitive type vertex 2 is copied to vertices 0 and 1.
+ */
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *is_poly;
+   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK));
+   brw_CMP(p,
+           vec1(brw_null_reg()),
+           BRW_CONDITIONAL_EQ,
+           tmp0,
+           brw_imm_ud(_3DPRIM_POLYGON));
+
+   is_poly = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_copy_colors(c, 1, 0);
+      brw_clip_copy_colors(c, 2, 0);
+   }
+   is_poly = brw_ELSE(p, is_poly);
+   {
+      brw_clip_copy_colors(c, 0, 2);
+      brw_clip_copy_colors(c, 1, 2);
+   }
+   brw_ENDIF(p, is_poly);
+}
+
+
+
+/* Use mesa's clipping algorithms, translated to GEN4 assembly.
+ *
+ * Emits a polygon clipper that works plane by plane.  For every plane
+ * whose bit is set in planemask the vertices listed in inlist are walked
+ * as (vtxPrev, vtx) pairs and the clipped polygon is written to outlist:
+ *   - vtxPrev outside, vtx inside:  an interpolated vertex is appended;
+ *   - vtxPrev inside:               vtxPrev is appended, followed by an
+ *                                   interpolated vertex when vtx is
+ *                                   outside.
+ * "Outside" means the DP4 of the vertex position with the plane equation
+ * is negative.  Storage for new vertices is taken from freelist_ptr,
+ * which starts at vertex[3] and advances by one vertex slot per active
+ * plane.  After each active plane outlist is copied over inlist.
+ *
+ * The plane loop continues while nr_verts >= 3 and the shifted planemask
+ * is non-zero, as spelled out by the comments at the loop tail.  The
+ * resulting vertex list is left in inlist with its length in nr_verts.
+ */
+void brw_clip_tri( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_indirect vtx = brw_indirect(0, 0);
+   struct brw_indirect vtxPrev = brw_indirect(1, 0);
+   struct brw_indirect vtxOut = brw_indirect(2, 0);
+   struct brw_indirect plane_ptr = brw_indirect(3, 0);
+   struct brw_indirect inlist_ptr = brw_indirect(4, 0);
+   struct brw_indirect outlist_ptr = brw_indirect(5, 0);
+   struct brw_indirect freelist_ptr = brw_indirect(6, 0);
+   struct brw_instruction *plane_loop;
+   struct brw_instruction *plane_active;
+   struct brw_instruction *vertex_loop;
+   struct brw_instruction *next_test;
+   struct brw_instruction *prev_test;
+   /* vtxPrev starts at the last incoming vertex (vertex[2]). */
+   brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) );
+   brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+   brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+   brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+
+   brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );
+
+   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      /* if (planemask & 1)
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
+
+      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      {
+         /* vtxOut = freelist_ptr++
+          */
+         brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) );
+         brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
+
+         if (c->key.nr_userclip)
+            brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+         else
+            brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+         brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+         brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
+
+         vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+         {
+            /* vtx = *input_ptr;
+             */
+            brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
+
+            /* IS_NEGATIVE(prev) */
+            brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+            brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+            prev_test = brw_IF(p, BRW_EXECUTE_1);
+            {
+               /* IS_POSITIVE(next)
+                */
+               brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+               brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+               next_test = brw_IF(p, BRW_EXECUTE_1);
+               {
+
+                  /* Coming back in.
+                   *
+                   * t = dpPrev / (dpPrev - dp)
+                   */
+                  brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
+                  brw_math_invert(p, c->reg.t, c->reg.t);
+                  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
+
+                  /* If (vtxOut == 0) vtxOut = vtxPrev
+                   */
+                  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+                  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
+                  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+                  brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);
+
+                  /* *outlist_ptr++ = vtxOut;
+                   * nr_verts++;
+                   * vtxOut = 0;
+                   */
+                  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+                  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+                  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+                  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+               }
+               brw_ENDIF(p, next_test);
+
+            }
+            prev_test = brw_ELSE(p, prev_test);
+            {
+               /* *outlist_ptr++ = vtxPrev;
+                * nr_verts++;
+                */
+               brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
+               brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+               brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+
+               /* IS_NEGATIVE(next)
+                */
+               brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+               brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+               next_test = brw_IF(p, BRW_EXECUTE_1);
+               {
+                  /* Going out of bounds.  Avoid division by zero as we
+                   * know dp != dpPrev from DIFFERENT_SIGNS, above.
+                   *
+                   * t = dp / (dp - dpPrev)
+                   */
+                  brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
+                  brw_math_invert(p, c->reg.t, c->reg.t);
+                  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);
+
+                  /* If (vtxOut == 0) vtxOut = vtx
+                   */
+                  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+                  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
+                  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+                  brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);
+
+                  /* *outlist_ptr++ = vtxOut;
+                   * nr_verts++;
+                   * vtxOut = 0;
+                   */
+                  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+                  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+                  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+                  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+               }
+               brw_ENDIF(p, next_test);
+            }
+            brw_ENDIF(p, prev_test);
+
+            /* vtxPrev = vtx;
+             * inlist_ptr++;
+             */
+            brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
+            brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
+
+            /* while (--loopcount != 0)
+             */
+            brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+            brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+         }
+         brw_WHILE(p, vertex_loop);
+
+         /* vtxPrev = *(outlist_ptr-1)  OR: outlist[nr_verts-1]
+          * inlist = outlist
+          * inlist_ptr = &inlist[0]
+          * outlist_ptr = &outlist[0]
+          */
+         brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
+         brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
+         brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
+         brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+         brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+      }
+      brw_ENDIF(p, plane_active);
+
+      /* plane_ptr++;
+       */
+      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+      /* nr_verts >= 3
+       */
+      brw_CMP(p,
+              vec1(brw_null_reg()),
+              BRW_CONDITIONAL_GE,
+              c->reg.nr_verts,
+              brw_imm_ud(3));
+
+      /* && (planemask>>=1) != 0
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+   }
+   brw_WHILE(p, plane_loop);
+}
+
+
+/* Emit the vertices listed in inlist as one _3DPRIM_TRIFAN.
+ *
+ * Nothing is emitted unless nr_verts - 2 is greater than zero.  The first
+ * vertex carries R02_PRIM_START, the last one R02_PRIM_END and eot = 1;
+ * the loop in between emits nr_verts - 2 vertices.  The third and fourth
+ * arguments of brw_clip_emit_vue() are (allocate, eot).
+ */
+void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *loop, *if_insn;
+
+   /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
+    */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+   brw_ADD(p,
+           c->reg.loopcount,
+           c->reg.nr_verts,
+           brw_imm_d(-2));
+
+   if_insn = brw_IF(p, BRW_EXECUTE_1);
+   {
+      struct brw_indirect v0 = brw_indirect(0, 0);
+      struct brw_indirect vptr = brw_indirect(1, 0);
+
+      brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+      brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START));
+
+      brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+      loop = brw_DO(p, BRW_EXECUTE_1);
+      {
+         brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2));
+
+         brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+         brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+         brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+      }
+      brw_WHILE(p, loop);
+
+      brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));
+   }
+   brw_ENDIF(p, if_insn);
+}
+/* Unconditional clip: set up the planes, then emit the clipper. */
+static void do_clip_tri( struct brw_clip_compile *c )
+{
+   brw_clip_init_planes(c);
+
+   brw_clip_tri(c);
+}
+
+/* Conditional clip: the clipper is wrapped in a run-time test so that it
+ * only executes when planemask is non-zero.
+ */
+static void maybe_do_clip_tri( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *do_clip;
+
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+   do_clip = brw_IF(p, BRW_EXECUTE_1);
+   {
+      do_clip_tri(c);
+   }
+   brw_ENDIF(p, do_clip);
+}
+/* Emit a software clip test of the three incoming vertex positions; only
+ * called from the -ve rhw workaround path of brw_emit_tri_clip().
+ *
+ * The low six bits of planemask are cleared and then rebuilt:
+ *   - "component < -w" results for elements 0, 1, 2 set bits 5, 3, 1;
+ *   - "component >  w" results for elements 0, 1, 2 set bits 4, 2, 0;
+ * a bit is set when the three vertices disagree for that component.
+ * When all three vertices fail the same test for element 0, 1 or 2 the
+ * thread is killed instead.
+ *
+ * Uses seven temporaries from get_tmp() and releases them at the end.
+ */
+static void brw_clip_test( struct brw_clip_compile *c )
+{
+   struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+   struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+   struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+   struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+
+   struct brw_reg v0 = get_tmp(c);
+   struct brw_reg v1 = get_tmp(c);
+   struct brw_reg v2 = get_tmp(c);
+
+   struct brw_indirect vt0 = brw_indirect(0, 0);
+   struct brw_indirect vt1 = brw_indirect(1, 0);
+   struct brw_indirect vt2 = brw_indirect(2, 0);
+
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *is_outside;
+   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+   brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
+   brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
+   brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
+   brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
+   brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
+   brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
+   brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
+
+   /* test nearz, xmin, ymin plane */
+   /* clip.xyz < -clip.w */
+   brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   /* All vertices are outside of a plane, rejected */
+   brw_AND(p, t, t1, t2);
+   brw_AND(p, t, t, t3);
+   brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+   brw_OR(p, tmp0, tmp0, get_element(t, 2));
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+   brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+   is_outside = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_kill_thread(c);
+   }
+   brw_ENDIF(p, is_outside);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   /* some vertices are inside a plane, some are outside,need to clip */
+   brw_XOR(p, t, t1, t2);
+   brw_XOR(p, t1, t2, t3);
+   brw_OR(p, t, t, t1);
+   brw_AND(p, t, t, brw_imm_ud(0x1));
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+           get_element(t, 0), brw_imm_ud(0));
+   brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+           get_element(t, 1), brw_imm_ud(0));
+   brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+           get_element(t, 2), brw_imm_ud(0));
+   brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   /* test farz, xmax, ymax plane */
+   /* clip.xyz > clip.w */
+   brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   /* All vertices are outside of a plane, rejected */
+   brw_AND(p, t, t1, t2);
+   brw_AND(p, t, t, t3);
+   brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+   brw_OR(p, tmp0, tmp0, get_element(t, 2));
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+   brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+   is_outside = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_kill_thread(c);
+   }
+   brw_ENDIF(p, is_outside);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   /* some vertices are inside a plane, some are outside,need to clip */
+   brw_XOR(p, t, t1, t2);
+   brw_XOR(p, t1, t2, t3);
+   brw_OR(p, t, t, t1);
+   brw_AND(p, t, t, brw_imm_ud(0x1));
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+           get_element(t, 0), brw_imm_ud(0));
+   brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+           get_element(t, 1), brw_imm_ud(0));
+   brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+           get_element(t, 2), brw_imm_ud(0));
+   brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   release_tmps(c);
+}
+
+/* Build the complete triangle clip program.
+ *
+ * Reserves 3 + nr_userclip + 6 vertex slots, initialises the vertex list,
+ * clip mask and ff_sync, optionally runs the software clip test and the
+ * flat-shade colour copy, clips (unconditionally for
+ * BRW_CLIPMODE_NORMAL / BRW_CLIPMODE_KERNEL_CLIP, otherwise only when
+ * planemask is non-zero), emits the result as a trifan and ends the
+ * thread.
+ */
+void brw_emit_tri_clip( struct brw_clip_compile *c )
+{
+   struct brw_instruction *neg_rhw;
+   struct brw_compile *p = &c->func;
+   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+   brw_clip_tri_init_vertices(c);
+   brw_clip_init_clipmask(c);
+   brw_clip_init_ff_sync(c);
+
+   /* if -ve rhw workaround bit is set,
+      do cliptest */
+   if (BRW_IS_965(p->brw)) {
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+              brw_imm_ud(1<<20));
+      neg_rhw = brw_IF(p, BRW_EXECUTE_1);
+      {
+         brw_clip_test(c);
+      }
+      brw_ENDIF(p, neg_rhw);
+   }
+   /* Can't push into do_clip_tri because with polygon (or quad)
+    * flatshading, need to apply the flatshade here because we don't
+    * respect the PV when converting to trifan for emit:
+    */
+   if (c->key.do_flat_shading)
+      brw_clip_tri_flat_shade(c);
+
+   if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
+       (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
+      do_clip_tri(c);
+   else
+      maybe_do_clip_tri(c);
+
+   brw_clip_tri_emit_polygon(c);
+
+   /* Send an empty message to kill the thread:
+    */
+   brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c
new file mode 100644
index 0000000000..ad1bfa435f
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_unfilled.c
@@ -0,0 +1,505 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" + +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +/* This is performed against the original triangles, so no indirection + * required: +BZZZT! 
+ */ +static void compute_tri_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg e = c->reg.tmp0; + struct brw_reg f = c->reg.tmp1; + struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); + + + struct brw_reg v0n = get_tmp(c); + struct brw_reg v1n = get_tmp(c); + struct brw_reg v2n = get_tmp(c); + + /* Convert to NDC. + * NOTE: We can't modify the original vertex coordinates, + * as it may impact further operations. + * So, we have to keep normalized coordinates in temp registers. + * + * TBD-KC + * Try to optimize unnecessary MOV's. + */ + brw_MOV(p, v0n, v0); + brw_MOV(p, v1n, v1); + brw_MOV(p, v2n, v2); + + brw_clip_project_position(c, v0n); + brw_clip_project_position(c, v1n); + brw_clip_project_position(c, v2n); + + /* Calculate the vectors of two edges of the triangle: + */ + brw_ADD(p, e, v0n, negate(v2n)); + brw_ADD(p, f, v1n, negate(v2n)); + + /* Take their crossproduct: + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); + brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); +} + + +static void cull_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + GLuint conditional; + + assert (!(c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL)); + + if (c->key.fill_ccw == CLIP_CULL) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, ccw); +} + + + +static void 
copy_bfc( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + GLuint conditional; + + /* Do we have any colors to copy? + */ + if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) && + !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])) + return; + + /* In some weird degenerate cases we can end up testing the + * direction twice, once for culling and once for bfc copying. Oh + * well, that's what you get for setting weird GL state. + */ + if (c->key.copy_bfc_ccw) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + GLuint i; + + for (i = 0; i < 3; i++) { + if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1])); + } + } + brw_ENDIF(p, ccw); +} + + + + +/* + GLfloat iz = 1.0 / dir.z; + GLfloat ac = dir.x * iz; + GLfloat bc = dir.y * iz; + offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; + offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; + offset *= MRD; +*/ +static void compute_offset( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg off = c->reg.offset; + struct brw_reg dir = c->reg.dir; + + brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); + brw_MUL(p, vec2(off), dir, get_element(off, 2)); + + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + brw_abs(get_element(off, 0)), + brw_abs(get_element(off, 1))); + + brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); + 
brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); + brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); +} + + +static void merge_edgeflags( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + /* Get away with using reg.vertex because we know that this is not + * a _3DPRIM_TRISTRIP_REVERSE: + */ + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); + brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); + brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_poly); +} + + + +static void apply_one_offset( struct brw_clip_compile *c, + struct brw_indirect vert ) +{ + struct brw_compile *p = &c->func; + struct brw_reg z = deref_1f(vert, c->header_position_offset + + 2 * type_sz(BRW_REGISTER_TYPE_F)); + + brw_ADD(p, z, z, vec1(c->reg.offset)); +} + + + +/*********************************************************************** + * Output clipped polygon as an unfilled primitive: + */ +static void emit_lines(struct brw_clip_compile *c, + GLboolean do_offset) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_edge; + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v1 = brw_indirect(1, 0); + struct brw_indirect v0ptr 
= brw_indirect(2, 0); + struct brw_indirect v1ptr = brw_indirect(3, 0); + + /* Need a separate loop for offset: + */ + if (do_offset) { + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + apply_one_offset(c, v0); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + } + + /* v1ptr = &inlist[nr_verts] + * *v1ptr = v0 + */ + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw edge if edgeflag != 0 */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_edge = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, draw_edge); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + +static void emit_points(struct brw_clip_compile *c, + GLboolean do_offset ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_point; + + struct brw_indirect v0 = brw_indirect(0, 
0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw if edgeflag != 0 + */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_point = brw_IF(p, BRW_EXECUTE_1); + { + if (do_offset) + apply_one_offset(c, v0); + + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END); + } + brw_ENDIF(p, draw_point); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + + + + + +static void emit_primitives( struct brw_clip_compile *c, + GLuint mode, + GLboolean do_offset ) +{ + switch (mode) { + case CLIP_FILL: + brw_clip_tri_emit_polygon(c); + break; + + case CLIP_LINE: + emit_lines(c, do_offset); + break; + + case CLIP_POINT: + emit_points(c, do_offset); + break; + + case CLIP_CULL: + assert(0); + break; + } +} + + + +static void emit_unfilled_primitives( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + + /* Direction culling has already been done. 
+ */ + if (c->key.fill_ccw != c->key.fill_cw && + c->key.fill_ccw != CLIP_CULL && + c->key.fill_cw != CLIP_CULL) + { + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } + ccw = brw_ELSE(p, ccw); + { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + brw_ENDIF(p, ccw); + } + else if (c->key.fill_cw != CLIP_CULL) { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + else if (c->key.fill_ccw != CLIP_CULL) { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } +} + + + + +static void check_nr_verts( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3)); + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, if_insn); +} + + +void brw_emit_unfilled_clip( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + + c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || + (c->key.fill_ccw != c->key.fill_cw) || + c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL || + c->key.copy_bfc_cw || + c->key.copy_bfc_ccw); + + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + brw_clip_init_ff_sync(c); + + assert(c->offset[VERT_RESULT_EDGE]); + + if (c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL) { + brw_clip_kill_thread(c); + return; + } + + merge_edgeflags(c); + + /* Need to use the inlist indirection here: + */ + if (c->need_direction) + compute_tri_direction(c); + + if (c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL) + cull_direction(c); + + if (c->key.offset_ccw || + c->key.offset_cw) + compute_offset(c); + + if (c->key.copy_bfc_ccw || + c->key.copy_bfc_cw) + copy_bfc(c); + + /* Need to do 
this whether we clip or not: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + brw_clip_init_clipmask(c); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_init_planes(c); + brw_clip_tri(c); + check_nr_verts(c); + } + brw_ENDIF(p, do_clip); + + emit_unfilled_primitives(c); + brw_clip_kill_thread(c); +} + + + diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c new file mode 100644 index 0000000000..5a73abdfee --- /dev/null +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -0,0 +1,396 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/program.h" + +#include "intel_batchbuffer.h" + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + + +struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + + +static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w) +{ + return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); +} + + +void brw_clip_init_planes( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + + if (!c->key.nr_userclip) { + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); + } +} + + + +#define W 3 + +/* Project 'pos' to screen space (or back again), overwrite with results: + */ +void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +{ + struct brw_compile *p = &c->func; + + /* calc rhw + */ + brw_math_invert(p, get_element(pos, W), get_element(pos, W)); + + /* value.xyz *= value.rhw + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); + 
brw_set_access_mode(p, BRW_ALIGN_1); +} + + +static void brw_clip_project_vertex( struct brw_clip_compile *c, + struct brw_indirect vert_addr ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + + /* Fixup position. Extract from the original vertex and re-project + * to screen space: + */ + brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); + brw_clip_project_position(c, tmp); + brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); + + release_tmp(c, tmp); +} + + + + +/* Interpolate between two vertices and put the result into a0.0. + * Increment a0.0 accordingly. + */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + GLboolean force_edgeflag) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + GLuint i; + + /* Just copy the vertex header: + */ + /* + * After CLIP stage, only first 256 bits of the VUE are read + * back on IGDNG, so needn't change it + */ + brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); + + /* Iterate over each attribute (could be done in pairs?) 
+ */ + for (i = 0; i < c->nr_attrs; i++) { + GLuint delta = i*16 + 32; + + if (BRW_IS_IGDNG(p->brw)) + delta = i * 16 + 32 * 3; + + if (delta == c->offset[VERT_RESULT_EDGE]) { + if (force_edgeflag) + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); + else + brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); + } + else { + /* Interpolate: + * + * New = attr0 + t*attr1 - t*attr0 + */ + brw_MUL(p, + vec4(brw_null_reg()), + deref_4f(v1_ptr, delta), + t0); + + brw_MAC(p, + tmp, + negate(deref_4f(v0_ptr, delta)), + t0); + + brw_ADD(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta), + tmp); + } + } + + if (i & 1) { + GLuint delta = i*16 + 32; + + if (BRW_IS_IGDNG(p->brw)) + delta = i * 16 + 32 * 3; + + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); + } + + release_tmp(c, tmp); + + /* Recreate the projected (NDC) coordinate in the new vertex + * header: + */ + brw_clip_project_vertex(c, dest_ptr ); +} + + + + +#define MAX_MRF 16 + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + GLboolean allocate, + GLboolean eot, + GLuint header) +{ + struct brw_compile *p = &c->func; + GLuint start = c->last_mrf; + + brw_clip_ff_sync(c); + + assert(!(allocate && eot)); + + /* Cycle through mrf regs - probably futile as we have to wait for + * the allocation response anyway. Also, the order this function + * is invoked doesn't correspond to the order the instructions will + * be executed, so it won't have any effect in many cases. + */ +#if 0 + if (start + c->nr_regs + 1 >= MAX_MRF) + start = 0; + + c->last_mrf = start + c->nr_regs + 1; +#endif + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + + /* Send each vertex as a separate write to the urb. 
This + * is different to the concept in brw_sf_emit.c, where + * subsequent writes are used to build up a single urb + * entry. Each of these writes instantiates a separate + * urb entry - (I think... what about 'allocate'?) + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + start, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response_length */ + eot, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_clip_kill_thread(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + + brw_clip_ff_sync(c); + /* Send an empty message to kill the thread and release any + * allocated urb entry: + */ + brw_urb_WRITE(p, + retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + 0, /* allocate */ + 0, /* used */ + 1, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, + BRW_URB_SWIZZLE_NONE); +} + + + + +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) +{ + return brw_address(c->reg.fixed_planes); +} + + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) +{ + if (c->key.nr_userclip) { + return brw_imm_uw(16); + } + else { + return brw_imm_uw(4); + } +} + + +/* If flatshading, distribute color from provoking vertex prior to + * clipping. 
+ */ +void brw_clip_copy_colors( struct brw_clip_compile *c, + GLuint to, GLuint from ) +{ + struct brw_compile *p = &c->func; + + if (c->offset[VERT_RESULT_COL0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0])); + + if (c->offset[VERT_RESULT_COL1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1])); + + if (c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1])); +} + + + +void brw_clip_init_clipmask( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg incoming = get_element_ud(c->reg.R0, 2); + + /* Shift so that lowest outcode bit is rightmost: + */ + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); + + if (c->key.nr_userclip) { + struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); + + /* Rearrange userclip outcodes so that they come directly after + * the fixed plane bits. 
+ */ + brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); + brw_SHR(p, tmp, tmp, brw_imm_ud(8)); + brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); + + release_tmp(c, tmp); + } +} + +void brw_clip_ff_sync(struct brw_clip_compile *c) +{ + if (c->need_ff_sync) { + struct brw_compile *p = &c->func; + struct brw_instruction *need_ff_sync; + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1)); + need_ff_sync = brw_IF(p, BRW_EXECUTE_1); + { + brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1)); + brw_ff_sync(p, + c->reg.R0, + 0, + c->reg.R0, + 1, + 1, /* used */ + 1, /* msg length */ + 1, /* response length */ + 0, /* eot */ + 1, /* writes complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); + } + brw_ENDIF(p, need_ff_sync); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } +} + +void brw_clip_init_ff_sync(struct brw_clip_compile *c) +{ + if (c->need_ff_sync) { + struct brw_compile *p = &c->func; + + brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0)); + } +} diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c new file mode 100644 index 0000000000..c300c33adc --- /dev/null +++ b/src/gallium/drivers/i965/brw_context.c @@ -0,0 +1,173 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "main/imports.h" +#include "main/api_noop.h" +#include "main/macros.h" +#include "main/vtxfmt.h" +#include "main/simple_list.h" +#include "shader/shader_api.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_draw.h" +#include "brw_state.h" +#include "brw_vs.h" +#include "intel_tex.h" +#include "intel_blit.h" +#include "intel_batchbuffer.h" +#include "intel_pixel.h" +#include "intel_span.h" +#include "tnl/t_pipeline.h" + +#include "utils.h" + + +/*************************************** + * Mesa's Driver Functions + ***************************************/ + +static void brwUseProgram(GLcontext *ctx, GLuint program) +{ + _mesa_use_program(ctx, program); +} + +static void brwInitProgFuncs( struct dd_function_table *functions ) +{ + functions->UseProgram = brwUseProgram; +} +static void brwInitDriverFunctions( struct dd_function_table *functions ) +{ + intelInitDriverFunctions( functions ); + + brwInitFragProgFuncs( functions ); + brwInitProgFuncs( functions ); + brw_init_queryobj_functions(functions); + + functions->Viewport = intel_viewport; +} + +GLboolean brwCreateContext( const __GLcontextModes *mesaVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate) +{ + struct dd_function_table functions; + struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + + if (!brw) { + _mesa_printf("%s: failed to alloc context\n", __FUNCTION__); + return GL_FALSE; + } + + brwInitVtbl( brw ); + brwInitDriverFunctions( &functions ); + + if (!intelInitContext( intel, mesaVis, driContextPriv, + sharedContextPrivate, &functions )) { + _mesa_printf("%s: failed to init intel context\n", __FUNCTION__); + FREE(brw); + return GL_FALSE; + } + + /* Initialize swrast, tnl driver tables: */ + intelInitSpanFuncs(ctx); + + 
TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; + + ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; + ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ + ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, + ctx->Const.MaxTextureImageUnits); + ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ + + /* Mesa limits textures to 4kx4k; it would be nice to fix that someday + */ + ctx->Const.MaxTextureLevels = 13; + ctx->Const.Max3DTextureLevels = 9; + ctx->Const.MaxCubeTextureLevels = 12; + ctx->Const.MaxTextureRectSize = (1<<12); + + ctx->Const.MaxTextureMaxAnisotropy = 16.0; + + /* if conformance mode is set, swrast can handle any size AA point */ + ctx->Const.MaxPointSizeAA = 255.0; + + /* We want the GLSL compiler to emit code that uses condition codes */ + ctx->Shader.EmitCondCodes = GL_TRUE; + ctx->Shader.EmitNVTempInitialization = GL_TRUE; + + ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); + ctx->Const.VertexProgram.MaxAluInstructions = 0; + ctx->Const.VertexProgram.MaxTexInstructions = 0; + ctx->Const.VertexProgram.MaxTexIndirections = 0; + ctx->Const.VertexProgram.MaxNativeAluInstructions = 0; + ctx->Const.VertexProgram.MaxNativeTexInstructions = 0; + ctx->Const.VertexProgram.MaxNativeTexIndirections = 0; + ctx->Const.VertexProgram.MaxNativeAttribs = 16; + ctx->Const.VertexProgram.MaxNativeTemps = 256; + ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; + ctx->Const.VertexProgram.MaxNativeParameters = 1024; + ctx->Const.VertexProgram.MaxEnvParams = + MIN2(ctx->Const.VertexProgram.MaxNativeParameters, + ctx->Const.VertexProgram.MaxEnvParams); + + ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024); + ctx->Const.FragmentProgram.MaxNativeAttribs = 12; + 
ctx->Const.FragmentProgram.MaxNativeTemps = 256; + ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; + ctx->Const.FragmentProgram.MaxNativeParameters = 1024; + ctx->Const.FragmentProgram.MaxEnvParams = + MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, + ctx->Const.FragmentProgram.MaxEnvParams); + + brw_init_state( brw ); + + brw->state.dirty.mesa = ~0; + brw->state.dirty.brw = ~0; + + brw->emit_state_always = 0; + + ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + + make_empty_list(&brw->query.active_head); + + brw_draw_init( brw ); + + return GL_TRUE; +} + diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h new file mode 100644 index 0000000000..fa3e32c7ff --- /dev/null +++ b/src/gallium/drivers/i965/brw_context.h @@ -0,0 +1,767 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + +#include "intel_context.h" +#include "brw_structs.h" +#include "main/imports.h" + + +/* Glossary: + * + * URB - uniform resource buffer. A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry. An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawning a thread. + * + * VUE - vertex URB entry. An urb entry holding a vertex and usually + * a vertex header. The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry. An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file. One of several register files + * addressable by programmed threads. The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned. The registers are individually 8 dwords wide and suitable + * for general usage. Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file. Threads communicate (and terminate) + * by sending messages. Message parameters are placed in contiguous + * MRF registers. All program output is via these messages. 
URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0. Typically holds control information used when + * sending messages to other threads. + * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware. Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer. Notional first unit, little software + * interaction. Holds the URB entries used for constant data, ie the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs. If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader. This corresponds to a new DX10 concept. If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units. The GS thread may perform arbitrary + * computation and emit whatever primitives with whatever vertices it + * chooses. This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more. Additionally, GS is used to translate away primitives not + * handled by later units, including Quads and Lineloops. + * + * CLIP - Clipper. Mesa's clipping algorithms are imported to run on + * this unit. The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes decisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization.
Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_FALLBACK_TEXTURE 0x1 +#define BRW_MAX_CURBE (32*16) + +struct brw_context; + +#define BRW_NEW_URB_FENCE 0x1 +#define BRW_NEW_FRAGMENT_PROGRAM 0x2 +#define BRW_NEW_VERTEX_PROGRAM 0x4 +#define BRW_NEW_INPUT_DIMENSIONS 0x8 +#define BRW_NEW_CURBE_OFFSETS 0x10 +#define BRW_NEW_REDUCED_PRIMITIVE 0x20 +#define BRW_NEW_PRIMITIVE 0x40 +#define BRW_NEW_CONTEXT 0x80 +#define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 +#define BRW_NEW_PSP 0x800 +#define BRW_NEW_WM_SURFACES 0x1000 +#define BRW_NEW_FENCE 0x2000 +#define BRW_NEW_INDICES 0x4000 +#define BRW_NEW_VERTICES 0x8000 +/** + * Used for any batch entry with a relocated pointer that will be used + * by any 3D rendering. 
+ */ +#define BRW_NEW_BATCH 0x10000 +/** brw->depth_region updated */ +#define BRW_NEW_DEPTH_BUFFER 0x20000 +#define BRW_NEW_NR_WM_SURFACES 0x40000 +#define BRW_NEW_NR_VS_SURFACES 0x80000 +#define BRW_NEW_INDEX_BUFFER 0x100000 + +struct brw_state_flags { + /** State update flags signalled by mesa internals */ + GLuint mesa; + /** + * State update flags signalled as the result of brw_tracked_state updates + */ + GLuint brw; + /** State update flags signalled by brw_state_cache.c searches */ + GLuint cache; +}; + + +/** Subclass of Mesa vertex program */ +struct brw_vertex_program { + struct gl_vertex_program program; + GLuint id; + dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; +}; + + +/** Subclass of Mesa fragment program */ +struct brw_fragment_program { + struct gl_fragment_program program; + GLuint id; /**< serial no. to identify frag progs, never re-used */ + GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ + + dri_bo *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; + + /** for debugging, which texture units are referenced */ + GLbitfield tex_units_used; +}; + + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +struct brw_wm_prog_data { + GLuint curb_read_length; + GLuint urb_read_length; + + GLuint first_curbe_grf; + GLuint total_grf; + GLuint total_scratch; + + GLuint nr_params; /**< number of float params/constants */ + GLboolean error; + + /* Pointer to tracked values (only valid once + * _mesa_load_state_parameters has been called at runtime). + */ + const GLfloat *param[BRW_MAX_CURBE]; +}; + +struct brw_sf_prog_data { + GLuint urb_read_length; + GLuint total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. 
(11*4 + 2) == 44 ==> 11 + * rows. + * + * Actually we use 4 for each, so call it 12 rows. + */ + GLuint urb_entry_size; +}; + +struct brw_clip_prog_data { + GLuint curb_read_length; /* user planes? */ + GLuint clip_mode; + GLuint urb_read_length; + GLuint total_grf; +}; + +struct brw_gs_prog_data { + GLuint urb_read_length; + GLuint total_grf; +}; + +struct brw_vs_prog_data { + GLuint curb_read_length; + GLuint urb_read_length; + GLuint total_grf; + GLuint outputs_written; + GLuint nr_params; /**< number of float params/constants */ + + GLuint inputs_read; + + /* Used for calculating urb partitions: + */ + GLuint urb_entry_size; +}; + + +/* Size == 0 if output either not written, or always [0,0,0,1] + */ +struct brw_vs_ouput_sizes { + GLubyte output_size[VERT_RESULT_MAX]; +}; + + +/** Number of texture sampler units */ +#define BRW_MAX_TEX_UNIT 16 + +/** + * Size of our surface binding table for the WM. + * This contains pointers to the drawing surfaces and current texture + * objects and shader constant buffers (+2). + */ +#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) + +/** + * Helpers to convert drawing buffers, textures and constant buffers + * to surface binding table indexes, for WM. + */ +#define SURF_INDEX_DRAW(d) (d) +#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) +#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) + +/** + * Size of surface binding table for the VS. + * Only one constant buffer for now. 
+ */ +#define BRW_VS_MAX_SURF 1 + +/** + * Only a VS constant buffer + */ +#define SURF_INDEX_VERT_CONST_BUFFER 0 + + +enum brw_cache_id { + BRW_CC_VP, + BRW_CC_UNIT, + BRW_WM_PROG, + BRW_SAMPLER_DEFAULT_COLOR, + BRW_SAMPLER, + BRW_WM_UNIT, + BRW_SF_PROG, + BRW_SF_VP, + BRW_SF_UNIT, + BRW_VS_UNIT, + BRW_VS_PROG, + BRW_GS_UNIT, + BRW_GS_PROG, + BRW_CLIP_VP, + BRW_CLIP_UNIT, + BRW_CLIP_PROG, + BRW_SS_SURFACE, + BRW_SS_SURF_BIND, + + BRW_MAX_CACHE +}; + +struct brw_cache_item { + /** + * Effectively part of the key, cache_id identifies what kind of state + * buffer is involved, and also which brw->state.dirty.cache flag should + * be set when this cache item is chosen. + */ + enum brw_cache_id cache_id; + /** 32-bit hash of the key data */ + GLuint hash; + GLuint key_size; /* for variable-sized keys */ + const void *key; + dri_bo **reloc_bufs; + GLuint nr_reloc_bufs; + + dri_bo *bo; + GLuint data_size; + + struct brw_cache_item *next; +}; + + + +struct brw_cache { + struct brw_context *brw; + + struct brw_cache_item **items; + GLuint size, n_items; + + GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */ + GLuint aux_size[BRW_MAX_CACHE]; + char *name[BRW_MAX_CACHE]; + + /* Record of the last BOs chosen for each cache_id. Used to set + * brw->state.dirty.cache when a new cache item is chosen. + */ + dri_bo *last_bo[BRW_MAX_CACHE]; +}; + + +/* Considered adding a member to this struct to document which flags + * an update might raise so that ordering of the state atoms can be + * checked or derived at runtime. Dropped the idea in favor of having + * a debug mode where the state is monitored for flags which are + * raised that have already been tested against. + */ +struct brw_tracked_state { + struct brw_state_flags dirty; + void (*prepare)( struct brw_context *brw ); + void (*emit)( struct brw_context *brw ); +}; + +/* Flags for brw->state.cache. + */ +#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) +#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) +#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) +#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) +#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) +#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) +#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) +#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) +#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) +#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) +#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) +#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) +#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) +#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) +#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) +#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) +#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) +#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) + +struct brw_cached_batch_item { + struct header *header; + GLuint sz; + struct brw_cached_batch_item *next; +}; + + + +/* Protect against a future where VERT_ATTRIB_MAX > 32. Wouldn't life + * be easier if C allowed arrays of packed elements?
+ */ +#define ATTRIB_BIT_DWORDS ((VERT_ATTRIB_MAX+31)/32) + +struct brw_vertex_element { + const struct gl_client_array *glarray; + + /** The corresponding Mesa vertex attribute */ + gl_vert_attrib attrib; + /** Size of a complete element */ + GLuint element_size; + /** Number of uploaded elements for this input. */ + GLuint count; + /** Byte stride between elements in the uploaded array */ + GLuint stride; + /** Offset of the first element within the buffer object */ + unsigned int offset; + /** Buffer object containing the uploaded vertex data */ + dri_bo *bo; +}; + + + +struct brw_vertex_info { + GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */ +}; + + + + +/* Cache for TNL programs. + */ +struct brw_tnl_cache_item { + GLuint hash; + void *key; + void *data; + struct brw_tnl_cache_item *next; +}; + +struct brw_tnl_cache { + struct brw_tnl_cache_item **items; + GLuint size, n_items; +}; + +struct brw_query_object { + struct gl_query_object Base; + + /** Doubly linked list of active query objects in the context. */ + struct brw_query_object *prev, *next; + + /** Last query BO associated with this query. */ + dri_bo *bo; + /** First index in bo with query data for this object. */ + int first_index; + /** Last index in bo with query data for this object. */ + int last_index; + + /* Total count of pixels from previous BOs */ + unsigned int count; +}; + + +/** + * brw_context is derived from intel_context. 
+ */ +struct brw_context +{ + struct intel_context intel; /**< base class, must be first field */ + GLuint primitive; + + GLboolean emit_state_always; + GLboolean tmp_fallback; + GLboolean no_batch_wrap; + + struct { + struct brw_state_flags dirty; + + GLuint nr_color_regions; + struct intel_region *color_regions[MAX_DRAW_BUFFERS]; + struct intel_region *depth_region; + + /** + * List of buffers accumulated in brw_validate_state to receive + * dri_bo_check_aperture treatment before exec, so we can know if we + * should flush the batch and try again before emitting primitives. + * + * This can be a fixed number as we only have a limited number of + * objects referenced from the batchbuffer in a primitive emit, + * consisting of the vertex buffers, pipelined state pointers, + * the CURBE, the depth buffer, and a query BO. + */ + dri_bo *validated_bos[VERT_ATTRIB_MAX + 16]; + int validated_bo_count; + } state; + + struct brw_cache cache; /** non-surface items */ + struct brw_cache surface_cache; /* surface items */ + struct brw_cached_batch_item *cached_batch_items; + + struct { + struct brw_vertex_element inputs[VERT_ATTRIB_MAX]; + + struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; + GLuint nr_enabled; + +#define BRW_NR_UPLOAD_BUFS 17 +#define BRW_UPLOAD_INIT_SIZE (128*1024) + + struct { + dri_bo *bo; + GLuint offset; + } upload; + + /* Summary of size and varying of active arrays, so we can check + * for changes to this state: + */ + struct brw_vertex_info info; + unsigned int min_index, max_index; + } vb; + + struct { + /** + * Index buffer for this draw_prims call. + * + * Updates are signaled by BRW_NEW_INDICES. + */ + const struct _mesa_index_buffer *ib; + + /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */ + dri_bo *bo; + unsigned int offset; + unsigned int size; + /* Offset to index buffer index to use in CMD_3D_PRIM so that we can + * avoid re-uploading the IB packet over and over if we're actually + * referencing the same index buffer. 
+ */ + unsigned int start_vertex_offset; + } ib; + + /* Active vertex program: + */ + const struct gl_vertex_program *vertex_program; + const struct gl_fragment_program *fragment_program; + + + /* For populating the gtt: + */ + GLuint next_free_page; + + + /* BRW_NEW_URB_ALLOCATIONS: + */ + struct { + GLuint vsize; /* vertex size plus header in urb registers */ + GLuint csize; /* constant buffer size in urb registers */ + GLuint sfsize; /* setup data size in urb registers */ + + GLboolean constrained; + + GLuint nr_vs_entries; + GLuint nr_gs_entries; + GLuint nr_clip_entries; + GLuint nr_sf_entries; + GLuint nr_cs_entries; + +/* GLuint vs_size; */ +/* GLuint gs_size; */ +/* GLuint clip_size; */ +/* GLuint sf_size; */ +/* GLuint cs_size; */ + + GLuint vs_start; + GLuint gs_start; + GLuint clip_start; + GLuint sf_start; + GLuint cs_start; + } urb; + + + /* BRW_NEW_CURBE_OFFSETS: + */ + struct { + GLuint wm_start; /**< pos of first wm const in CURBE buffer */ + GLuint wm_size; /**< number of float[4] consts, multiple of 16 */ + GLuint clip_start; + GLuint clip_size; + GLuint vs_start; + GLuint vs_size; + GLuint total_size; + + dri_bo *curbe_bo; + /** Offset within curbe_bo of space for current curbe entry */ + GLuint curbe_offset; + /** Offset within curbe_bo of space for next curbe entry */ + GLuint curbe_next_offset; + + GLfloat *last_buf; + GLuint last_bufsz; + /** + * Whether we should create a new bo instead of reusing the old one + * (if we just dispatch the batch pointing at the old one. 
+ */ + GLboolean need_new_bo; + } curbe; + + struct { + struct brw_vs_prog_data *prog_data; + + dri_bo *prog_bo; + dri_bo *state_bo; + + /** Binding table of pointers to surf_bo entries */ + dri_bo *bind_bo; + dri_bo *surf_bo[BRW_VS_MAX_SURF]; + GLuint nr_surfaces; + } vs; + + struct { + struct brw_gs_prog_data *prog_data; + + GLboolean prog_active; + dri_bo *prog_bo; + dri_bo *state_bo; + } gs; + + struct { + struct brw_clip_prog_data *prog_data; + + dri_bo *prog_bo; + dri_bo *state_bo; + dri_bo *vp_bo; + } clip; + + + struct { + struct brw_sf_prog_data *prog_data; + + dri_bo *prog_bo; + dri_bo *state_bo; + dri_bo *vp_bo; + } sf; + + struct { + struct brw_wm_prog_data *prog_data; + struct brw_wm_compile *compile_data; + + /** Input sizes, calculated from active vertex program. + * One bit per fragment program input attribute. + */ + GLbitfield input_size_masks[4]; + + /** Array of surface default colors (texture border color) */ + dri_bo *sdc_bo[BRW_MAX_TEX_UNIT]; + + GLuint render_surf; + GLuint nr_surfaces; + + GLuint max_threads; + dri_bo *scratch_bo; + + GLuint sampler_count; + dri_bo *sampler_bo; + + /** Binding table of pointers to surf_bo entries */ + dri_bo *bind_bo; + dri_bo *surf_bo[BRW_WM_MAX_SURF]; + + dri_bo *prog_bo; + dri_bo *state_bo; + } wm; + + + struct { + dri_bo *prog_bo; + dri_bo *state_bo; + dri_bo *vp_bo; + } cc; + + struct { + struct brw_query_object active_head; + dri_bo *bo; + int index; + GLboolean active; + } query; + /* Used to give every program string a unique id + */ + GLuint program_id; +}; + + +#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) + + + +/*====================================================================== + * brw_vtbl.c + */ +void brwInitVtbl( struct brw_context *brw ); + +/*====================================================================== + * brw_context.c + */ +GLboolean brwCreateContext( const __GLcontextModes *mesaVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate); 
+ +/*====================================================================== + * brw_queryobj.c + */ +void brw_init_queryobj_functions(struct dd_function_table *functions); +void brw_prepare_query_begin(struct brw_context *brw); +void brw_emit_query_begin(struct brw_context *brw); +void brw_emit_query_end(struct brw_context *brw); + +/*====================================================================== + * brw_state_dump.c + */ +void brw_debug_batch(struct intel_context *intel); + +/*====================================================================== + * brw_tex.c + */ +void brw_validate_textures( struct brw_context *brw ); + + +/*====================================================================== + * brw_program.c + */ +void brwInitFragProgFuncs( struct dd_function_table *functions ); + + +/* brw_urb.c + */ +void brw_upload_urb_fence(struct brw_context *brw); + +/* brw_curbe.c + */ +void brw_upload_cs_urb_state(struct brw_context *brw); + +/* brw_disasm.c */ +int brw_disasm (FILE *file, struct brw_instruction *inst); + +/*====================================================================== + * Inline conversion functions. 
These are better-typed than the + * macros used previously: + */ +static INLINE struct brw_context * +brw_context( GLcontext *ctx ) +{ + return (struct brw_context *)ctx; +} + +static INLINE struct brw_vertex_program * +brw_vertex_program(struct gl_vertex_program *p) +{ + return (struct brw_vertex_program *) p; +} + +static INLINE const struct brw_vertex_program * +brw_vertex_program_const(const struct gl_vertex_program *p) +{ + return (const struct brw_vertex_program *) p; +} + +static INLINE struct brw_fragment_program * +brw_fragment_program(struct gl_fragment_program *p) +{ + return (struct brw_fragment_program *) p; +} + +static INLINE const struct brw_fragment_program * +brw_fragment_program_const(const struct gl_fragment_program *p) +{ + return (const struct brw_fragment_program *) p; +} + + + +#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) + +#endif + diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c new file mode 100644 index 0000000000..4be6c77aa1 --- /dev/null +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -0,0 +1,376 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + + +#include "main/glheader.h" +#include "main/context.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_statevars.h" +#include "intel_batchbuffer.h" +#include "intel_regions.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_util.h" + + +/** + * Partition the CURBE between the various users of constant values: + * Note that vertex and fragment shaders can now fetch constants out + * of constant buffers. We no longer allocate a block of the GRF for + * constants. That greatly reduces the demand for space in the CURBE. + * Some of the comments within are dated... + */ +static void calculate_curbe_offsets( struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + /* CACHE_NEW_WM_PROG */ + const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; + + /* BRW_NEW_VERTEX_PROGRAM */ + const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16; + GLuint nr_clip_regs = 0; + GLuint total_regs; + + /* _NEW_TRANSFORM */ + if (ctx->Transform.ClipPlanesEnabled) { + GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled); + nr_clip_regs = (nr_planes * 4 + 15) / 16; + } + + + total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; + + /* This can happen - what to do?
Probably rather than falling + * back, the best thing to do is emit programs which code the + * constants as immediate values. Could do this either as a static + * cap on WM and VS, or adaptively. + * + * Unfortunately, this is currently dependent on the results of the + * program generation process (in the case of wm), so this would + * introduce the need to re-generate programs in the event of a + * curbe allocation failure. + */ + /* Max size is 32 - just large enough to + * hold the 128 parameters allowed by + * the fragment and vertex program + * api's. It's not clear what happens + * when both VP and FP want to use 128 + * parameters, though. + */ + assert(total_regs <= 32); + + /* Lazy resize: + */ + if (nr_fp_regs > brw->curbe.wm_size || + nr_vp_regs > brw->curbe.vs_size || + nr_clip_regs != brw->curbe.clip_size || + (total_regs < brw->curbe.total_size / 4 && + brw->curbe.total_size > 16)) { + + GLuint reg = 0; + + /* Calculate a new layout: + */ + reg = 0; + brw->curbe.wm_start = reg; + brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; + brw->curbe.clip_start = reg; + brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; + brw->curbe.vs_start = reg; + brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; + brw->curbe.total_size = reg; + + if (0) + _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", + brw->curbe.wm_start, + brw->curbe.wm_size, + brw->curbe.clip_start, + brw->curbe.clip_size, + brw->curbe.vs_start, + brw->curbe.vs_size ); + + brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; + } +} + + +const struct brw_tracked_state brw_curbe_offsets = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = BRW_NEW_VERTEX_PROGRAM, + .cache = CACHE_NEW_WM_PROG + }, + .prepare = calculate_curbe_offsets +}; + + + + +/* Define the number of curbes within CS's urb allocation. Multiple + * urb entries -> multiple curbes. 
These will be used by + * fixed-function hardware in a double-buffering scheme to avoid a + * pipeline stall each time the contents of the curbe is changed. + */ +void brw_upload_cs_urb_state(struct brw_context *brw) +{ + struct brw_cs_urb_state cs_urb; + memset(&cs_urb, 0, sizeof(cs_urb)); + + /* It appears that this is the state packet for the CS unit, ie. the + * urb entries detailed here are housed in the CS range from the + * URB_FENCE command. + */ + cs_urb.header.opcode = CMD_CS_URB_STATE; + cs_urb.header.length = sizeof(cs_urb)/4 - 2; + + /* BRW_NEW_URB_FENCE */ + cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cs_urb.bits0.urb_entry_size = brw->urb.csize - 1; + + assert(brw->urb.nr_cs_entries); + BRW_CACHED_BATCH_STRUCT(brw, &cs_urb); +} + +static GLfloat fixed_plane[6][4] = { + { 0, 0, -1, 1 }, + { 0, 0, 1, 1 }, + { 0, -1, 0, 1 }, + { 0, 1, 0, 1 }, + {-1, 0, 0, 1 }, + { 1, 0, 0, 1 } +}; + +/* Upload a new set of constants. Too much variability to go into the + * cache mechanism, but maybe would benefit from a comparison against + * the current uploaded set of constants. 
+ */ +static void prepare_constant_buffer(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + const struct brw_vertex_program *vp = + brw_vertex_program_const(brw->vertex_program); + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + const GLuint sz = brw->curbe.total_size; + const GLuint bufsz = sz * 16 * sizeof(GLfloat); + GLfloat *buf; + GLuint i; + + if (sz == 0) { + if (brw->curbe.last_buf) { + free(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + brw->curbe.last_bufsz = 0; + } + return; + } + + buf = (GLfloat *) _mesa_calloc(bufsz); + + /* fragment shader constants */ + if (brw->curbe.wm_size) { + GLuint offset = brw->curbe.wm_start * 16; + + _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + + /* copy float constants */ + for (i = 0; i < brw->wm.prog_data->nr_params; i++) + buf[offset + i] = *brw->wm.prog_data->param[i]; + } + + + /* The clipplanes are actually delivered to both CLIP and VS units. + * VS uses them to calculate the outcode bitmasks. 
+ */ + if (brw->curbe.clip_size) { + GLuint offset = brw->curbe.clip_start * 16; + GLuint j; + + /* If any planes are going this way, send them all this way: + */ + for (i = 0; i < 6; i++) { + buf[offset + i * 4 + 0] = fixed_plane[i][0]; + buf[offset + i * 4 + 1] = fixed_plane[i][1]; + buf[offset + i * 4 + 2] = fixed_plane[i][2]; + buf[offset + i * 4 + 3] = fixed_plane[i][3]; + } + + /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to + * clip-space: + */ + assert(MAX_CLIP_PLANES == 6); + for (j = 0; j < MAX_CLIP_PLANES; j++) { + if (ctx->Transform.ClipPlanesEnabled & (1<<j)) { + buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0]; + buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1]; + buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2]; + buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3]; + i++; + } + } + } + + /* vertex shader constants */ + if (brw->curbe.vs_size) { + GLuint offset = brw->curbe.vs_start * 16; + GLuint nr = brw->vs.prog_data->nr_params / 4; + + if (brw->vertex_program->IsNVProgram) + _mesa_load_tracked_matrices(ctx); + + /* Updates the ParameterValues[i] pointers for all parameters of the + * basic type of PROGRAM_STATE_VAR. + */ + _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + + /* XXX just use a memcpy here */ + for (i = 0; i < nr; i++) { + const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; + buf[offset + i * 4 + 0] = value[0]; + buf[offset + i * 4 + 1] = value[1]; + buf[offset + i * 4 + 2] = value[2]; + buf[offset + i * 4 + 3] = value[3]; + } + } + + if (0) { + for (i = 0; i < sz*16; i+=4) + _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + buf[i+0], buf[i+1], buf[i+2], buf[i+3]); + + _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", + brw->curbe.last_buf, buf, + bufsz, brw->curbe.last_bufsz, + brw->curbe.last_buf ?
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); + } + + if (brw->curbe.curbe_bo != NULL && + brw->curbe.last_buf && + bufsz == brw->curbe.last_bufsz && + memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { + /* constants have not changed */ + _mesa_free(buf); + } + else { + /* constants have changed */ + if (brw->curbe.last_buf) + _mesa_free(brw->curbe.last_buf); + + brw->curbe.last_buf = buf; + brw->curbe.last_bufsz = bufsz; + + if (brw->curbe.curbe_bo != NULL && + (brw->curbe.need_new_bo || + brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)) + { + dri_bo_unreference(brw->curbe.curbe_bo); + brw->curbe.curbe_bo = NULL; + } + + if (brw->curbe.curbe_bo == NULL) { + /* Allocate a single page for CURBE entries for this batchbuffer. + * They're generally around 64b. + */ + brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", + 4096, 1 << 6); + brw->curbe.curbe_next_offset = 0; + } + + brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; + brw->curbe.curbe_next_offset += bufsz; + brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64); + + /* Copy data to the buffer: + */ + dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf); + } + + brw_add_validated_bo(brw, brw->curbe.curbe_bo); + + /* Because this provokes an action (ie copy the constants into the + * URB), it shouldn't be shortcircuited if identical to the + * previous time - because eg. the urb destination may have + * changed, or the urb contents different to last time. + * + * Note that the data referred to is actually copied internally, + * not just used in place according to passed pointer. + * + * It appears that the CS unit takes care of using each available + * URB entry (Const URB Entry == CURBE) in turn, and issuing + * flushes as necessary when doublebuffering of CURBEs isn't + * possible. 
+ */ +} + +static void emit_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + GLuint sz = brw->curbe.total_size; + + BEGIN_BATCH(2, IGNORE_CLIPRECTS); + if (sz == 0) { + OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); + OUT_BATCH(0); + } else { + OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); + OUT_RELOC(brw->curbe.curbe_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + (sz - 1) + brw->curbe.curbe_offset); + } + ADVANCE_BATCH(); +} + +/* This tracked state is unique in that the state it monitors varies + * dynamically depending on the parameters tracked by the fragment and + * vertex programs. This is the template used as a starting point, + * each context will maintain a copy of this internally and update as + * required. + */ +const struct brw_tracked_state brw_constant_buffer = { + .dirty = { + .mesa = _NEW_PROGRAM_CONSTANTS, + .brw = (BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ + BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ + BRW_NEW_CURBE_OFFSETS | + BRW_NEW_BATCH), + .cache = (CACHE_NEW_WM_PROG) + }, + .prepare = prepare_constant_buffer, + .emit = emit_constant_buffer, +}; + diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h new file mode 100644 index 0000000000..78d457ad2b --- /dev/null +++ b/src/gallium/drivers/i965/brw_defines.h @@ -0,0 +1,851 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  *
+  * NOTE(review): an e-mail address after the name appears to have been
+  * stripped from this copy -- confirm against the original patch.
+  */
+
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/* 3D state:
+ *
+ * _3DOP_* values 0x0..0x3 name four opcode classes.
+ */
+#define _3DOP_3DSTATE_PIPELINED       0x0
+#define _3DOP_3DSTATE_NONPIPELINED    0x1
+#define _3DOP_3DCONTROL               0x2
+#define _3DOP_3DPRIMITIVE             0x3
+/* Two runs of _3DSTATE_* sub-opcodes follow and their values overlap (both restart at 0x00); presumably the first run belongs to the pipelined class and the second to the non-pipelined one -- TODO confirm. */
+#define _3DSTATE_PIPELINED_POINTERS       0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS   0x01
+#define _3DSTATE_VERTEX_BUFFERS           0x08
+#define _3DSTATE_VERTEX_ELEMENTS          0x09
+#define _3DSTATE_INDEX_BUFFER             0x0A
+#define _3DSTATE_VF_STATISTICS            0x0B
+#define _3DSTATE_DRAWING_RECTANGLE            0x00
+#define _3DSTATE_CONSTANT_COLOR               0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD         0x02
+#define _3DSTATE_CHROMA_KEY                   0x04
+#define _3DSTATE_DEPTH_BUFFER                 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET          0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN         0x07
+#define _3DSTATE_LINE_STIPPLE                 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP    0x09
+#define _3DCONTROL    0x00
+/* PIPE_CONTROL write selectors (0..3) and GTT write scope (0..1). */
+#define PIPE_CONTROL_NOWRITE          0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE   0x01
+#define PIPE_CONTROL_WRITEDEPTH       0x02
+#define PIPE_CONTROL_WRITETIMESTAMP   0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL        0x01
+/* Primitive type codes: contiguous 0x01..0x15; 0x00 is not defined here. */
+#define _3DPRIM_POINTLIST         0x01
+#define _3DPRIM_LINELIST          0x02
+#define _3DPRIM_LINESTRIP         0x03
+#define _3DPRIM_TRILIST           0x04
+#define _3DPRIM_TRISTRIP          0x05
+#define _3DPRIM_TRIFAN            0x06
+#define _3DPRIM_QUADLIST          0x07
+#define _3DPRIM_QUADSTRIP         0x08
+#define _3DPRIM_LINELIST_ADJ      0x09
+#define _3DPRIM_LINESTRIP_ADJ     0x0A
+#define _3DPRIM_TRILIST_ADJ       0x0B
+#define _3DPRIM_TRISTRIP_ADJ      0x0C
+#define _3DPRIM_TRISTRIP_REVERSE  0x0D
+#define _3DPRIM_POLYGON           0x0E
+#define _3DPRIM_RECTLIST          0x0F
+#define _3DPRIM_LINELOOP          0x10
+#define _3DPRIM_POINTLIST_BF      0x11
+#define _3DPRIM_LINESTRIP_CONT    0x12
+#define _3DPRIM_LINESTRIP_BF      0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     1
+/* Anisotropy ratio encodings: ratio 2..16 in steps of 2 maps to 0..7. */
+#define BRW_ANISORATIO_2     0
+#define BRW_ANISORATIO_4     1
+#define BRW_ANISORATIO_6     2
+#define BRW_ANISORATIO_8     3
+#define BRW_ANISORATIO_10    4
+#define BRW_ANISORATIO_12    5
+#define BRW_ANISORATIO_14    6
+#define BRW_ANISORATIO_16    7
+/* Blend factors.  Every INV_* value is its non-inverted counterpart + 0x10, and ZERO (0x11) pairs with ONE (0x1) the same way; 0x16 is not defined (no inverse of SRC_ALPHA_SATURATE). */
+#define BRW_BLENDFACTOR_ONE                 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR           0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA           0x3
+#define BRW_BLENDFACTOR_DST_ALPHA           0x4
+#define BRW_BLENDFACTOR_DST_COLOR           0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE  0x6
+#define BRW_BLENDFACTOR_CONST_COLOR         0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA         0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR          0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA          0x0A
+#define BRW_BLENDFACTOR_ZERO                0x11
+#define BRW_BLENDFACTOR_INV_SRC_COLOR       0x12
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA       0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA       0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR       0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR     0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA     0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR      0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA      0x1A
+
+#define BRW_BLENDFUNCTION_ADD               0
+#define BRW_BLENDFUNCTION_SUBTRACT          1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT  2
+#define BRW_BLENDFUNCTION_MIN               3
+#define BRW_BLENDFUNCTION_MAX               4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8         0
+#define BRW_ALPHATEST_FORMAT_FLOAT32        1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH  0
+#define BRW_CHROMAKEY_REPLACE_BLACK      1
+
+#define BRW_CLIP_API_OGL     0
+#define BRW_CLIP_API_DX      1
+
+#define BRW_CLIPMODE_NORMAL              0
+#define BRW_CLIPMODE_CLIP_ALL            1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED   2
+#define BRW_CLIPMODE_REJECT_ALL          3
+#define BRW_CLIPMODE_ACCEPT_ALL          4
+#define BRW_CLIPMODE_KERNEL_CLIP         5
+
+#define BRW_CLIP_NDCSPACE     0
+#define BRW_CLIP_SCREENSPACE  1
+/* Comparison functions 0..7; BRW_PREFILTER_* further down uses the same ordering. */
+#define BRW_COMPAREFUNCTION_ALWAYS       0
+#define BRW_COMPAREFUNCTION_NEVER        1
+#define BRW_COMPAREFUNCTION_LESS         2
+#define BRW_COMPAREFUNCTION_EQUAL        3
+#define BRW_COMPAREFUNCTION_LEQUAL       4
+#define BRW_COMPAREFUNCTION_GREATER      5
+#define BRW_COMPAREFUNCTION_NOTEQUAL     6
+#define BRW_COMPAREFUNCTION_GEQUAL       7
+
+#define BRW_COVERAGE_PIXELS_HALF     0
+#define BRW_COVERAGE_PIXELS_1        1
+#define BRW_COVERAGE_PIXELS_2        2
+#define BRW_COVERAGE_PIXELS_4        3
+
+#define BRW_CULLMODE_BOTH        0
+#define BRW_CULLMODE_NONE        1
+#define BRW_CULLMODE_FRONT       2
+#define BRW_CULLMODE_BACK        3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM      0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT  1
+/* Depth formats: values 3 and 4 are not defined here. */
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT     0
+#define BRW_DEPTHFORMAT_D32_FLOAT                1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT        2
+#define BRW_DEPTHFORMAT_D16_UNORM                5
+
+#define BRW_FLOATING_POINT_IEEE_754        0
+#define BRW_FLOATING_POINT_NON_IEEE_754    1
+
+#define BRW_FRONTWINDING_CW      0
+#define BRW_FRONTWINDING_CCW     1
+
+#define BRW_SPRITE_POINT_ENABLE  16
+
+#define BRW_INDEX_BYTE     0
+#define BRW_INDEX_WORD     1
+#define BRW_INDEX_DWORD    2
+/* Logic ops: all sixteen codes 0..15 are defined. */
+#define BRW_LOGICOPFUNCTION_CLEAR            0
+#define BRW_LOGICOPFUNCTION_NOR              1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED     2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED    3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE      4
+#define BRW_LOGICOPFUNCTION_INVERT           5
+#define BRW_LOGICOPFUNCTION_XOR              6
+#define BRW_LOGICOPFUNCTION_NAND             7
+#define BRW_LOGICOPFUNCTION_AND              8
+#define BRW_LOGICOPFUNCTION_EQUIV            9
+#define BRW_LOGICOPFUNCTION_NOOP             10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED      11
+#define BRW_LOGICOPFUNCTION_COPY             12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE       13
+#define BRW_LOGICOPFUNCTION_OR               14
+#define BRW_LOGICOPFUNCTION_SET              15
+
+#define BRW_MAPFILTER_NEAREST        0x0
+#define BRW_MAPFILTER_LINEAR         0x1
+#define BRW_MAPFILTER_ANISOTROPIC    0x2
+/* Mip filters: value 2 is not defined here (LINEAR is 3). */
+#define BRW_MIPFILTER_NONE        0
+#define BRW_MIPFILTER_NEAREST     1
+#define BRW_MIPFILTER_LINEAR      3
+
+#define BRW_POLYGON_FRONT_FACING     0
+#define BRW_POLYGON_BACK_FACING      1
+
+#define BRW_PREFILTER_ALWAYS     0x0
+#define BRW_PREFILTER_NEVER      0x1
+#define BRW_PREFILTER_LESS       0x2
+#define BRW_PREFILTER_EQUAL      0x3
+#define BRW_PREFILTER_LEQUAL     0x4
+#define BRW_PREFILTER_GREATER    0x5
+#define BRW_PREFILTER_NOTEQUAL   0x6
+#define BRW_PREFILTER_GEQUAL     0x7
+
+#define BRW_PROVOKING_VERTEX_0    0
+#define BRW_PROVOKING_VERTEX_1    1
+#define BRW_PROVOKING_VERTEX_2    2
+
+#define BRW_RASTRULE_UPPER_LEFT  0
+#define BRW_RASTRULE_UPPER_RIGHT 1
+/* These are listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "Intel® 965 Express Chipset Family and Intel® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at
+ *     http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * These appear to be supported on at least some
+ * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
+ * is useful when using OpenGL to render to a FBO
+ * (which has the pixel coordinate Y orientation inverted
+ * with respect to the normal OpenGL pixel coordinate system).
+ */
+#define BRW_RASTRULE_LOWER_LEFT  2
+#define BRW_RASTRULE_LOWER_RIGHT 3
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM    0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM    1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT   2
+/* Stencil ops: all eight codes 0..7 are defined. */
+#define BRW_STENCILOP_KEEP               0
+#define BRW_STENCILOP_ZERO               1
+#define BRW_STENCILOP_REPLACE            2
+#define BRW_STENCILOP_INCRSAT            3
+#define BRW_STENCILOP_DECRSAT            4
+#define BRW_STENCILOP_INCR               5
+#define BRW_STENCILOP_DECR               6
+#define BRW_STENCILOP_INVERT             7
+
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW   0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT   1
+/* Surface format codes.  The list is sparse: runs start at 0x000, 0x040, 0x080, 0x0C0, 0x100, 0x140 and 0x180, and several values inside the runs are not defined (e.g. 0x0C6, 0x14B, 0x19B). */
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT             0x000
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT              0x001
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT              0x002
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM             0x003
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM             0x004
+#define BRW_SURFACEFORMAT_R64G64_FLOAT                   0x005
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT             0x006
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED           0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED           0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT                0x040
+#define BRW_SURFACEFORMAT_R32G32B32_SINT                 0x041
+#define BRW_SURFACEFORMAT_R32G32B32_UINT                 0x042
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM                0x043
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM                0x044
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED              0x045
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED              0x046
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM             0x080
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM             0x081
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT              0x082
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT              0x083
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT             0x084
+#define BRW_SURFACEFORMAT_R32G32_FLOAT                   0x085
+#define BRW_SURFACEFORMAT_R32G32_SINT                    0x086
+#define BRW_SURFACEFORMAT_R32G32_UINT                    0x087
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS       0x088
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT        0x089
+#define BRW_SURFACEFORMAT_L32A32_FLOAT                   0x08A
+#define BRW_SURFACEFORMAT_R32G32_UNORM                   0x08B
+#define BRW_SURFACEFORMAT_R32G32_SNORM                   0x08C
+#define BRW_SURFACEFORMAT_R64_FLOAT                      0x08D
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM             0x08E
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT             0x08F
+#define BRW_SURFACEFORMAT_A32X32_FLOAT                   0x090
+#define BRW_SURFACEFORMAT_L32X32_FLOAT                   0x091
+#define BRW_SURFACEFORMAT_I32X32_FLOAT                   0x092
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED           0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED           0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED                 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED                 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM                 0x0C0
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB            0x0C1
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM              0x0C2
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB         0x0C3
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT               0x0C4
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM       0x0C5
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM                 0x0C7
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB            0x0C8
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM                 0x0C9
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT                  0x0CA
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT                  0x0CB
+#define BRW_SURFACEFORMAT_R16G16_UNORM                   0x0CC
+#define BRW_SURFACEFORMAT_R16G16_SNORM                   0x0CD
+#define BRW_SURFACEFORMAT_R16G16_SINT                    0x0CE
+#define BRW_SURFACEFORMAT_R16G16_UINT                    0x0CF
+#define BRW_SURFACEFORMAT_R16G16_FLOAT                   0x0D0
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM              0x0D1
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB         0x0D2
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT                0x0D3
+#define BRW_SURFACEFORMAT_R32_SINT                       0x0D6
+#define BRW_SURFACEFORMAT_R32_UINT                       0x0D7
+#define BRW_SURFACEFORMAT_R32_FLOAT                      0x0D8
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS          0x0D9
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT           0x0DA
+#define BRW_SURFACEFORMAT_L16A16_UNORM                   0x0DF
+#define BRW_SURFACEFORMAT_I24X8_UNORM                    0x0E0
+#define BRW_SURFACEFORMAT_L24X8_UNORM                    0x0E1
+#define BRW_SURFACEFORMAT_A24X8_UNORM                    0x0E2
+#define BRW_SURFACEFORMAT_I32_FLOAT                      0x0E3
+#define BRW_SURFACEFORMAT_L32_FLOAT                      0x0E4
+#define BRW_SURFACEFORMAT_A32_FLOAT                      0x0E5
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM                 0x0E9
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB            0x0EA
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM                 0x0EB
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB            0x0EC
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP             0x0ED
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM              0x0EE
+#define BRW_SURFACEFORMAT_L16A16_FLOAT                   0x0F0
+#define BRW_SURFACEFORMAT_R32_UNORM                      0x0F1
+#define BRW_SURFACEFORMAT_R32_SNORM                      0x0F2
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED            0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED               0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED               0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED                 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED                 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED                    0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED                    0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM                   0x100
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB              0x101
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM                 0x102
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB            0x103
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM                 0x104
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB            0x105
+#define BRW_SURFACEFORMAT_R8G8_UNORM                     0x106
+#define BRW_SURFACEFORMAT_R8G8_SNORM                     0x107
+#define BRW_SURFACEFORMAT_R8G8_SINT                      0x108
+#define BRW_SURFACEFORMAT_R8G8_UINT                      0x109
+#define BRW_SURFACEFORMAT_R16_UNORM                      0x10A
+#define BRW_SURFACEFORMAT_R16_SNORM                      0x10B
+#define BRW_SURFACEFORMAT_R16_SINT                       0x10C
+#define BRW_SURFACEFORMAT_R16_UINT                       0x10D
+#define BRW_SURFACEFORMAT_R16_FLOAT                      0x10E
+#define BRW_SURFACEFORMAT_I16_UNORM                      0x111
+#define BRW_SURFACEFORMAT_L16_UNORM                      0x112
+#define BRW_SURFACEFORMAT_A16_UNORM                      0x113
+#define BRW_SURFACEFORMAT_L8A8_UNORM                     0x114
+#define BRW_SURFACEFORMAT_I16_FLOAT                      0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT                      0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT                      0x117
+#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB                0x118
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM            0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM                 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB            0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED                   0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED                   0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED                    0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED                    0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM                       0x140
+#define BRW_SURFACEFORMAT_R8_SNORM                       0x141
+#define BRW_SURFACEFORMAT_R8_SINT                        0x142
+#define BRW_SURFACEFORMAT_R8_UINT                        0x143
+#define BRW_SURFACEFORMAT_A8_UNORM                       0x144
+#define BRW_SURFACEFORMAT_I8_UNORM                       0x145
+#define BRW_SURFACEFORMAT_L8_UNORM                       0x146
+#define BRW_SURFACEFORMAT_P4A4_UNORM                     0x147
+#define BRW_SURFACEFORMAT_A4P4_UNORM                     0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED                     0x149
+#define BRW_SURFACEFORMAT_R8_USCALED                     0x14A
+#define BRW_SURFACEFORMAT_L8_UNORM_SRGB                  0x14C
+#define BRW_SURFACEFORMAT_R1_UINT                        0x181
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL                   0x182
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY                  0x183
+#define BRW_SURFACEFORMAT_BC1_UNORM                      0x186
+#define BRW_SURFACEFORMAT_BC2_UNORM                      0x187
+#define BRW_SURFACEFORMAT_BC3_UNORM                      0x188
+#define BRW_SURFACEFORMAT_BC4_UNORM                      0x189
+#define BRW_SURFACEFORMAT_BC5_UNORM                      0x18A
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB                 0x18B
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB                 0x18C
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB                 0x18D
+#define BRW_SURFACEFORMAT_MONO8                          0x18E
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV                   0x18F
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY                    0x190
+#define BRW_SURFACEFORMAT_DXT1_RGB                       0x191
+#define BRW_SURFACEFORMAT_FXT1                           0x192
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM                   0x193
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM                   0x194
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED                 0x195
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED                 0x196
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT             0x197
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT                0x198
+#define BRW_SURFACEFORMAT_BC4_SNORM                      0x199
+#define BRW_SURFACEFORMAT_BC5_SNORM                      0x19A
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM                0x19C
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM                0x19D
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED              0x19E
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED              0x19F
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32  0
+#define BRW_SURFACERETURNFORMAT_S1       1
+/* Surface types: values 5 and 6 are not defined here (NULL is 7). */
+#define BRW_SURFACE_1D      0
+#define BRW_SURFACE_2D      1
+#define BRW_SURFACE_3D      2
+#define BRW_SURFACE_CUBE    3
+#define BRW_SURFACE_BUFFER  4
+#define BRW_SURFACE_NULL    7
+
+#define BRW_TEXCOORDMODE_WRAP            0
+#define BRW_TEXCOORDMODE_MIRROR          1
+#define BRW_TEXCOORDMODE_CLAMP           2
+#define BRW_TEXCOORDMODE_CUBE            3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER    4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE     5
+
+#define BRW_THREAD_PRIORITY_NORMAL   0
+#define BRW_THREAD_PRIORITY_HIGH     1
+
+#define BRW_TILEWALK_XMAJOR                 0
+#define BRW_TILEWALK_YMAJOR                 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS  0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS  1
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1   0
+#define BRW_ALIGN_16  1
+
+#define BRW_ADDRESS_DIRECT                        0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER    1
+
+#define BRW_CHANNEL_X     0
+#define BRW_CHANNEL_Y     1
+#define BRW_CHANNEL_Z     2
+#define BRW_CHANNEL_W     3
+
+#define BRW_COMPRESSION_NONE          0
+#define BRW_COMPRESSION_2NDHALF       1
+#define BRW_COMPRESSION_COMPRESSED    2
+/* Conditional modifiers: EQ and NEQ are aliases carrying the same values as Z and NZ. */
+#define BRW_CONDITIONAL_NONE  0
+#define BRW_CONDITIONAL_Z     1
+#define BRW_CONDITIONAL_NZ    2
+#define BRW_CONDITIONAL_EQ    1	/* Z */
+#define BRW_CONDITIONAL_NEQ   2	/* NZ */
+#define BRW_CONDITIONAL_G     3
+#define BRW_CONDITIONAL_GE    4
+#define BRW_CONDITIONAL_L     5
+#define BRW_CONDITIONAL_LE    6
+#define BRW_CONDITIONAL_R     7
+#define BRW_CONDITIONAL_O     8
+#define BRW_CONDITIONAL_U     9
+
+#define BRW_DEBUG_NONE        0
+#define BRW_DEBUG_BREAKPOINT  1
+
+#define BRW_DEPENDENCY_NORMAL         0
+#define BRW_DEPENDENCY_NOTCLEARED     1
+#define BRW_DEPENDENCY_NOTCHECKED     2
+#define BRW_DEPENDENCY_DISABLE        3
+/* Execution sizes: the encoding is log2 of the channel count (1 -> 0 ... 32 -> 5). */
+#define BRW_EXECUTE_1     0
+#define BRW_EXECUTE_2     1
+#define BRW_EXECUTE_4     2
+#define BRW_EXECUTE_8     3
+#define BRW_EXECUTE_16    4
+#define BRW_EXECUTE_32    5
+
+#define BRW_HORIZONTAL_STRIDE_0   0
+#define BRW_HORIZONTAL_STRIDE_1   1
+#define BRW_HORIZONTAL_STRIDE_2   2
+#define BRW_HORIZONTAL_STRIDE_4   3
+
+#define BRW_INSTRUCTION_NORMAL    0
+#define BRW_INSTRUCTION_SATURATE  1
+
+#define BRW_MASK_ENABLE   0
+#define BRW_MASK_DISABLE  1
+/* EU opcodes.  Sparse, all below 128; the list continues past RNDE. */
+#define BRW_OPCODE_MOV        1
+#define BRW_OPCODE_SEL        2
+#define BRW_OPCODE_NOT        4
+#define BRW_OPCODE_AND        5
+#define BRW_OPCODE_OR         6
+#define BRW_OPCODE_XOR        7
+#define BRW_OPCODE_SHR        8
+#define BRW_OPCODE_SHL        9
+#define BRW_OPCODE_RSR        10
+#define BRW_OPCODE_RSL        11
+#define BRW_OPCODE_ASR        12
+#define BRW_OPCODE_CMP        16
+#define BRW_OPCODE_CMPN       17
+#define BRW_OPCODE_JMPI       32
+#define BRW_OPCODE_IF         34
+#define BRW_OPCODE_IFF        35
+#define BRW_OPCODE_ELSE       36
+#define BRW_OPCODE_ENDIF      37
+#define BRW_OPCODE_DO         38
+#define BRW_OPCODE_WHILE      39
+#define BRW_OPCODE_BREAK      40
+#define BRW_OPCODE_CONTINUE   41
+#define BRW_OPCODE_HALT       42
+#define BRW_OPCODE_MSAVE      44
+#define BRW_OPCODE_MRESTORE   45
+#define BRW_OPCODE_PUSH       46
+#define BRW_OPCODE_POP        47
+#define BRW_OPCODE_WAIT       48
+#define BRW_OPCODE_SEND       49
+#define BRW_OPCODE_ADD        64
+#define BRW_OPCODE_MUL        65
+#define BRW_OPCODE_AVG        66
+#define BRW_OPCODE_FRC        67
+#define BRW_OPCODE_RNDU       68
+#define BRW_OPCODE_RNDD       69
+#define BRW_OPCODE_RNDE       70
+#define BRW_OPCODE_RNDZ       71
+#define BRW_OPCODE_MAC        72
+#define BRW_OPCODE_MACH       73
+#define BRW_OPCODE_LZD        74
+#define BRW_OPCODE_SAD2       80
+#define BRW_OPCODE_SADA2      81
+#define BRW_OPCODE_DP4        84
+#define BRW_OPCODE_DPH        85
+#define BRW_OPCODE_DP3        86
+#define BRW_OPCODE_DP2        87
+#define BRW_OPCODE_DPA2       88
+#define BRW_OPCODE_LINE       89
+#define BRW_OPCODE_NOP        126
+/* Predicate controls.  The ALIGN1 and ALIGN16 sets reuse values 2..7, so a value needs the access mode to be interpreted. */
+#define BRW_PREDICATE_NONE             0
+#define BRW_PREDICATE_NORMAL           1
+#define BRW_PREDICATE_ALIGN1_ANYV             2
+#define BRW_PREDICATE_ALIGN1_ALLV             3
+#define BRW_PREDICATE_ALIGN1_ANY2H            4
+#define BRW_PREDICATE_ALIGN1_ALL2H            5
+#define BRW_PREDICATE_ALIGN1_ANY4H            6
+#define BRW_PREDICATE_ALIGN1_ALL4H            7
+#define BRW_PREDICATE_ALIGN1_ANY8H            8
+#define BRW_PREDICATE_ALIGN1_ALL8H            9
+#define BRW_PREDICATE_ALIGN1_ANY16H           10
+#define BRW_PREDICATE_ALIGN1_ALL16H           11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
+#define BRW_PREDICATE_ALIGN16_ANY4H           6
+#define BRW_PREDICATE_ALIGN16_ALL4H           7
+
+#define BRW_ARCHITECTURE_REGISTER_FILE    0
+#define BRW_GENERAL_REGISTER_FILE         1
+#define BRW_MESSAGE_REGISTER_FILE         2
+#define BRW_IMMEDIATE_VALUE               3
+/* Register types.  VF shares value 5 with B, and V shares value 6 with HF. */
+#define BRW_REGISTER_TYPE_UD  0
+#define BRW_REGISTER_TYPE_D   1
+#define BRW_REGISTER_TYPE_UW  2
+#define BRW_REGISTER_TYPE_W   3
+#define BRW_REGISTER_TYPE_UB  4
+#define BRW_REGISTER_TYPE_B   5
+#define BRW_REGISTER_TYPE_VF  5	/* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF  6
+#define BRW_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F   7
+/* Architecture register codes: multiples of 0x10 from 0x00 to 0xA0. */
+#define BRW_ARF_NULL                  0x00
+#define BRW_ARF_ADDRESS               0x10
+#define BRW_ARF_ACCUMULATOR           0x20
+#define BRW_ARF_FLAG                  0x30
+#define BRW_ARF_MASK                  0x40
+#define BRW_ARF_MASK_STACK            0x50
+#define BRW_ARF_MASK_STACK_DEPTH      0x60
+#define BRW_ARF_STATE                 0x70
+#define BRW_ARF_CONTROL               0x80
+#define BRW_ARF_NOTIFICATION_COUNT    0x90
+#define BRW_ARF_IP                    0xA0
+
+#define BRW_AMASK   0
+#define BRW_IMASK   1
+#define BRW_LMASK   2
+#define BRW_CMASK   3
+
+
+
+#define BRW_THREAD_NORMAL     0
+#define BRW_THREAD_ATOMIC     1
+#define BRW_THREAD_SWITCH     2
+/* Vertical strides: 0..9 plus the special 0xF code; values 10..14 are not defined here. */
+#define BRW_VERTICAL_STRIDE_0                 0
+#define BRW_VERTICAL_STRIDE_1                 1
+#define BRW_VERTICAL_STRIDE_2                 2
+#define BRW_VERTICAL_STRIDE_4                 3
+#define BRW_VERTICAL_STRIDE_8                 4
+#define BRW_VERTICAL_STRIDE_16                5
+#define BRW_VERTICAL_STRIDE_32                6
+#define BRW_VERTICAL_STRIDE_64                7
+#define BRW_VERTICAL_STRIDE_128               8
+#define BRW_VERTICAL_STRIDE_256               9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF
+
+#define BRW_WIDTH_1       0
+#define BRW_WIDTH_2       1
+#define BRW_WIDTH_4       2
+#define BRW_WIDTH_8       3
+#define BRW_WIDTH_16      4
+/* Same 0..11 power-of-two ladder (1K..2M) as BRW_SCRATCH_SPACE_SIZE_* further down. */
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11
+
+#define BRW_POLYGON_FACING_FRONT      0
+#define BRW_POLYGON_FACING_BACK       1
+/* Message targets: all eight codes 0..7 are defined. */
+#define BRW_MESSAGE_TARGET_NULL               0
+#define BRW_MESSAGE_TARGET_MATH               1
+#define BRW_MESSAGE_TARGET_SAMPLER            2
+#define BRW_MESSAGE_TARGET_GATEWAY            3
+#define BRW_MESSAGE_TARGET_DATAPORT_READ      4
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE     5
+#define BRW_MESSAGE_TARGET_URB                6
+#define BRW_MESSAGE_TARGET_THREAD_SPAWNER     7
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3
+/* Sampler message types.  Values are reused across SIMD widths (e.g. several different messages map to 2), so a value needs the SIMD mode to be interpreted. */
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
+#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO             2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3
+/* The _IGDNG set: here the value depends only on the operation (SAMPLE 0, BIAS 1, LOD 2, COMPARE 3), not on the SIMD width. */
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG                0
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG              0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG               0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG           1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG         1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG          1
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG            2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG          2
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG           2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG        3
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG      3
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG       3
+
+/* for IGDNG only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8                     1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3
+/* Dataport block sizes, message types and read targets. */
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3
+
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ          2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4
+/* Dataport write messages: value 6 is not defined here. */
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE                2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7
+/* Math function codes 1..13.  The "was N" notes on SIN/COS/SINCOS record earlier values of those three defines. */
+#define BRW_MATH_FUNCTION_INV                              1
+#define BRW_MATH_FUNCTION_LOG                              2
+#define BRW_MATH_FUNCTION_EXP                              3
+#define BRW_MATH_FUNCTION_SQRT                             4
+#define BRW_MATH_FUNCTION_RSQ                              5
+#define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN                              9
+#define BRW_MATH_FUNCTION_POW                              10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13
+
+#define BRW_MATH_INTEGER_UNSIGNED     0
+#define BRW_MATH_INTEGER_SIGNED       1
+
+#define BRW_MATH_PRECISION_FULL        0
+#define BRW_MATH_PRECISION_PARTIAL     1
+
+#define BRW_MATH_SATURATE_NONE         0
+#define BRW_MATH_SATURATE_SATURATE     1
+
+#define BRW_MATH_DATA_VECTOR  0
+#define BRW_MATH_DATA_SCALAR  1
+
+#define BRW_URB_OPCODE_WRITE  0
+
+#define BRW_URB_SWIZZLE_NONE          0
+#define BRW_URB_SWIZZLE_INTERLEAVE    1
+#define BRW_URB_SWIZZLE_TRANSPOSE     2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K     0
+#define BRW_SCRATCH_SPACE_SIZE_2K     1
+#define BRW_SCRATCH_SPACE_SIZE_4K     2
+#define BRW_SCRATCH_SPACE_SIZE_8K     3
+#define BRW_SCRATCH_SPACE_SIZE_16K    4
+#define BRW_SCRATCH_SPACE_SIZE_32K    5
+#define BRW_SCRATCH_SPACE_SIZE_64K    6
+#define BRW_SCRATCH_SPACE_SIZE_128K   7
+#define BRW_SCRATCH_SPACE_SIZE_256K   8
+#define BRW_SCRATCH_SPACE_SIZE_512K   9
+#define BRW_SCRATCH_SPACE_SIZE_1M     10
+#define BRW_SCRATCH_SPACE_SIZE_2M     11
+
+
+
+/* Command opcodes.  Where a command has both _965 and _GM45 values, the chipset-dispatching macros at the end of this header choose between them. */
+#define CMD_URB_FENCE                 0x6000
+#define CMD_CS_URB_STATE              0x6001
+#define CMD_CONST_BUFFER              0x6002
+
+#define CMD_STATE_BASE_ADDRESS        0x6101
+#define CMD_STATE_INSN_POINTER        0x6102
+#define CMD_PIPELINE_SELECT_965       0x6104
+#define CMD_PIPELINE_SELECT_GM45      0x6904
+
+#define CMD_PIPELINED_STATE_POINTERS  0x7800
+#define CMD_BINDING_TABLE_PTRS        0x7801
+
+#define CMD_VERTEX_BUFFER             0x7808
+# define BRW_VB0_INDEX_SHIFT		27
+# define BRW_VB0_ACCESS_VERTEXDATA	(0 << 26)
+# define BRW_VB0_ACCESS_INSTANCEDATA	(1 << 26)
+# define BRW_VB0_PITCH_SHIFT		0
+
+#define CMD_VERTEX_ELEMENT            0x7809
+# define BRW_VE0_INDEX_SHIFT		27
+# define BRW_VE0_FORMAT_SHIFT		16
+# define BRW_VE0_VALID			(1 << 26)
+# define BRW_VE0_SRC_OFFSET_SHIFT	0
+# define BRW_VE1_COMPONENT_NOSTORE	0
+# define BRW_VE1_COMPONENT_STORE_SRC	1
+# define BRW_VE1_COMPONENT_STORE_0	2
+# define BRW_VE1_COMPONENT_STORE_1_FLT	3
+# define BRW_VE1_COMPONENT_STORE_1_INT	4
+# define BRW_VE1_COMPONENT_STORE_VID	5
+# define BRW_VE1_COMPONENT_STORE_IID	6
+# define BRW_VE1_COMPONENT_STORE_PID	7
+# define BRW_VE1_COMPONENT_0_SHIFT	28
+# define BRW_VE1_COMPONENT_1_SHIFT	24
+# define BRW_VE1_COMPONENT_2_SHIFT	20
+# define BRW_VE1_COMPONENT_3_SHIFT	16
+# define BRW_VE1_DST_OFFSET_SHIFT	0
+
+#define CMD_INDEX_BUFFER              0x780a
+#define CMD_VF_STATISTICS_965         0x780b
+#define CMD_VF_STATISTICS_GM45        0x680b
+
+#define CMD_DRAW_RECT                 0x7900
+#define CMD_BLEND_CONSTANT_COLOR      0x7901
+#define CMD_CHROMA_KEY                0x7904
+#define CMD_DEPTH_BUFFER              0x7905
+#define CMD_POLY_STIPPLE_OFFSET       0x7906
+#define CMD_POLY_STIPPLE_PATTERN      0x7907
+#define CMD_LINE_STIPPLE_PATTERN      0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define CMD_AA_LINE_PARAMETERS        0x790a
+
+#define CMD_PIPE_CONTROL              0x7a00
+
+#define CMD_3D_PRIM                   0x7b00
+
+#define CMD_MI_FLUSH                  0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END    0x1
+#define R02_PRIM_START  0x2
+
+#include "intel_chipset.h"
+/* Chipset helpers.  Each takes a brw context and reads (brw)->intel.intelScreen->deviceID; BRW_IS_965 is "neither G4X nor IGDNG".  The CMD_* selectors return the _GM45 opcode for G4X/IGDNG and the _965 opcode otherwise; URB_SIZES yields 1024 (IGDNG), 384 (G4X) or 256.  Arguments are evaluated more than once. */
+#define BRW_IS_G4X(brw)         (IS_G4X((brw)->intel.intelScreen->deviceID))
+#define BRW_IS_IGDNG(brw)       (IS_IGDNG((brw)->intel.intelScreen->deviceID))
+#define BRW_IS_965(brw)         (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
+#define CMD_PIPELINE_SELECT(brw)        ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
+#define CMD_VF_STATISTICS(brw)          ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
+#define URB_SIZES(brw)                  (BRW_IS_IGDNG(brw) ? 1024 : \
+                                         (BRW_IS_G4X(brw) ? 384 : 256))  /* 512 bit units */
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
new file mode 100644
index 0000000000..9fef230507
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -0,0 +1,903 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+/* NOTE(review): the six #include directives below have no header name in this copy; the <...> operands appear to have been stripped and must be restored from the original patch. */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "main/mtypes.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+/* Opcode table indexed by BRW_OPCODE_* value: mnemonic plus source and
+ * destination operand counts.  Slots without an initializer are zero
+ * filled (name == NULL); of the opcodes in brw_defines.h that applies
+ * to BRW_OPCODE_RSR, BRW_OPCODE_RSL and BRW_OPCODE_DPA2.
+ */
+struct {
+    char    *name;
+    int	    nsrc;
+    int	    ndst;
+} opcode[128] = {
+    [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+    [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+
+    [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+    [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+    [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, /* NOTE(review): 01 is an octal literal equal to 1; the neighbouring control-flow entries use ndst = 0 -- confirm which was intended */
+    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+    [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+/* Suffix strings indexed by BRW_CONDITIONAL_* (Z/NZ print as ".e"/".ne"); slots 10..15 have no initializer and are NULL. */
+char *conditional_modifier[16] = {
+    [BRW_CONDITIONAL_NONE] = "",
+    [BRW_CONDITIONAL_Z] = ".e",
+    [BRW_CONDITIONAL_NZ] = ".ne",
+    [BRW_CONDITIONAL_G] = ".g",
+    [BRW_CONDITIONAL_GE] = ".ge",
+    [BRW_CONDITIONAL_L] = ".l",
+    [BRW_CONDITIONAL_LE] = ".le",
+    [BRW_CONDITIONAL_R] = ".r",
+    [BRW_CONDITIONAL_O] = ".o",
+    [BRW_CONDITIONAL_U] = ".u",
+};
+/* Two-entry tables: index 0 is the empty string, index 1 the printed marker. */
+char *negate[2] = {
+    [0] = "",
+    [1] = "-",
+};
+
+char *_abs[2] = {
+    [0] = "",
+    [1] = "(abs)",
+};
+/* Strings for the vertical-stride encodings 0..6 and 15 ("VxH").  Encodings 7..9 (BRW_VERTICAL_STRIDE_64/128/256 in brw_defines.h) have no entry and are NULL. */
+char *vert_stride[16] = {
+    [0] = "0",
+    [1] = "1",
+    [2] = "2",
+    [3] = "4",
+    [4] = "8",
+    [5] = "16",
+    [6] = "32",
+    [15] = "VxH",
+};
+/* Strings for the width encodings 0..4; slots 5..7 are NULL. */
+char *width[8] = {
+    [0] = "1",
+    [1] = "2",
+    [2] = "4",
+    [3] = "8",
+    [4] = "16",
+};
+/* Strings for the four horizontal-stride encodings. */
+char *horiz_stride[4] = {
+    [0] = "0",
+    [1] = "1",
+    [2] = "2",
+    [3] = "4"
+};
+/* Channel-select letters for encodings 0..3. */
+char *chan_sel[4] = {
+    [0] = "x",
+    [1] = "y",
+    [2] = "z",
+    [3] = "w",
+};
+
+char
*dest_condmod[16] = { +}; + +char *debug_ctrl[2] = { + [0] = "", + [1] = ".breakpoint" +}; + +char *saturate[2] = { + [0] = "", + [1] = ".sat" +}; + +char *exec_size[8] = { + [0] = "1", + [1] = "2", + [2] = "4", + [3] = "8", + [4] = "16", + [5] = "32" +}; + +char *pred_inv[2] = { + [0] = "+", + [1] = "-" +}; + +char *pred_ctrl_align16[16] = { + [1] = "", + [2] = ".x", + [3] = ".y", + [4] = ".z", + [5] = ".w", + [6] = ".any4h", + [7] = ".all4h", +}; + +char *pred_ctrl_align1[16] = { + [1] = "", + [2] = ".anyv", + [3] = ".allv", + [4] = ".any2h", + [5] = ".all2h", + [6] = ".any4h", + [7] = ".all4h", + [8] = ".any8h", + [9] = ".all8h", + [10] = ".any16h", + [11] = ".all16h", +}; + +char *thread_ctrl[4] = { + [0] = "", + [2] = "switch" +}; + +char *compr_ctrl[4] = { + [0] = "", + [1] = "sechalf", + [2] = "compr", +}; + +char *dep_ctrl[4] = { + [0] = "", + [1] = "NoDDClr", + [2] = "NoDDChk", + [3] = "NoDDClr,NoDDChk", +}; + +char *mask_ctrl[4] = { + [0] = "", + [1] = "nomask", +}; + +char *access_mode[2] = { + [0] = "align1", + [1] = "align16", +}; + +char *reg_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [4] = "UB", + [5] = "B", + [7] = "F" +}; + +/* Type suffixes for immediate operands. The packed-vector types VF and V + * need separate slots (5 and 6); giving both index 5 would leave only the + * last initializer in effect. + */ +char *imm_encoding[8] = { + [0] = "UD", + [1] = "D", + [2] = "UW", + [3] = "W", + [5] = "VF", + [6] = "V", + [7] = "F" +}; + +char *reg_file[4] = { + [0] = "A", + [1] = "g", + [2] = "m", + [3] = "imm", +}; + +char *writemask[16] = { + [0x0] = ".", + [0x1] = ".x", + [0x2] = ".y", + [0x3] = ".xy", + [0x4] = ".z", + [0x5] = ".xz", + [0x6] = ".yz", + [0x7] = ".xyz", + [0x8] = ".w", + [0x9] = ".xw", + [0xa] = ".yw", + [0xb] = ".xyw", + [0xc] = ".zw", + [0xd] = ".xzw", + [0xe] = ".yzw", + [0xf] = "", +}; + +char *end_of_thread[2] = { + [0] = "", + [1] = "EOT" +}; + +char *target_function[16] = { + [BRW_MESSAGE_TARGET_NULL] = "null", + [BRW_MESSAGE_TARGET_MATH] = "math", + [BRW_MESSAGE_TARGET_SAMPLER] = "sampler", + [BRW_MESSAGE_TARGET_GATEWAY] = "gateway", + [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read", + 
[BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write", + [BRW_MESSAGE_TARGET_URB] = "urb", + [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner" +}; + +/* Mnemonics for the math message's function field. The integer-divide + * quotient prints as "intdiv" and the remainder as "intmod". + */ +char *math_function[16] = { + [BRW_MATH_FUNCTION_INV] = "inv", + [BRW_MATH_FUNCTION_LOG] = "log", + [BRW_MATH_FUNCTION_EXP] = "exp", + [BRW_MATH_FUNCTION_SQRT] = "sqrt", + [BRW_MATH_FUNCTION_RSQ] = "rsq", + [BRW_MATH_FUNCTION_SIN] = "sin", + [BRW_MATH_FUNCTION_COS] = "cos", + [BRW_MATH_FUNCTION_SINCOS] = "sincos", + [BRW_MATH_FUNCTION_TAN] = "tan", + [BRW_MATH_FUNCTION_POW] = "pow", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod", + [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv", + [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod", +}; + +char *math_saturate[2] = { + [0] = "", + [1] = "sat" +}; + +char *math_signed[2] = { + [0] = "", + [1] = "signed" +}; + +char *math_scalar[2] = { + [0] = "", + [1] = "scalar" +}; + +char *math_precision[2] = { + [0] = "", + [1] = "partial_precision" +}; + +char *urb_swizzle[4] = { + [BRW_URB_SWIZZLE_NONE] = "", + [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave", + [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose", +}; + +char *urb_allocate[2] = { + [0] = "", + [1] = "allocate" +}; + +char *urb_used[2] = { + [0] = "", + [1] = "used" +}; + +char *urb_complete[2] = { + [0] = "", + [1] = "complete" +}; + +char *sampler_target_format[4] = { + [0] = "F", + [2] = "UD", + [3] = "D" +}; + + +static int column; + +static int string (FILE *file, char *string) +{ + fputs (string, file); + column += strlen (string); + return 0; +} + +static int format (FILE *f, char *format, ...) 
+{ + char buf[1024]; + va_list args; + va_start (args, format); + + vsnprintf (buf, sizeof (buf) - 1, format, args); + string (f, buf); + return 0; +} + +static int newline (FILE *f) +{ + putc ('\n', f); + column = 0; + return 0; +} + +static int pad (FILE *f, int c) +{ + do + string (f, " "); + while (column < c); + return 0; +} + +static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space) +{ + if (!ctrl[id]) { + fprintf (file, "*** invalid %s value %d ", + name, id); + return 1; + } + if (ctrl[id][0]) + { + if (space && *space) + string (file, " "); + string (file, ctrl[id]); + if (space) + *space = 1; + } + return 0; +} + +static int print_opcode (FILE *file, int id) +{ + if (!opcode[id].name) { + format (file, "*** invalid opcode value %d ", id); + return 1; + } + string (file, opcode[id].name); + return 0; +} + +static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) +{ + int err = 0; + if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) { + switch (_reg_nr & 0xf0) { + case BRW_ARF_NULL: + string (file, "null"); + return -1; + case BRW_ARF_ADDRESS: + format (file, "a%d", _reg_nr & 0x0f); + break; + case BRW_ARF_ACCUMULATOR: + format (file, "acc%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK: + format (file, "mask%d", _reg_nr & 0x0f); + break; + case BRW_ARF_MASK_STACK: + format (file, "msd%d", _reg_nr & 0x0f); + break; + case BRW_ARF_STATE: + format (file, "sr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_CONTROL: + format (file, "cr%d", _reg_nr & 0x0f); + break; + case BRW_ARF_NOTIFICATION_COUNT: + format (file, "n%d", _reg_nr & 0x0f); + break; + case BRW_ARF_IP: + string (file, "ip"); + return -1; + break; + default: + format (file, "ARF%d", _reg_nr); + break; + } + } else { + err |= control (file, "src reg file", reg_file, _reg_file, NULL); + format (file, "%d", _reg_nr); + } + return err; +} + +static int dest (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + + if (inst->header.access_mode == BRW_ALIGN_1) + { + if 
(inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da1.dest_subreg_nr) + format (file, ".%d", inst->bits1.da1.dest_subreg_nr); + format (file, "<%d>", inst->bits1.da1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL); + } + else + { + string (file, "g[a0"); + if (inst->bits1.ia1.dest_subreg_nr) + format (file, ".%d", inst->bits1.ia1.dest_subreg_nr); + if (inst->bits1.ia1.dest_indirect_offset) + format (file, " %d", inst->bits1.ia1.dest_indirect_offset); + string (file, "]"); + format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL); + } + } + else + { + if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT) + { + err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr); + if (err == -1) + return 0; + if (inst->bits1.da16.dest_subreg_nr) + format (file, ".%d", inst->bits1.da16.dest_subreg_nr); + string (file, "<1>"); + err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL); + err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL); + } + else + { + err = 1; + string (file, "Indirect align16 address mode not supported"); + } + } + + /* Report any decode failure recorded above to the caller. */ + return err; +} + +/* Print an align1 source region as "<vert stride,width,horiz stride>". + * Returns non-zero if any of the three encodings has no table entry. + */ +static int src_align1_region (FILE *file, + GLuint _vert_stride, GLuint _width, GLuint _horiz_stride) +{ + int err = 0; + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ","); + err |= control (file, "width", width, _width, NULL); + string (file, ","); + err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL); + string (file, ">"); + return err; +} + +static int src_da1 (FILE *file, GLuint type, GLuint _reg_file, + GLuint _vert_stride, GLuint _width, 
GLuint _horiz_stride, + GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, reg_num); + if (err == -1) + return 0; + if (sub_reg_num) + format (file, ".%d", sub_reg_num); + /* Fold region decode errors into the result. */ + err |= src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +/* Print an align1 source operand that is addressed indirectly through a0. + * Returns non-zero if any field has an encoding with no table entry. + */ +static int src_ia1 (FILE *file, + GLuint type, + GLuint _reg_file, + GLint _addr_imm, + GLuint _addr_subreg_nr, + GLuint _negate, + GLuint __abs, + GLuint _addr_mode, + GLuint _horiz_stride, + GLuint _width, + GLuint _vert_stride) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + string (file, "g[a0"); + if (_addr_subreg_nr) + format (file, ".%d", _addr_subreg_nr); + if (_addr_imm) + format (file, " %d", _addr_imm); + string (file, "]"); + /* Fold region decode errors into the result. */ + err |= src_align1_region (file, _vert_stride, _width, _horiz_stride); + err |= control (file, "src reg encoding", reg_encoding, type, NULL); + return err; +} + +static int src_da16 (FILE *file, + GLuint _reg_type, + GLuint _reg_file, + GLuint _vert_stride, + GLuint _reg_nr, + GLuint _subreg_nr, + GLuint __abs, + GLuint _negate, + GLuint swz_x, + GLuint swz_y, + GLuint swz_z, + GLuint swz_w) +{ + int err = 0; + err |= control (file, "negate", negate, _negate, NULL); + err |= control (file, "abs", _abs, __abs, NULL); + + err |= reg (file, _reg_file, _reg_nr); + if (err == -1) + return 0; + if (_subreg_nr) + format (file, ".%d", _subreg_nr); + string (file, "<"); + err |= control (file, "vert stride", vert_stride, _vert_stride, NULL); + string (file, ",1,1>"); + err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL); + /* + * Three kinds of swizzle display: + * identity - nothing printed + * 1->all - print the single channel + * 1->1 - print the 
mapping + */ + if (swz_x == BRW_CHANNEL_X && + swz_y == BRW_CHANNEL_Y && + swz_z == BRW_CHANNEL_Z && + swz_w == BRW_CHANNEL_W) + { + ; + } + else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w) + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + } + else + { + string (file, "."); + err |= control (file, "channel select", chan_sel, swz_x, NULL); + err |= control (file, "channel select", chan_sel, swz_y, NULL); + err |= control (file, "channel select", chan_sel, swz_z, NULL); + err |= control (file, "channel select", chan_sel, swz_w, NULL); + } + return err; +} + + +/* Print the immediate operand stored in inst->bits3, formatted according + * to the given register type. Always returns 0. + */ +static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { + switch (type) { + case BRW_REGISTER_TYPE_UD: + format (file, "0x%08xUD", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_D: + format (file, "%dD", inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UW: + format (file, "0x%04xUW", (uint16_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_W: + format (file, "%dW", (int16_t) inst->bits3.d); + break; + case BRW_REGISTER_TYPE_UB: + /* Unsigned cast: a signed byte would be sign-extended by %x. */ + format (file, "0x%02xUB", (uint8_t) inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_VF: + format (file, "Vector Float"); + break; + case BRW_REGISTER_TYPE_V: + format (file, "0x%08xV", inst->bits3.ud); + break; + case BRW_REGISTER_TYPE_F: + format (file, "%-gF", inst->bits3.f); + break; + } + return 0; +} + +static int src0 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src0_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src0_reg_type, + inst->bits1.da1.src0_reg_file, + inst->bits2.da1.src0_vert_stride, + inst->bits2.da1.src0_width, + inst->bits2.da1.src0_horiz_stride, + inst->bits2.da1.src0_reg_nr, + inst->bits2.da1.src0_subreg_nr, + inst->bits2.da1.src0_abs, + 
inst->bits2.da1.src0_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src0_reg_type, + inst->bits1.ia1.src0_reg_file, + inst->bits2.ia1.src0_indirect_offset, + inst->bits2.ia1.src0_subreg_nr, + inst->bits2.ia1.src0_negate, + inst->bits2.ia1.src0_abs, + inst->bits2.ia1.src0_address_mode, + inst->bits2.ia1.src0_horiz_stride, + inst->bits2.ia1.src0_width, + inst->bits2.ia1.src0_vert_stride); + } + } + else + { + if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src0_reg_type, + inst->bits1.da16.src0_reg_file, + inst->bits2.da16.src0_vert_stride, + inst->bits2.da16.src0_reg_nr, + inst->bits2.da16.src0_subreg_nr, + inst->bits2.da16.src0_abs, + inst->bits2.da16.src0_negate, + inst->bits2.da16.src0_swz_x, + inst->bits2.da16.src0_swz_y, + inst->bits2.da16.src0_swz_z, + inst->bits2.da16.src0_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +static int src1 (FILE *file, struct brw_instruction *inst) +{ + if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) + return imm (file, inst->bits1.da1.src1_reg_type, + inst); + else if (inst->header.access_mode == BRW_ALIGN_1) + { + if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da1 (file, + inst->bits1.da1.src1_reg_type, + inst->bits1.da1.src1_reg_file, + inst->bits3.da1.src1_vert_stride, + inst->bits3.da1.src1_width, + inst->bits3.da1.src1_horiz_stride, + inst->bits3.da1.src1_reg_nr, + inst->bits3.da1.src1_subreg_nr, + inst->bits3.da1.src1_abs, + inst->bits3.da1.src1_negate); + } + else + { + return src_ia1 (file, + inst->bits1.ia1.src1_reg_type, + inst->bits1.ia1.src1_reg_file, + inst->bits3.ia1.src1_indirect_offset, + inst->bits3.ia1.src1_subreg_nr, + inst->bits3.ia1.src1_negate, + inst->bits3.ia1.src1_abs, + inst->bits3.ia1.src1_address_mode, + inst->bits3.ia1.src1_horiz_stride, + inst->bits3.ia1.src1_width, + inst->bits3.ia1.src1_vert_stride); + } + } + 
else + { + if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT) + { + return src_da16 (file, + inst->bits1.da16.src1_reg_type, + inst->bits1.da16.src1_reg_file, + inst->bits3.da16.src1_vert_stride, + inst->bits3.da16.src1_reg_nr, + inst->bits3.da16.src1_subreg_nr, + inst->bits3.da16.src1_abs, + inst->bits3.da16.src1_negate, + inst->bits3.da16.src1_swz_x, + inst->bits3.da16.src1_swz_y, + inst->bits3.da16.src1_swz_z, + inst->bits3.da16.src1_swz_w); + } + else + { + string (file, "Indirect align16 address mode not supported"); + return 1; + } + } +} + +int brw_disasm (FILE *file, struct brw_instruction *inst) +{ + int err = 0; + int space = 0; + + if (inst->header.predicate_control) { + string (file, "("); + err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL); + string (file, "f0"); + if (inst->bits2.da1.flag_reg_nr) + format (file, ".%d", inst->bits2.da1.flag_reg_nr); + if (inst->header.access_mode == BRW_ALIGN_1) + err |= control (file, "predicate control align1", pred_ctrl_align1, + inst->header.predicate_control, NULL); + else + err |= control (file, "predicate control align16", pred_ctrl_align16, + inst->header.predicate_control, NULL); + string (file, ") "); + } + + err |= print_opcode (file, inst->header.opcode); + err |= control (file, "saturate", saturate, inst->header.saturate, NULL); + err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL); + + if (inst->header.opcode != BRW_OPCODE_SEND) + err |= control (file, "conditional modifier", conditional_modifier, + inst->header.destreg__conditionalmod, NULL); + + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "("); + err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL); + string (file, ")"); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) + format (file, " %d", inst->header.destreg__conditionalmod); + + if (opcode[inst->header.opcode].ndst > 0) { + pad (file, 16); + err |= dest (file, 
inst); + } + if (opcode[inst->header.opcode].nsrc > 0) { + pad (file, 32); + err |= src0 (file, inst); + } + if (opcode[inst->header.opcode].nsrc > 1) { + pad (file, 48); + err |= src1 (file, inst); + } + + if (inst->header.opcode == BRW_OPCODE_SEND) { + newline (file); + pad (file, 16); + space = 0; + err |= control (file, "target function", target_function, + inst->bits3.generic.msg_target, &space); + switch (inst->bits3.generic.msg_target) { + case BRW_MESSAGE_TARGET_MATH: + err |= control (file, "math function", math_function, + inst->bits3.math.function, &space); + err |= control (file, "math saturate", math_saturate, + inst->bits3.math.saturate, &space); + err |= control (file, "math signed", math_signed, + inst->bits3.math.int_type, &space); + err |= control (file, "math scalar", math_scalar, + inst->bits3.math.data_type, &space); + err |= control (file, "math precision", math_precision, + inst->bits3.math.precision, &space); + break; + case BRW_MESSAGE_TARGET_SAMPLER: + format (file, " (%d, %d, ", + inst->bits3.sampler.binding_table_index, + inst->bits3.sampler.sampler); + err |= control (file, "sampler target format", sampler_target_format, + inst->bits3.sampler.return_format, NULL); + string (file, ")"); + break; + case BRW_MESSAGE_TARGET_DATAPORT_WRITE: + format (file, " (%d, %d, %d, %d)", + inst->bits3.dp_write.binding_table_index, + (inst->bits3.dp_write.pixel_scoreboard_clear << 3) | + inst->bits3.dp_write.msg_control, + inst->bits3.dp_write.msg_type, + inst->bits3.dp_write.send_commit_msg); + break; + case BRW_MESSAGE_TARGET_URB: + format (file, " %d", inst->bits3.urb.offset); + space = 1; + err |= control (file, "urb swizzle", urb_swizzle, + inst->bits3.urb.swizzle_control, &space); + err |= control (file, "urb allocate", urb_allocate, + inst->bits3.urb.allocate, &space); + err |= control (file, "urb used", urb_used, + inst->bits3.urb.used, &space); + err |= control (file, "urb complete", urb_complete, + inst->bits3.urb.complete, &space); + break; + 
case BRW_MESSAGE_TARGET_THREAD_SPAWNER: + break; + default: + format (file, "unsupported target %d", inst->bits3.generic.msg_target); + break; + } + if (space) + string (file, " "); + format (file, "mlen %d", + inst->bits3.generic.msg_length); + format (file, " rlen %d", + inst->bits3.generic.response_length); + } + pad (file, 64); + if (inst->header.opcode != BRW_OPCODE_NOP) { + string (file, "{"); + space = 1; + err |= control(file, "access mode", access_mode, inst->header.access_mode, &space); + err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space); + err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space); + err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space); + err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space); + if (inst->header.opcode == BRW_OPCODE_SEND) + err |= control (file, "end of thread", end_of_thread, + inst->bits3.generic.end_of_thread, &space); + if (space) + string (file, " "); + string (file, "}"); + } + string (file, ";"); + newline (file); + return err; +} diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c new file mode 100644 index 0000000000..44bb7bd588 --- /dev/null +++ b/src/gallium/drivers/i965/brw_draw.c @@ -0,0 +1,493 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "main/glheader.h" +#include "main/context.h" +#include "main/state.h" +#include "main/enums.h" +#include "tnl/tnl.h" +#include "vbo/vbo_context.h" +#include "swrast/swrast.h" +#include "swrast_setup/swrast_setup.h" + +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_fallback.h" + +#include "intel_batchbuffer.h" +#include "intel_buffer_objects.h" + +#define FILE_DEBUG_FLAG DEBUG_BATCH + +static GLuint prim_to_hw_prim[GL_POLYGON+1] = { + _3DPRIM_POINTLIST, + _3DPRIM_LINELIST, + _3DPRIM_LINELOOP, + _3DPRIM_LINESTRIP, + _3DPRIM_TRILIST, + _3DPRIM_TRISTRIP, + _3DPRIM_TRIFAN, + _3DPRIM_QUADLIST, + _3DPRIM_QUADSTRIP, + _3DPRIM_POLYGON +}; + + +static const GLenum reduced_prim[GL_POLYGON+1] = { + GL_POINTS, + GL_LINES, + GL_LINES, + GL_LINES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES, + GL_TRIANGLES +}; + + +/* When the primitive changes, set a state bit and re-validate. Not + * the nicest and would rather deal with this by having all the + * programs be immune to the active primitive (ie. cope with all + * possibilities). That may not be realistic however. 
+ */ +static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) +{ + GLcontext *ctx = &brw->intel.ctx; + + if (INTEL_DEBUG & DEBUG_PRIMS) + _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); + + /* Slight optimization to avoid the GS program when not needed: + */ + if (prim == GL_QUAD_STRIP && + ctx->Light.ShadeModel != GL_FLAT && + ctx->Polygon.FrontMode == GL_FILL && + ctx->Polygon.BackMode == GL_FILL) + prim = GL_TRIANGLE_STRIP; + + if (prim != brw->primitive) { + brw->primitive = prim; + brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; + + if (reduced_prim[prim] != brw->intel.reduced_primitive) { + brw->intel.reduced_primitive = reduced_prim[prim]; + brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; + } + } + + return prim_to_hw_prim[prim]; +} + + +static GLuint trim(GLenum prim, GLuint length) +{ + if (prim == GL_QUAD_STRIP) + return length > 3 ? (length - length % 2) : 0; + else if (prim == GL_QUADS) + return length - length % 4; + else + return length; +} + + +static void brw_emit_prim(struct brw_context *brw, + const struct _mesa_prim *prim, + uint32_t hw_prim) +{ + struct brw_3d_primitive prim_packet; + struct intel_context *intel = &brw->intel; + + if (INTEL_DEBUG & DEBUG_PRIMS) + _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), + prim->start, prim->count); + + prim_packet.header.opcode = CMD_3D_PRIM; + prim_packet.header.length = sizeof(prim_packet)/4 - 2; + prim_packet.header.pad = 0; + prim_packet.header.topology = hw_prim; + prim_packet.header.indexed = prim->indexed; + + prim_packet.verts_per_instance = trim(prim->mode, prim->count); + prim_packet.start_vert_location = prim->start; + if (prim->indexed) + prim_packet.start_vert_location += brw->ib.start_vertex_offset; + prim_packet.instance_count = 1; + prim_packet.start_instance_location = 0; + prim_packet.base_vert_location = prim->basevertex; + + /* Can't wrap here, since we rely on the validated state. 
*/ + brw->no_batch_wrap = GL_TRUE; + + /* If we're set to always flush, do it before and after the primitive emit. + * We want to catch both missed flushes that hurt instruction/state cache + * and missed flushes of the render cache as it heads to other parts of + * the driver besides the draw code. + */ + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } + if (prim_packet.verts_per_instance) { + intel_batchbuffer_data( brw->intel.batch, &prim_packet, + sizeof(prim_packet), LOOP_CLIPRECTS); + } + if (intel->always_flush_cache) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH(intel->vtbl.flush_cmd()); + ADVANCE_BATCH(); + } + + brw->no_batch_wrap = GL_FALSE; +} + +/* Rebind every vertex input to the supplied client arrays and rebuild the + * packed size table: (Size - 1) takes two bits per attribute, sixteen + * attributes per word, and is recorded only for arrays with a non-zero + * StrideB. Flags BRW_NEW_INPUT_DIMENSIONS when the table differs from the + * previous one. + */ +static void brw_merge_inputs( struct brw_context *brw, + const struct gl_client_array *arrays[]) +{ + struct brw_vertex_info old = brw->vb.info; + GLuint i; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) + dri_bo_unreference(brw->vb.inputs[i].bo); + + memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs)); + memset(&brw->vb.info, 0, sizeof(brw->vb.info)); + + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + brw->vb.inputs[i].glarray = arrays[i]; + brw->vb.inputs[i].attrib = (gl_vert_attrib) i; + + if (arrays[i]->StrideB != 0) + brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << + ((i%16) * 2); + } + + /* Raise statechanges if input sizes have changed. */ + if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) + brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; +} + +/* XXX: could split the primitive list to fallback only on the + * non-conformant primitives. + */ +static GLboolean check_fallbacks( struct brw_context *brw, + const struct _mesa_prim *prim, + GLuint nr_prims ) +{ + GLcontext *ctx = &brw->intel.ctx; + GLuint i; + + /* If we don't require strict OpenGL conformance, never + * use fallbacks. If we're forcing fallbacks, always + * use fallbacks. 
+ */ + if (brw->intel.conformance_mode == 0) + return GL_FALSE; + + if (brw->intel.conformance_mode == 2) + return GL_TRUE; + + if (ctx->Polygon.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (reduced_prim[prim[i].mode] == GL_TRIANGLES) + return GL_TRUE; + } + + /* BRW hardware will do AA lines, but they are non-conformant it + * seems. TBD whether we keep this fallback: + */ + if (ctx->Line.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (reduced_prim[prim[i].mode] == GL_LINES) + return GL_TRUE; + } + + /* Stipple -- these fallbacks could be resolved with a little + * bit of work? + */ + if (ctx->Line.StippleFlag) { + for (i = 0; i < nr_prims; i++) { + /* GS doesn't get enough information to know when to reset + * the stipple counter?!? + */ + if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) + return GL_TRUE; + + if (prim[i].mode == GL_POLYGON && + (ctx->Polygon.FrontMode == GL_LINE || + ctx->Polygon.BackMode == GL_LINE)) + return GL_TRUE; + } + } + + if (ctx->Point.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (prim[i].mode == GL_POINTS) + return GL_TRUE; + } + + /* BRW hardware doesn't handle GL_CLAMP texturing correctly; + * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP + * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and + * we want strict conformance, force the fallback. + * Right now, we only do this for 2D textures. 
+ */ + { + int u; + for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; + if (texUnit->Enabled) { + if (texUnit->Enabled & TEXTURE_1D_BIT) { + if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_2D_BIT) { + if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_3D_BIT) { + if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { + return GL_TRUE; + } + } + } + } + } + + /* Nothing stopping us from the fast path now */ + return GL_FALSE; +} + +/* May fail if out of video memory for texture or vbo upload, or on + * fallback conditions. + */ +static GLboolean brw_try_draw_prims( GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index ) +{ + struct intel_context *intel = intel_context(ctx); + struct brw_context *brw = brw_context(ctx); + GLboolean retval = GL_FALSE; + GLboolean warn = GL_FALSE; + GLboolean first_time = GL_TRUE; + GLuint i; + + if (ctx->NewState) + _mesa_update_state( ctx ); + + /* We have to validate the textures *before* checking for fallbacks; + * otherwise, the software fallback won't be able to rely on the + * texture state, the firstLevel and lastLevel fields won't be + * set in the intel texture object (they'll both be 0), and the + * software fallback will segfault if it attempts to access any + * texture level other than level 0. 
+ */ + brw_validate_textures( brw ); + + if (check_fallbacks(brw, prim, nr_prims)) + return GL_FALSE; + + /* Bind all inputs, derive varying and size information: + */ + brw_merge_inputs( brw, arrays ); + + brw->ib.ib = ib; + brw->state.dirty.brw |= BRW_NEW_INDICES; + + brw->vb.min_index = min_index; + brw->vb.max_index = max_index; + brw->state.dirty.brw |= BRW_NEW_VERTICES; + + /* Have to validate state quite late. Will rebuild tnl_program, + * which depends on varying information. + * + * Note this is where brw->vs->prog_data.inputs_read is calculated, + * so can't access it earlier. + */ + + LOCK_HARDWARE(intel); + + if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) { + UNLOCK_HARDWARE(intel); + return GL_TRUE; + } + + for (i = 0; i < nr_prims; i++) { + uint32_t hw_prim; + + /* Flush the batch if it's approaching full, so that we don't wrap while + * we've got validated state that needs to be in the same batch as the + * primitives. This fraction is just a guess (minimal full state plus + * a primitive is around 512 bytes), and would be better if we had + * an upper bound of how much we might emit in a single + * brw_try_draw_prims(). + */ + intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, + LOOP_CLIPRECTS); + + hw_prim = brw_set_prim(brw, prim[i].mode); + + if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) { + first_time = GL_FALSE; + + brw_validate_state(brw); + + /* Various fallback checks: */ + if (brw->intel.Fallback) + goto out; + + /* Check that we can fit our state in with our existing batchbuffer, or + * flush otherwise. + */ + if (dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count)) { + static GLboolean warned; + intel_batchbuffer_flush(intel->batch); + + /* Validate the state after we flushed the batch (which would have + * changed the set of dirty state). 
If we still fail to + * check_aperture, warn of what's happening, but attempt to continue + * on since it may succeed anyway, and the user would probably rather + * see a failure and a warning than a fallback. + */ + brw_validate_state(brw); + if (!warned && + dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count)) { + warn = GL_TRUE; + warned = GL_TRUE; + } + } + + brw_upload_state(brw); + } + + brw_emit_prim(brw, &prim[i], hw_prim); + + retval = GL_TRUE; + } + + if (intel->always_flush_batch) + intel_batchbuffer_flush(intel->batch); + out: + UNLOCK_HARDWARE(intel); + + brw_state_cache_check_size(brw); + + if (warn) + fprintf(stderr, "i965: Single primitive emit potentially exceeded " + "available aperture space\n"); + + if (!retval) + DBG("%s failed\n", __FUNCTION__); + + return retval; +} + +void brw_draw_prims( GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prim, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index ) +{ + GLboolean retval; + + if (!vbo_all_varyings_in_vbos(arrays)) { + if (!index_bounds_valid) + vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + + /* Decide if we want to rebase. If so we end up recursing once + * only into this function. + */ + if (min_index != 0) { + vbo_rebase_prims(ctx, arrays, + prim, nr_prims, + ib, min_index, max_index, + brw_draw_prims ); + return; + } + } + + /* Make a first attempt at drawing: + */ + retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + + /* Otherwise, we really are out of memory. Pass the drawing + * command to the software tnl module and which will in turn call + * swrast to do the drawing. 
+ */ + if (!retval) { + _swsetup_Wakeup(ctx); + _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + } + +} + +void brw_draw_init( struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct vbo_context *vbo = vbo_context(ctx); + + /* Register our drawing function: + */ + vbo->draw_prims = brw_draw_prims; +} + +void brw_draw_destroy( struct brw_context *brw ) +{ + int i; + + if (brw->vb.upload.bo != NULL) { + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = NULL; + } + + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + dri_bo_unreference(brw->vb.inputs[i].bo); + brw->vb.inputs[i].bo = NULL; + } + + dri_bo_unreference(brw->ib.bo); + brw->ib.bo = NULL; +} diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h new file mode 100644 index 0000000000..2a14db217f --- /dev/null +++ b/src/gallium/drivers/i965/brw_draw.h @@ -0,0 +1,54 @@ + /************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_DRAW_H +#define BRW_DRAW_H + +#include "main/mtypes.h" /* for GLcontext... */ +#include "vbo/vbo.h" + +struct brw_context; + + +void brw_draw_prims( GLcontext *ctx, + const struct gl_client_array *arrays[], + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLboolean index_bounds_valid, + GLuint min_index, + GLuint max_index ); + +void brw_draw_init( struct brw_context *brw ); +void brw_draw_destroy( struct brw_context *brw ); + +/* brw_draw_current.c + */ +void brw_init_current_values(GLcontext *ctx, + struct gl_client_array *arrays); + +#endif diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c new file mode 100644 index 0000000000..a3ff6c58d8 --- /dev/null +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -0,0 +1,742 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+
+#include "main/glheader.h"
+#include "main/bufferobj.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/api_validate.h"
+#include "main/enums.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_fallback.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
+#include "intel_tex.h"
+
+/* Hardware surface-format lookup tables, one per GL component type and
+ * per normalized/scaled flavour (double and float have a single table
+ * each).  get_surface_type() indexes a table directly with the vertex
+ * array's size, so slots 1..4 hold the one- to four-component formats
+ * and slot 0 is a zero placeholder.
+ *
+ * The tables are only ever read, so they are declared const.
+ */
+static const GLuint double_types[5] = {
+   0,
+   BRW_SURFACEFORMAT_R64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64B64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
+};
+
+static const GLuint float_types[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32B32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
+};
+
+static const GLuint uint_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_UNORM,
+   BRW_SURFACEFORMAT_R32G32_UNORM,
+   BRW_SURFACEFORMAT_R32G32B32_UNORM,
+   BRW_SURFACEFORMAT_R32G32B32A32_UNORM
+};
+
+static const GLuint uint_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_USCALED,
+   BRW_SURFACEFORMAT_R32G32_USCALED,
+   BRW_SURFACEFORMAT_R32G32B32_USCALED,
+   BRW_SURFACEFORMAT_R32G32B32A32_USCALED
+};
+
+static const GLuint int_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_SNORM,
+   BRW_SURFACEFORMAT_R32G32_SNORM,
+   BRW_SURFACEFORMAT_R32G32B32_SNORM,
+   BRW_SURFACEFORMAT_R32G32B32A32_SNORM
+};
+
+static const GLuint int_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32B32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
+};
+
+static const GLuint ushort_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_UNORM,
+   BRW_SURFACEFORMAT_R16G16_UNORM,
+   BRW_SURFACEFORMAT_R16G16B16_UNORM,
+   BRW_SURFACEFORMAT_R16G16B16A16_UNORM
+};
+
+static const GLuint ushort_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_USCALED,
+   BRW_SURFACEFORMAT_R16G16_USCALED,
+   BRW_SURFACEFORMAT_R16G16B16_USCALED,
+   BRW_SURFACEFORMAT_R16G16B16A16_USCALED
+};
+
+static const GLuint short_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_SNORM,
+   BRW_SURFACEFORMAT_R16G16_SNORM,
+   BRW_SURFACEFORMAT_R16G16B16_SNORM,
+   BRW_SURFACEFORMAT_R16G16B16A16_SNORM
+};
+
+static const GLuint short_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16B16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
+};
+
+static const GLuint ubyte_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_UNORM,
+   BRW_SURFACEFORMAT_R8G8_UNORM,
+   BRW_SURFACEFORMAT_R8G8B8_UNORM,
+   BRW_SURFACEFORMAT_R8G8B8A8_UNORM
+};
+
+static const GLuint ubyte_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_USCALED,
+   BRW_SURFACEFORMAT_R8G8_USCALED,
+   BRW_SURFACEFORMAT_R8G8B8_USCALED,
+   BRW_SURFACEFORMAT_R8G8B8A8_USCALED
+};
+
+static const GLuint byte_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_SNORM,
+   BRW_SURFACEFORMAT_R8G8_SNORM,
+   BRW_SURFACEFORMAT_R8G8B8_SNORM,
+   BRW_SURFACEFORMAT_R8G8B8A8_SNORM
+};
+
+static const GLuint byte_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8B8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
+};
+
+
+/**
+ * Given vertex array type/size/format/normalized info, return
+ * the appropriate hardware surface type.
+ * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
+ */ +static GLuint get_surface_type( GLenum type, GLuint size, + GLenum format, GLboolean normalized ) +{ + if (INTEL_DEBUG & DEBUG_VERTS) + _mesa_printf("type %s size %d normalized %d\n", + _mesa_lookup_enum_by_nr(type), size, normalized); + + if (normalized) { + switch (type) { + case GL_DOUBLE: return double_types[size]; + case GL_FLOAT: return float_types[size]; + case GL_INT: return int_types_norm[size]; + case GL_SHORT: return short_types_norm[size]; + case GL_BYTE: return byte_types_norm[size]; + case GL_UNSIGNED_INT: return uint_types_norm[size]; + case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; + case GL_UNSIGNED_BYTE: + if (format == GL_BGRA) { + /* See GL_EXT_vertex_array_bgra */ + assert(size == 4); + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + } + else { + return ubyte_types_norm[size]; + } + default: assert(0); return 0; + } + } + else { + assert(format == GL_RGBA); /* sanity check */ + switch (type) { + case GL_DOUBLE: return double_types[size]; + case GL_FLOAT: return float_types[size]; + case GL_INT: return int_types_scale[size]; + case GL_SHORT: return short_types_scale[size]; + case GL_BYTE: return byte_types_scale[size]; + case GL_UNSIGNED_INT: return uint_types_scale[size]; + case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; + case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; + default: assert(0); return 0; + } + } +} + + +static GLuint get_size( GLenum type ) +{ + switch (type) { + case GL_DOUBLE: return sizeof(GLdouble); + case GL_FLOAT: return sizeof(GLfloat); + case GL_INT: return sizeof(GLint); + case GL_SHORT: return sizeof(GLshort); + case GL_BYTE: return sizeof(GLbyte); + case GL_UNSIGNED_INT: return sizeof(GLuint); + case GL_UNSIGNED_SHORT: return sizeof(GLushort); + case GL_UNSIGNED_BYTE: return sizeof(GLubyte); + default: return 0; + } +} + +static GLuint get_index_type(GLenum type) +{ + switch (type) { + case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE; + case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD; + case 
GL_UNSIGNED_INT: return BRW_INDEX_DWORD; + default: assert(0); return 0; + } +} + +static void wrap_buffers( struct brw_context *brw, + GLuint size ) +{ + if (size < BRW_UPLOAD_INIT_SIZE) + size = BRW_UPLOAD_INIT_SIZE; + + brw->vb.upload.offset = 0; + + if (brw->vb.upload.bo != NULL) + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", + size, 1); + + /* Set the internal VBO\ to no-backing-store. We only use them as a + * temporary within a brw_try_draw_prims while the lock is held. + */ + /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH + FAKE TO PUSH THIS STUFF */ +// if (!brw->intel.ttm) +// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); +} + +static void get_space( struct brw_context *brw, + GLuint size, + dri_bo **bo_return, + GLuint *offset_return ) +{ + size = ALIGN(size, 64); + + if (brw->vb.upload.bo == NULL || + brw->vb.upload.offset + size > brw->vb.upload.bo->size) { + wrap_buffers(brw, size); + } + + assert(*bo_return == NULL); + dri_bo_reference(brw->vb.upload.bo); + *bo_return = brw->vb.upload.bo; + *offset_return = brw->vb.upload.offset; + brw->vb.upload.offset += size; +} + +static void +copy_array_to_vbo_array( struct brw_context *brw, + struct brw_vertex_element *element, + GLuint dst_stride) +{ + struct intel_context *intel = &brw->intel; + GLuint size = element->count * dst_stride; + + get_space(brw, size, &element->bo, &element->offset); + + if (element->glarray->StrideB == 0) { + assert(element->count == 1); + element->stride = 0; + } else { + element->stride = dst_stride; + } + + if (dst_stride == element->glarray->StrideB) { + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(element->bo); + memcpy((char *)element->bo->virtual + element->offset, + element->glarray->Ptr, size); + drm_intel_gem_bo_unmap_gtt(element->bo); + } else { + dri_bo_subdata(element->bo, + element->offset, + size, + element->glarray->Ptr); + } + } 
else { + char *dest; + const unsigned char *src = element->glarray->Ptr; + int i; + + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(element->bo); + dest = element->bo->virtual; + dest += element->offset; + + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } + + drm_intel_gem_bo_unmap_gtt(element->bo); + } else { + void *data; + + data = _mesa_malloc(dst_stride * element->count); + dest = data; + for (i = 0; i < element->count; i++) { + memcpy(dest, src, dst_stride); + src += element->glarray->StrideB; + dest += dst_stride; + } + + dri_bo_subdata(element->bo, + element->offset, + size, + data); + + _mesa_free(data); + } + } +} + +static void brw_prepare_vertices(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = intel_context(ctx); + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; + GLuint i; + const unsigned char *ptr = NULL; + GLuint interleave = 0; + unsigned int min_index = brw->vb.min_index; + unsigned int max_index = brw->vb.max_index; + + struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; + GLuint nr_uploads = 0; + + /* First build an array of pointers to ve's in vb.inputs_read + */ + if (0) + _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); + + /* Accumulate the list of enabled arrays. */ + brw->vb.nr_enabled = 0; + while (vs_inputs) { + GLuint i = _mesa_ffsll(vs_inputs) - 1; + struct brw_vertex_element *input = &brw->vb.inputs[i]; + + vs_inputs &= ~(1 << i); + brw->vb.enabled[brw->vb.nr_enabled++] = input; + } + + /* XXX: In the rare cases where this happens we fallback all + * the way to software rasterization, although a tnl fallback + * would be sufficient. I don't know of *any* real world + * cases with > 17 vertex attributes enabled, so it probably + * isn't an issue at this point. 
+ */ + if (brw->vb.nr_enabled >= BRW_VEP_MAX) { + intel->Fallback = 1; + return; + } + + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; + + input->element_size = get_size(input->glarray->Type) * input->glarray->Size; + + if (_mesa_is_bufferobj(input->glarray->BufferObj)) { + struct intel_buffer_object *intel_buffer = + intel_buffer_object(input->glarray->BufferObj); + + /* Named buffer object: Just reference its contents directly. */ + dri_bo_unreference(input->bo); + input->bo = intel_bufferobj_buffer(intel, intel_buffer, + INTEL_READ); + dri_bo_reference(input->bo); + input->offset = (unsigned long)input->glarray->Ptr; + input->stride = input->glarray->StrideB; + input->count = input->glarray->_MaxElement; + + /* This is a common place to reach if the user mistakenly supplies + * a pointer in place of a VBO offset. If we just let it go through, + * we may end up dereferencing a pointer beyond the bounds of the + * GTT. We would hope that the VBO's max_index would save us, but + * Mesa appears to hand us min/max values not clipped to the + * array object's _MaxElement, and _MaxElement frequently appears + * to be wrong anyway. + * + * The VBO spec allows application termination in this case, and it's + * probably a service to the poor programmer to do so rather than + * trying to just not render. + */ + assert(input->offset < input->bo->size); + } else { + input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; + if (input->bo != NULL) { + /* Already-uploaded vertex data is present from a previous + * prepare_vertices, but we had to re-validate state due to + * check_aperture failing and a new batch being produced. + */ + continue; + } + + /* Queue the buffer object up to be uploaded in the next pass, + * when we've decided if we're doing interleaved or not. 
+ */ + if (input->attrib == VERT_ATTRIB_POS) { + /* Position array not properly enabled: + */ + if (input->glarray->StrideB == 0) { + intel->Fallback = 1; + return; + } + + interleave = input->glarray->StrideB; + ptr = input->glarray->Ptr; + } + else if (interleave != input->glarray->StrideB || + (const unsigned char *)input->glarray->Ptr - ptr < 0 || + (const unsigned char *)input->glarray->Ptr - ptr > interleave) + { + interleave = 0; + } + + upload[nr_uploads++] = input; + + /* We rebase drawing to start at element zero only when + * varyings are not in vbos, which means we can end up + * uploading non-varying arrays (stride != 0) when min_index + * is zero. This doesn't matter as the amount to upload is + * the same for these arrays whether the draw call is rebased + * or not - we just have to upload the one element. + */ + assert(min_index == 0 || input->glarray->StrideB == 0); + } + } + + /* Handle any arrays to be uploaded. */ + if (nr_uploads > 1 && interleave && interleave <= 256) { + /* All uploads are interleaved, so upload the arrays together as + * interleaved. First, upload the contents and set up upload[0]. + */ + copy_array_to_vbo_array(brw, upload[0], interleave); + + for (i = 1; i < nr_uploads; i++) { + /* Then, just point upload[i] at upload[0]'s buffer. 
*/ + upload[i]->stride = interleave; + upload[i]->offset = upload[0]->offset + + ((const unsigned char *)upload[i]->glarray->Ptr - ptr); + upload[i]->bo = upload[0]->bo; + dri_bo_reference(upload[i]->bo); + } + } + else { + /* Upload non-interleaved arrays */ + for (i = 0; i < nr_uploads; i++) { + copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); + } + } + + brw_prepare_query_begin(brw); + + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; + + brw_add_validated_bo(brw, input->bo); + } +} + +static void brw_emit_vertices(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = intel_context(ctx); + GLuint i; + + brw_emit_query_begin(brw); + + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), emit a single pad + * VERTEX_ELEMENT struct and bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. + */ + if (brw->vb.nr_enabled == 0) { + BEGIN_BATCH(3, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); + OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | + (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | + (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); + ADVANCE_BATCH(); + return; + } + + /* Now emit VB and VEP state packets. + * + * This still defines a hardware VB for each input, even if they + * are interleaved or from the same VBO. TBD if this makes a + * performance difference. 
+ */ + BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_BUFFER << 16) | + ((1 + brw->vb.nr_enabled * 4) - 2)); + + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; + + OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | + BRW_VB0_ACCESS_VERTEXDATA | + (input->stride << BRW_VB0_PITCH_SHIFT)); + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->offset); + if (BRW_IS_IGDNG(brw)) { + if (input->stride) { + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->offset + input->stride * input->count - 1); + } else { + assert(input->count == 1); + OUT_RELOC(input->bo, + I915_GEM_DOMAIN_VERTEX, 0, + input->offset + input->element_size - 1); + } + } else + OUT_BATCH(input->stride ? input->count : 0); + OUT_BATCH(0); /* Instance data step rate */ + } + ADVANCE_BATCH(); + + BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); + for (i = 0; i < brw->vb.nr_enabled; i++) { + struct brw_vertex_element *input = brw->vb.enabled[i]; + uint32_t format = get_surface_type(input->glarray->Type, + input->glarray->Size, + input->glarray->Format, + input->glarray->Normalized); + uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; + uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; + + switch (input->glarray->Size) { + case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; + case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; + case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; + case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT; + break; + } + + OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | + BRW_VE0_VALID | + (format << BRW_VE0_FORMAT_SHIFT) | + (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + + if (BRW_IS_IGDNG(brw)) + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT)); + 
else + OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | + (comp1 << BRW_VE1_COMPONENT_1_SHIFT) | + (comp2 << BRW_VE1_COMPONENT_2_SHIFT) | + (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | + ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); + } + ADVANCE_BATCH(); +} + +const struct brw_tracked_state brw_vertices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES, + .cache = 0, + }, + .prepare = brw_prepare_vertices, + .emit = brw_emit_vertices, +}; + +static void brw_prepare_indices(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + const struct _mesa_index_buffer *index_buffer = brw->ib.ib; + GLuint ib_size; + dri_bo *bo = NULL; + struct gl_buffer_object *bufferobj; + GLuint offset; + GLuint ib_type_size; + + if (index_buffer == NULL) + return; + + ib_type_size = get_size(index_buffer->type); + ib_size = ib_type_size * index_buffer->count; + bufferobj = index_buffer->obj;; + + /* Turn into a proper VBO: + */ + if (!_mesa_is_bufferobj(bufferobj)) { + brw->ib.start_vertex_offset = 0; + + /* Get new bufferobj, offset: + */ + get_space(brw, ib_size, &bo, &offset); + + /* Straight upload + */ + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); + } else { + dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } + } else { + offset = (GLuint) (unsigned long) index_buffer->ptr; + brw->ib.start_vertex_offset = 0; + + /* If the index buffer isn't aligned to its element size, we have to + * rebase it into a temporary. 
+ */ + if ((get_size(index_buffer->type) - 1) & offset) { + GLubyte *map = ctx->Driver.MapBuffer(ctx, + GL_ELEMENT_ARRAY_BUFFER_ARB, + GL_DYNAMIC_DRAW_ARB, + bufferobj); + map += offset; + + get_space(brw, ib_size, &bo, &offset); + + dri_bo_subdata(bo, offset, ib_size, map); + + ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); + } else { + bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), + INTEL_READ); + dri_bo_reference(bo); + + /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading + * the index buffer state when we're just moving the start index + * of our drawing. + */ + brw->ib.start_vertex_offset = offset / ib_type_size; + offset = 0; + ib_size = bo->size; + } + } + + if (brw->ib.bo != bo || + brw->ib.offset != offset || + brw->ib.size != ib_size) + { + drm_intel_bo_unreference(brw->ib.bo); + brw->ib.bo = bo; + brw->ib.offset = offset; + brw->ib.size = ib_size; + + brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; + } else { + drm_intel_bo_unreference(bo); + } + + brw_add_validated_bo(brw, brw->ib.bo); +} + +const struct brw_tracked_state brw_indices = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_INDICES, + .cache = 0, + }, + .prepare = brw_prepare_indices, +}; + +static void brw_emit_index_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + const struct _mesa_index_buffer *index_buffer = brw->ib.ib; + + if (index_buffer == NULL) + return; + + /* Emit the indexbuffer packet: + */ + { + struct brw_indexbuffer ib; + + memset(&ib, 0, sizeof(ib)); + + ib.header.bits.opcode = CMD_INDEX_BUFFER; + ib.header.bits.length = sizeof(ib)/4 - 2; + ib.header.bits.index_format = get_index_type(index_buffer->type); + ib.header.bits.cut_index_enable = 0; + + BEGIN_BATCH(4, IGNORE_CLIPRECTS); + OUT_BATCH( ib.header.dword ); + OUT_RELOC(brw->ib.bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->ib.offset); + OUT_RELOC(brw->ib.bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->ib.offset + brw->ib.size - 1); + OUT_BATCH( 0 ); + 
ADVANCE_BATCH(); + } +} + +const struct brw_tracked_state brw_index_buffer = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH | BRW_NEW_INDEX_BUFFER, + .cache = 0, + }, + .emit = brw_emit_index_buffer, +}; diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c new file mode 100644 index 0000000000..1df561386e --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu.c @@ -0,0 +1,254 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? 
+ */
+/* Configure predication for subsequently emitted instructions.
+ *
+ * A value of 0xff turns predication off.  Any other value is loaded into
+ * the flag register (skipped when it matches the cached p->flag_value)
+ * and normal predication is enabled.  The MOV that loads the flag is
+ * emitted between a push/pop of the instruction state.
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
+{
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   if (value != 0xff) {
+      if (value != p->flag_value) {
+         brw_push_insn_state(p);
+         brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+         p->flag_value = value;
+         brw_pop_insn_state(p);
+      }
+
+      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+   }
+}
+
+/* The setters below each store one field of the default instruction
+ * header at the top of the state stack (p->current).
+ */
+
+/* Set the predicate_control field of the current default header. */
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc )
+{
+   p->current->header.predicate_control = pc;
+}
+
+/* Set the destreg__conditionalmod field of the current default header. */
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional )
+{
+   p->current->header.destreg__conditionalmod = conditional;
+}
+
+/* Set the access_mode field of the current default header. */
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode )
+{
+   p->current->header.access_mode = access_mode;
+}
+
+/* Set the compression_control field of the current default header. */
+void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control )
+{
+   p->current->header.compression_control = compression_control;
+}
+
+/* Set the mask_control field of the current default header. */
+void brw_set_mask_control( struct brw_compile *p, GLuint value )
+{
+   p->current->header.mask_control = value;
+}
+
+/* Set the saturate field of the current default header. */
+void brw_set_saturate( struct brw_compile *p, GLuint value )
+{
+   p->current->header.saturate = value;
+}
+
+/* Duplicate the current default instruction state into the next stack
+ * slot and make that copy current.  Asserts that the stack
+ * (BRW_EU_MAX_INSN_STACK entries) is not already full.
+ */
+void brw_push_insn_state( struct brw_compile *p )
+{
+   assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
+   memcpy(p->current+1, p->current, sizeof(struct brw_instruction));
+   p->current++;
+}
+
+/* Drop the current default instruction state and return to the previous
+ * stack slot.  Asserts that we are not already at the bottom entry.
+ */
+void brw_pop_insn_state( struct brw_compile *p )
+{
+   assert(p->current != p->stack);
+   p->current--;
+}
+
+
+/***********************************************************************
+ */
+/* Reset a compile context: bind it to brw, empty the instruction store,
+ * point the state stack at its zeroed first slot and apply the default
+ * mask, saturate, compression and predicate settings.
+ */
+void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
+{
+   p->brw = brw;
+   p->nr_insn = 0;
+   p->current = p->stack;
+   memset(p->current, 0, sizeof(p->current[0]));
+
+   /* Some defaults?
+    */
+   brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
+   brw_set_saturate(p, 0);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+
+/* Finish a program and hand it back to the caller.
+ *
+ * brw_NOP() is called eight times first (presumably tail padding for the
+ * hardware -- TODO confirm the reason).  *sz then receives the program
+ * size in bytes, and the return value is p->store viewed as dwords.  The
+ * storage stays owned by the brw_compile.
+ */
+const GLuint *brw_get_program( struct brw_compile *p,
+                               GLuint *sz )
+{
+   GLuint i;
+
+   for (i = 0; i < 8; i++)
+      brw_NOP(p);
+
+   *sz = p->nr_insn * sizeof(struct brw_instruction);
+   return (const GLuint *)p->store;
+}
+
+
+
+/**
+ * Subroutine calls require special attention.
+ * Mesa instructions may be expanded into multiple hardware instructions
+ * so the prog_instruction::BranchTarget field can't be used as an index
+ * into the hardware instructions.
+ *
+ * The BranchTarget field isn't needed, however.  Mesa's GLSL compiler
+ * emits CAL and BGNSUB instructions with labels that can be used to map
+ * subroutine calls to actual subroutine code blocks.
+ *
+ * The structures and function here implement patching of CAL instructions
+ * so they jump to the right subroutine code...
+ */
+
+
+/**
+ * For each OPCODE_BGNSUB we create one of these.
+ */
+struct brw_glsl_label
+{
+   const char *name; /**< the label string */
+   GLuint position;  /**< the position of the brw instruction for this label */
+   struct brw_glsl_label *next;  /**< next in linked list */
+};
+
+
+/**
+ * For each OPCODE_CAL we create one of these.
+ */
+struct brw_glsl_call
+{
+   GLuint call_inst_pos;  /**< location of the CAL instruction */
+   const char *sub_name;  /**< name of subroutine to call */
+   struct brw_glsl_call *next;  /**< next in linked list */
+};
+
+
+/**
+ * Called for each OPCODE_BGNSUB.
+ *
+ * Pushes a {name, position} record onto the front of c->first_label.
+ * The name pointer is stored as-is (not copied), so the string must stay
+ * valid until the list is freed.
+ */
+void
+brw_save_label(struct brw_compile *c, const char *name, GLuint position)
+{
+   struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label);
+
+   /* There is no error return here, and a label that was not recorded
+    * could never be resolved later, so fail loudly rather than
+    * dereference a NULL allocation.
+    */
+   if (label == NULL)
+      abort();
+
+   label->name = name;
+   label->position = position;
+   label->next = c->first_label;
+   c->first_label = label;
+}
+
+
+/**
+ * Called for each OPCODE_CAL.
+ */
+/* Pushes a {call_pos, name} record onto the front of c->first_call.
+ * The name pointer is stored as-is (not copied); it is compared against
+ * the saved label names when brw_resolve_cals() runs.
+ */
+void
+brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos)
+{
+   struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call);
+
+   /* A CAL that is not recorded would never be patched by
+    * brw_resolve_cals(), and there is no error return, so fail loudly
+    * (as brw_lookup_label() does) rather than dereference NULL.
+    */
+   if (call == NULL)
+      abort();
+
+   call->call_inst_pos = call_pos;
+   call->sub_name = name;
+   call->next = c->first_call;
+   c->first_call = call;
+}
+
+
+/**
+ * Lookup a label, return label's position/offset.
+ *
+ * Walks c->first_label comparing names with strcmp().  An unknown name
+ * is treated as an internal error and aborts.
+ */
+static GLuint
+brw_lookup_label(struct brw_compile *c, const char *name)
+{
+   const struct brw_glsl_label *label;
+   for (label = c->first_label; label; label = label->next) {
+      if (strcmp(name, label->name) == 0) {
+         return label->position;
+      }
+   }
+   abort();  /* should never happen */
+   return ~0;
+}
+
+
+/**
+ * When we're done generating code, this function is called to resolve
+ * subroutine calls.
+ *
+ * Every recorded CAL gets its src1 immediate rewritten with the distance
+ * to the matching subroutine label.  Both bookkeeping lists are then
+ * freed and reset to NULL.
+ */
+void
+brw_resolve_cals(struct brw_compile *c)
+{
+   const struct brw_glsl_call *call;
+   struct brw_glsl_call *dead_call, *next_call;
+   struct brw_glsl_label *dead_label, *next_label;
+
+   for (call = c->first_call; call; call = call->next) {
+      const GLuint sub_loc = brw_lookup_label(c, call->sub_name);
+      struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos];
+      struct brw_instruction *brw_sub_inst = &c->store[sub_loc];
+      GLint offset = brw_sub_inst - brw_call_inst;
+
+      /* Patch the CAL so it jumps to the subroutine.  offset counts
+       * instructions; the factor 16 is presumably the size in bytes of
+       * one brw_instruction -- TODO confirm.
+       */
+      brw_set_src1(brw_call_inst, brw_imm_d(offset * 16));
+   }
+
+   /* free linked list of calls */
+   for (dead_call = c->first_call; dead_call; dead_call = next_call) {
+      next_call = dead_call->next;
+      _mesa_free(dead_call);
+   }
+   c->first_call = NULL;
+
+   /* free linked list of labels */
+   for (dead_label = c->first_label; dead_label; dead_label = next_label) {
+      next_label = dead_label->next;
+      _mesa_free(dead_label);
+   }
+   c->first_label = NULL;
+}
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
new file mode 100644
index 0000000000..30603bdd0e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -0,0 +1,968 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include "brw_structs.h" +#include "brw_defines.h" +#include "shader/prog_instruction.h" + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + + +#define REG_SIZE (8*4) + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. 
Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg +{ + GLuint type:4; + GLuint file:2; + GLuint nr:8; + GLuint subnr:5; /* :1 in align16 */ + GLuint negate:1; /* source only */ + GLuint abs:1; /* source only */ + GLuint vstride:4; /* source only */ + GLuint width:3; /* src only, align1 only */ + GLuint hstride:2; /* align1 only */ + GLuint address_mode:1; /* relative addressing, hopefully! */ + GLuint pad0:1; + + union { + struct { + GLuint swizzle:8; /* src only, align16 only */ + GLuint writemask:4; /* dest only, align16 only */ + GLint indirect_offset:10; /* relative addressing offset */ + GLuint pad1:10; /* two dwords total */ + } bits; + + GLfloat f; + GLint d; + GLuint ud; + } dw1; +}; + + +struct brw_indirect { + GLuint addr_subnr:4; + GLint addr_offset:10; + GLuint pad:18; +}; + + +struct brw_glsl_label; +struct brw_glsl_call; + + + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 10000 + +struct brw_compile { + struct brw_instruction store[BRW_EU_MAX_INSN]; + GLuint nr_insn; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + GLuint flag_value; + GLboolean single_program_flow; + struct brw_context *brw; + + struct brw_glsl_label *first_label; /**< linked list of labels */ + struct brw_glsl_call *first_call; /**< linked list of CALs */ +}; + + +void +brw_save_label(struct brw_compile *c, const char *name, GLuint position); + +void +brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos); + +void +brw_resolve_cals(struct brw_compile *c); + + + +static INLINE int type_sz( GLuint type ) +{ + switch( type ) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + 
return 1; + default: + return 0; + } +} + +/** + * Construct a brw_reg. + * \param file one of the BRW_x_REGISTER_FILE values + * \param nr register number/index + * \param subnr register sub number + * \param type one of BRW_REGISTER_TYPE_x + * \param vstride one of BRW_VERTICAL_STRIDE_x + * \param width one of BRW_WIDTH_x + * \param hstride one of BRW_HORIZONTAL_STRIDE_x + * \param swizzle one of BRW_SWIZZLE_x + * \param writemask WRITEMASK_X/Y/Z/W bitfield + */ +static INLINE struct brw_reg brw_reg( GLuint file, + GLuint nr, + GLuint subnr, + GLuint type, + GLuint vstride, + GLuint width, + GLuint hstride, + GLuint swizzle, + GLuint writemask ) +{ + struct brw_reg reg; + if (type == BRW_GENERAL_REGISTER_FILE) + assert(nr < BRW_MAX_GRF); + else if (type == BRW_MESSAGE_REGISTER_FILE) + assert(nr < BRW_MAX_MRF); + else if (type == BRW_ARCHITECTURE_REGISTER_FILE) + assert(nr <= BRW_ARF_IP); + + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? 
+ */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +/** Construct float[16] register */ +static INLINE struct brw_reg brw_vec16_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[8] register */ +static INLINE struct brw_reg brw_vec8_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[4] register */ +static INLINE struct brw_reg brw_vec4_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + WRITEMASK_XYZW); +} + +/** Construct float[2] register */ +static INLINE struct brw_reg brw_vec2_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + WRITEMASK_XY); +} + +/** Construct float[1] register */ +static INLINE struct brw_reg brw_vec1_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + WRITEMASK_X); +} + + +static INLINE struct brw_reg retype( struct brw_reg reg, + GLuint type ) +{ + reg.type = type; + return reg; +} + +static INLINE struct brw_reg suboffset( struct brw_reg reg, + GLuint delta ) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + + +static INLINE struct brw_reg offset( struct brw_reg reg, + GLuint delta ) +{ + reg.nr += 
delta; + return reg; +} + + +static INLINE struct brw_reg byte_offset( struct brw_reg reg, + GLuint bytes ) +{ + GLuint newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +/** Construct unsigned word[16] register */ +static INLINE struct brw_reg brw_uw16_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[8] register */ +static INLINE struct brw_reg brw_uw8_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +/** Construct unsigned word[1] register */ +static INLINE struct brw_reg brw_uw1_reg( GLuint file, + GLuint nr, + GLuint subnr ) +{ + return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static INLINE struct brw_reg brw_imm_reg( GLuint type ) +{ + return brw_reg( BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +/** Construct float immediate register */ +static INLINE struct brw_reg brw_imm_f( GLfloat f ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +/** Construct integer immediate register */ +static INLINE struct brw_reg brw_imm_d( GLint d ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +/** Construct uint immediate register */ +static INLINE struct brw_reg brw_imm_ud( GLuint ud ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +/** Construct ushort immediate register */ +static INLINE struct brw_reg brw_imm_uw( GLushort uw ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw | (uw << 16); + return imm; +} + +/** Construct short immediate register */ +static INLINE struct brw_reg 
brw_imm_w( GLshort w ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w | (w << 16); + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/** Construct vector of eight signed half-byte values */ +static INLINE struct brw_reg brw_imm_v( GLuint v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/** Construct vector of four 8-bit float values */ +static INLINE struct brw_reg brw_imm_vf( GLuint v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static INLINE struct brw_reg brw_imm_vf4( GLuint v0, + GLuint v1, + GLuint v2, + GLuint v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + + +static INLINE struct brw_reg brw_address( struct brw_reg reg ) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + +/** Construct float[1] general-purpose register */ +static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[2] general-purpose register */ +static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** Construct float[4] general-purpose register */ +static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +/** 
Construct float[8] general-purpose register */ +static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr ) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr ) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr ) +{ + return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +/** Construct null register (usually used for setting condition codes) */ +static INLINE struct brw_reg brw_null_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static INLINE struct brw_reg brw_address_reg( GLuint subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static INLINE struct brw_reg brw_ip_reg( void ) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + WRITEMASK_XYZW); /* NOTE! 
*/ +} + +static INLINE struct brw_reg brw_acc_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + + +static INLINE struct brw_reg brw_flag_reg( void ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + + +static INLINE struct brw_reg brw_mask_reg( GLuint subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static INLINE struct brw_reg brw_message_reg( GLuint nr ) +{ + assert(nr < BRW_MAX_MRF); + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + + + + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static INLINE GLuint cvt( GLuint val ) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static INLINE struct brw_reg stride( struct brw_reg reg, + GLuint vstride, + GLuint width, + GLuint hstride ) +{ + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + + +static INLINE struct brw_reg vec16( struct brw_reg reg ) +{ + return stride(reg, 16,16,1); +} + +static INLINE struct brw_reg vec8( struct brw_reg reg ) +{ + return stride(reg, 8,8,1); +} + +static INLINE struct brw_reg vec4( struct brw_reg reg ) +{ + return stride(reg, 4,4,1); +} + +static INLINE struct brw_reg vec2( struct brw_reg reg ) +{ + return stride(reg, 2,2,1); +} + +static INLINE struct brw_reg vec1( struct brw_reg reg ) +{ + return stride(reg, 0,1,0); +} + + +static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt ) +{ + return vec1(suboffset(reg, elt)); +} + +static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt ) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + + +static INLINE struct brw_reg brw_swizzle( struct brw_reg reg, + GLuint x, + 
GLuint y, + GLuint z, + GLuint w) +{ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + + +static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg, + GLuint x ) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static INLINE struct brw_reg brw_writemask( struct brw_reg reg, + GLuint mask ) +{ + reg.dw1.bits.writemask &= mask; + return reg; +} + +static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg, + GLuint mask ) +{ + reg.dw1.bits.writemask = mask; + return reg; +} + +static INLINE struct brw_reg negate( struct brw_reg reg ) +{ + reg.negate ^= 1; + return reg; +} + +static INLINE struct brw_reg brw_abs( struct brw_reg reg ) +{ + reg.abs = 1; + return reg; +} + +/*********************************************************************** + */ +static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr, + GLint offset ) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr, + GLint offset ) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint 
offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); +} + +static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset ) +{ + ptr.addr_offset += offset; + return ptr; +} + +static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset ) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +/** Do two brw_regs refer to the same register? */ +static INLINE GLboolean +brw_same_reg(struct brw_reg r1, struct brw_reg r2) +{ + return r1.file == r2.file && r1.nr == r2.nr; +} + +static INLINE struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +void brw_pop_insn_state( struct brw_compile *p ); +void brw_push_insn_state( struct brw_compile *p ); +void brw_set_mask_control( struct brw_compile *p, GLuint value ); +void brw_set_saturate( struct brw_compile *p, GLuint value ); +void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ); +void brw_set_compression_control( struct brw_compile *p, GLboolean control ); +void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value ); +void brw_set_predicate_control( struct brw_compile *p, GLuint pc ); +void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ); + +void brw_init_compile( struct brw_context *, struct brw_compile *p ); +const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); + + +/* Helpers for regular instructions: + */ +#define ALU1(OP) \ +struct brw_instruction 
*brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU1(RNDZ) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + +#undef ALU1 +#undef ALU2 + + + +/* Helpers for SEND instruction: + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle); + +void brw_ff_sync(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean eot, + GLboolean writes_complete, + GLuint offset, + GLuint swizzle); + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint sampler, + GLuint writemask, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot, + GLuint header_present, + GLuint simd_mode); + +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint precision ); + +void brw_math( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint data_type, + 
GLuint precision ); + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + GLuint scratch_offset ); + +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ); + +void brw_dp_READ_4_vs( struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index ); + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + GLuint scratch_offset ); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. + */ +struct brw_instruction *brw_IF(struct brw_compile *p, + GLuint execute_size); + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn); + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *if_or_else_insn); + + +/* DO/WHILE loops: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, + GLuint execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *patch_insn); + +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); +/* Forward jumps: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn); + + + +void brw_NOP(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + GLuint conditional, + struct brw_reg src0, + struct brw_reg src1); + +void brw_print_reg( struct brw_reg reg ); + + +/*********************************************************************** + * brw_eu_util.c: + */ + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + GLuint count); + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + GLuint count); + 
+void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count); + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + GLuint count); + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ); +#endif diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c new file mode 100644 index 0000000000..29f3f6d02f --- /dev/null +++ b/src/gallium/drivers/i965/brw_eu_debug.c @@ -0,0 +1,95 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "main/mtypes.h" +#include "main/imports.h" +#include "brw_eu.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ + static const char *file[] = { + "arf", + "grf", + "msg", + "imm" + }; + + static const char *type[] = { + "ud", + "d", + "uw", + "w", + "ub", + "vf", + "hf", + "f" + }; + + _mesa_printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); + + if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.nr % 2 == 0 && + hwreg.subnr == 0 && + hwreg.vstride == BRW_VERTICAL_STRIDE_8 && + hwreg.width == BRW_WIDTH_8 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* vector register */ + _mesa_printf("vec%d", hwreg.nr); + } + else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.vstride == BRW_VERTICAL_STRIDE_0 && + hwreg.width == BRW_WIDTH_1 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && + hwreg.type == BRW_REGISTER_TYPE_F) { + /* "scalar" register */ + _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + } + else if (hwreg.file == BRW_IMMEDIATE_VALUE) { + _mesa_printf("imm %f", hwreg.dw1.f); + } + else { + _mesa_printf("%s%d.%d<%d;%d,%d>:%s", + file[hwreg.file], + hwreg.nr, + hwreg.subnr / type_sz(hwreg.type), + hwreg.vstride ? 
(1<<(hwreg.vstride-1)) : 0, + 1< + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + + +/*********************************************************************** + * Internal helper for constructing instructions + */ + +static void guess_execution_size( struct brw_instruction *insn, + struct brw_reg reg ) +{ + if (reg.width == BRW_WIDTH_8 && + insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + insn->header.execution_size = BRW_EXECUTE_16; + else + insn->header.execution_size = reg.width; /* note - definitions are compatible */ +} + + +static void brw_set_dest( struct brw_instruction *insn, + struct brw_reg dest ) +{ + if (dest.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(dest.nr < 128); + + insn->bits1.da1.dest_reg_file = dest.file; + insn->bits1.da1.dest_reg_type = dest.type; + insn->bits1.da1.dest_address_mode = dest.address_mode; + + if (dest.address_mode == BRW_ADDRESS_DIRECT) { + insn->bits1.da1.dest_reg_nr = dest.nr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.da1.dest_subreg_nr = dest.subnr; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.da1.dest_horiz_stride = dest.hstride; + } + else { + insn->bits1.da16.dest_subreg_nr = dest.subnr / 16; + insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask; + } + } + else { + insn->bits1.ia1.dest_subreg_nr = dest.subnr; + + /* These are different sizes in align1 vs align16: + */ + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset; + if (dest.hstride == BRW_HORIZONTAL_STRIDE_0) + dest.hstride = BRW_HORIZONTAL_STRIDE_1; + insn->bits1.ia1.dest_horiz_stride = dest.hstride; + } + else { + insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset; + } + } + + /* NEW: Set the execution size based on dest.width and + * insn->compression_control: + */ + guess_execution_size(insn, dest); +} + +static void brw_set_src0( 
struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) + assert(reg.nr < 128); + + insn->bits1.da1.src0_reg_file = reg.file; + insn->bits1.da1.src0_reg_type = reg.type; + insn->bits2.da1.src0_abs = reg.abs; + insn->bits2.da1.src0_negate = reg.negate; + insn->bits2.da1.src0_address_mode = reg.address_mode; + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + + /* Required to set some fields in src1 as well: + */ + insn->bits1.da1.src1_reg_file = 0; /* arf */ + insn->bits1.da1.src1_reg_type = reg.type; + } + else + { + if (reg.address_mode == BRW_ADDRESS_DIRECT) { + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.da1.src0_subreg_nr = reg.subnr; + insn->bits2.da1.src0_reg_nr = reg.nr; + } + else { + insn->bits2.da16.src0_subreg_nr = reg.subnr / 16; + insn->bits2.da16.src0_reg_nr = reg.nr; + } + } + else { + insn->bits2.ia1.src0_subreg_nr = reg.subnr; + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset; + } + else { + insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset; + } + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits2.da1.src0_width = BRW_WIDTH_1; + insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits2.da1.src0_horiz_stride = reg.hstride; + insn->bits2.da1.src0_width = reg.width; + insn->bits2.da1.src0_vert_stride = reg.vstride; + } + } + else { + insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, 
BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits2.da16.src0_vert_stride = reg.vstride; + } + } +} + + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ) +{ + assert(reg.file != BRW_MESSAGE_REGISTER_FILE); + + assert(reg.nr < 128); + + insn->bits1.da1.src1_reg_file = reg.file; + insn->bits1.da1.src1_reg_type = reg.type; + insn->bits3.da1.src1_abs = reg.abs; + insn->bits3.da1.src1_negate = reg.negate; + + /* Only src1 can be immediate in two-argument instructions. + */ + assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE); + + if (reg.file == BRW_IMMEDIATE_VALUE) { + insn->bits3.ud = reg.dw1.ud; + } + else { + /* This is a hardware restriction, which may or may not be lifted + * in the future: + */ + assert (reg.address_mode == BRW_ADDRESS_DIRECT); + //assert (reg.file == BRW_GENERAL_REGISTER_FILE); + + if (insn->header.access_mode == BRW_ALIGN_1) { + insn->bits3.da1.src1_subreg_nr = reg.subnr; + insn->bits3.da1.src1_reg_nr = reg.nr; + } + else { + insn->bits3.da16.src1_subreg_nr = reg.subnr / 16; + insn->bits3.da16.src1_reg_nr = reg.nr; + } + + if (insn->header.access_mode == BRW_ALIGN_1) { + if (reg.width == BRW_WIDTH_1 && + insn->header.execution_size == BRW_EXECUTE_1) { + insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0; + insn->bits3.da1.src1_width = BRW_WIDTH_1; + insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0; + } + else { + insn->bits3.da1.src1_horiz_stride = reg.hstride; + insn->bits3.da1.src1_width = reg.width; + insn->bits3.da1.src1_vert_stride = reg.vstride; + } + } + else { + insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X); + insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y); + insn->bits3.da16.src1_swz_z = 
BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z); + insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W); + + /* This is an oddity of the fact we're using the same + * descriptions for registers in align_16 as align_1: + */ + if (reg.vstride == BRW_VERTICAL_STRIDE_8) + insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4; + else + insn->bits3.da16.src1_vert_stride = reg.vstride; + } + } +} + + + +static void brw_set_math_message( struct brw_context *brw, + struct brw_instruction *insn, + GLuint msg_length, + GLuint response_length, + GLuint function, + GLuint integer_type, + GLboolean low_precision, + GLboolean saturate, + GLuint dataType ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.math_igdng.function = function; + insn->bits3.math_igdng.int_type = integer_type; + insn->bits3.math_igdng.precision = low_precision; + insn->bits3.math_igdng.saturate = saturate; + insn->bits3.math_igdng.data_type = dataType; + insn->bits3.math_igdng.snapshot = 0; + insn->bits3.math_igdng.header_present = 0; + insn->bits3.math_igdng.response_length = response_length; + insn->bits3.math_igdng.msg_length = msg_length; + insn->bits3.math_igdng.end_of_thread = 0; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH; + insn->bits2.send_igdng.end_of_thread = 0; + } else { + insn->bits3.math.function = function; + insn->bits3.math.int_type = integer_type; + insn->bits3.math.precision = low_precision; + insn->bits3.math.saturate = saturate; + insn->bits3.math.data_type = dataType; + insn->bits3.math.response_length = response_length; + insn->bits3.math.msg_length = msg_length; + insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH; + insn->bits3.math.end_of_thread = 0; + } +} + + +static void brw_set_ff_sync_message( struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean end_of_thread, + GLboolean complete, + GLuint 
offset, + GLuint swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + insn->bits3.urb_igdng.opcode = 1; + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; +} + +static void brw_set_urb_message( struct brw_context *brw, + struct brw_instruction *insn, + GLboolean allocate, + GLboolean used, + GLuint msg_length, + GLuint response_length, + GLboolean end_of_thread, + GLboolean complete, + GLuint offset, + GLuint swizzle_control ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.urb_igdng.opcode = 0; /* ? */ + insn->bits3.urb_igdng.offset = offset; + insn->bits3.urb_igdng.swizzle_control = swizzle_control; + insn->bits3.urb_igdng.allocate = allocate; + insn->bits3.urb_igdng.used = used; /* ? */ + insn->bits3.urb_igdng.complete = complete; + insn->bits3.urb_igdng.header_present = 1; + insn->bits3.urb_igdng.response_length = response_length; + insn->bits3.urb_igdng.msg_length = msg_length; + insn->bits3.urb_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.urb.opcode = 0; /* ? */ + insn->bits3.urb.offset = offset; + insn->bits3.urb.swizzle_control = swizzle_control; + insn->bits3.urb.allocate = allocate; + insn->bits3.urb.used = used; /* ? 
*/ + insn->bits3.urb.complete = complete; + insn->bits3.urb.response_length = response_length; + insn->bits3.urb.msg_length = msg_length; + insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB; + insn->bits3.urb.end_of_thread = end_of_thread; + } +} + +static void brw_set_dp_write_message( struct brw_context *brw, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint msg_length, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_write_igdng.binding_table_index = binding_table_index; + insn->bits3.dp_write_igdng.msg_control = msg_control; + insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write_igdng.msg_type = msg_type; + insn->bits3.dp_write_igdng.send_commit_msg = 0; + insn->bits3.dp_write_igdng.header_present = 1; + insn->bits3.dp_write_igdng.response_length = response_length; + insn->bits3.dp_write_igdng.msg_length = msg_length; + insn->bits3.dp_write_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_write.binding_table_index = binding_table_index; + insn->bits3.dp_write.msg_control = msg_control; + insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.dp_write.msg_type = msg_type; + insn->bits3.dp_write.send_commit_msg = 0; + insn->bits3.dp_write.response_length = response_length; + insn->bits3.dp_write.msg_length = msg_length; + insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE; + insn->bits3.dp_write.end_of_thread = end_of_thread; + } +} + +static void brw_set_dp_read_message( struct brw_context *brw, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint target_cache, + GLuint msg_length, + GLuint 
response_length, + GLuint end_of_thread ) +{ + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.dp_read_igdng.binding_table_index = binding_table_index; + insn->bits3.dp_read_igdng.msg_control = msg_control; + insn->bits3.dp_read_igdng.msg_type = msg_type; + insn->bits3.dp_read_igdng.target_cache = target_cache; + insn->bits3.dp_read_igdng.header_present = 1; + insn->bits3.dp_read_igdng.response_length = response_length; + insn->bits3.dp_read_igdng.msg_length = msg_length; + insn->bits3.dp_read_igdng.pad1 = 0; + insn->bits3.dp_read_igdng.end_of_thread = end_of_thread; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; + insn->bits2.send_igdng.end_of_thread = end_of_thread; + } else { + insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ + insn->bits3.dp_read.msg_type = msg_type; /*12:13*/ + insn->bits3.dp_read.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read.response_length = response_length; /*16:19*/ + insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read.pad1 = 0; /*28:30*/ + insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ + } +} + +static void brw_set_sampler_message(struct brw_context *brw, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint sampler, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot, + GLuint header_present, + GLuint simd_mode) +{ + assert(eot == 0); + brw_set_src1(insn, brw_imm_d(0)); + + if (BRW_IS_IGDNG(brw)) { + insn->bits3.sampler_igdng.binding_table_index = binding_table_index; + insn->bits3.sampler_igdng.sampler = sampler; + insn->bits3.sampler_igdng.msg_type = msg_type; + insn->bits3.sampler_igdng.simd_mode = simd_mode; + insn->bits3.sampler_igdng.header_present = header_present; + insn->bits3.sampler_igdng.response_length = response_length; + 
insn->bits3.sampler_igdng.msg_length = msg_length; + insn->bits3.sampler_igdng.end_of_thread = eot; + insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER; + insn->bits2.send_igdng.end_of_thread = eot; + } else if (BRW_IS_G4X(brw)) { + insn->bits3.sampler_g4x.binding_table_index = binding_table_index; + insn->bits3.sampler_g4x.sampler = sampler; + insn->bits3.sampler_g4x.msg_type = msg_type; + insn->bits3.sampler_g4x.response_length = response_length; + insn->bits3.sampler_g4x.msg_length = msg_length; + insn->bits3.sampler_g4x.end_of_thread = eot; + insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } else { + insn->bits3.sampler.binding_table_index = binding_table_index; + insn->bits3.sampler.sampler = sampler; + insn->bits3.sampler.msg_type = msg_type; + insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; + insn->bits3.sampler.response_length = response_length; + insn->bits3.sampler.msg_length = msg_length; + insn->bits3.sampler.end_of_thread = eot; + insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER; + } +} + + + +static struct brw_instruction *next_insn( struct brw_compile *p, + GLuint opcode ) +{ + struct brw_instruction *insn; + + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); + + insn = &p->store[p->nr_insn++]; + memcpy(insn, p->current, sizeof(*insn)); + + /* Reset this one-shot flag: + */ + + if (p->current->header.destreg__conditionalmod) { + p->current->header.destreg__conditionalmod = 0; + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } + + insn->header.opcode = opcode; + return insn; +} + + +static struct brw_instruction *brw_alu1( struct brw_compile *p, + GLuint opcode, + struct brw_reg dest, + struct brw_reg src ) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + return insn; +} + +static struct brw_instruction *brw_alu2(struct brw_compile *p, + GLuint opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 
) +{ + struct brw_instruction *insn = next_insn(p, opcode); + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + return insn; +} + + +/*********************************************************************** + * Convenience routines. + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU1(RNDZ) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + + + + +void brw_NOP(struct brw_compile *p) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_ud(0x0)); +} + + + + + +/*********************************************************************** + * Comparisons, if/else/endif + */ + +struct brw_instruction *brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + + insn->header.execution_size = 1; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_DISABLE; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + +/* EU takes the value from the flag register and pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack). 
Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, eg. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevent flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. If the stack is now empty, normal execution resumes. + * + * No attempt is made to deal with stack overflow (14 elements?). + */ +struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + assert(execute_size == BRW_EXECUTE_1); + + insn = next_insn(p, BRW_OPCODE_ADD); + insn->header.predicate_inverse = 1; + } else { + insn = next_insn(p, BRW_OPCODE_IF); + } + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.execution_size = execute_size; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.predicate_control = BRW_PREDICATE_NORMAL; + insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn) +{ + struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); + } else { + insn = next_insn(p, BRW_OPCODE_ELSE); + } + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = if_insn->header.execution_size; + 
insn->header.mask_control = BRW_MASK_ENABLE; + if (!p->single_program_flow) + insn->header.thread_control = BRW_THREAD_SWITCH; + + /* Patch the if instruction to point at this instruction. + */ + if (p->single_program_flow) { + assert(if_insn->header.opcode == BRW_OPCODE_ADD); + + if_insn->bits3.ud = (insn - if_insn + 1) * 16; + } else { + assert(if_insn->header.opcode == BRW_OPCODE_IF); + + if_insn->bits3.if_else.jump_count = br * (insn - if_insn); + if_insn->bits3.if_else.pop_count = 0; + if_insn->bits3.if_else.pad0 = 0; + } + + return insn; +} + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *patch_insn) +{ + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + + if (p->single_program_flow) { + /* In single program flow mode, there's no need to execute an ENDIF, + * since we don't need to do any stack operations, and if we're executing + * currently, we want to just continue executing. + */ + struct brw_instruction *next = &p->store[p->nr_insn]; + + assert(patch_insn->header.opcode == BRW_OPCODE_ADD); + + patch_insn->bits3.ud = (next - patch_insn) * 16; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); + + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = patch_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + insn->header.thread_control = BRW_THREAD_SWITCH; + + assert(patch_insn->bits3.if_else.jump_count == 0); + + /* Patch the if or else instructions to point at this or the next + * instruction respectively. 
+ */ + if (patch_insn->header.opcode == BRW_OPCODE_IF) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { + patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + assert(0); + } + + /* Also pop item off the stack in the endif instruction: + */ + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + insn->bits3.if_else.pad0 = 0; + return insn; +} + +/* DO/WHILE loop: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) +{ + if (p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_null_reg()); + brw_set_src0(insn, 
brw_null_reg()); + brw_set_src1(insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + + return insn; + } +} + + + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + GLuint br = 1; + + if (BRW_IS_IGDNG(p->brw)) + br = 2; + + if (p->single_program_flow) + insn = next_insn(p, BRW_OPCODE_ADD); + else + insn = next_insn(p, BRW_OPCODE_WHILE); + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->single_program_flow) { + insn->header.execution_size = BRW_EXECUTE_1; + + insn->bits3.d = (do_insn - insn) * 16; + } else { + insn->header.execution_size = do_insn->header.execution_size; + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + +/* insn->header.mask_control = BRW_MASK_ENABLE; */ + + /* insn->header.mask_control = BRW_MASK_DISABLE; */ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; +} + + +/* FORWARD JUMPS: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn) +{ + struct brw_instruction *landing = &p->store[p->nr_insn]; + GLuint jmpi = 1; + + if (BRW_IS_IGDNG(p->brw)) + jmpi = 2; + + assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); + assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + + jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1); +} + + + +/* To integrate with the above, it makes sense that the comparison + * instruction should populate the flag register. 
It might be simpler + * just to use the flag reg for most WM tasks? + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + GLuint conditional, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); + + insn->header.destreg__conditionalmod = conditional; + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + +/* guess_execution_size(insn, src0); */ + + + /* Make it so that future instructions will use the computed flag + * value until brw_set_predicate_control_flag_value() is called + * again. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == 0) { + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + p->flag_value = 0xff; + } +} + + + +/*********************************************************************** + * Helpers for the various SEND message types: + */ + +/** Extended math function, float[8]. + */ +void brw_math( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint data_type, + GLuint precision ) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + data_type); +} + +/** + * Extended math function, float[16]. + * Use 2 send instructions. 
+ */ +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + GLuint function, + GLuint saturate, + GLuint msg_reg_nr, + struct brw_reg src, + GLuint precision ) +{ + struct brw_instruction *insn; + GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* First instruction: + */ + brw_push_insn_state(p); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + /* Second instruction: + */ + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.compression_control = BRW_COMPRESSION_2NDHALF; + insn->header.destreg__conditionalmod = msg_reg_nr+1; + + brw_set_dest(insn, offset(dest,1)); + brw_set_src0(insn, src); + brw_set_math_message(p->brw, + insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + brw_pop_insn_state(p); +} + + +/** + * Write block of 16 dwords/floats to the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. 
+ */ +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + GLuint scratch_offset ) +{ + GLuint msg_reg_nr = 1; + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + GLuint msg_length = 3; + struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + + brw_set_dp_write_message(p->brw, + insn, + 255, /* binding table index (255=stateless) */ + BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ + msg_length, + 0, /* pixel scoreboard */ + 0, /* response_length */ + 0); /* eot */ + } +} + + +/** + * Read block of 16 dwords/floats from the data port Render Cache scratch buffer. + * Scratch offset should be a multiple of 64. + * Used for register spilling. 
+ */ +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + GLuint scratch_offset ) +{ + GLuint msg_reg_nr = 1; + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); /* UW? */ + brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + + brw_set_dp_read_message(p->brw, + insn, + 255, /* binding table index (255=stateless) */ + 3, /* msg_control (3 means 4 Owords) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 1, /* target cache (render/scratch) */ + 1, /* msg_length */ + 2, /* response_length */ + 0); /* eot */ + } +} + + +/** + * Read a float[4] vector from the data port Data Cache (const buffer). + * Location (in buffer) should be a multiple of 16. + * Used for fetching shader constants. + * If relAddr is true, we'll do an indirect fetch using the address register. + */ +void brw_dp_READ_4( struct brw_compile *p, + struct brw_reg dest, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ) +{ + /* XXX: relAddr not implemented */ + GLuint msg_reg_nr = 1; + { + struct brw_reg b; + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + /* Setup MRF[1] with location/offset into const buffer */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. 
+ * when the docs say only dword[2] should be set. Hmmm. But it works. + */ + brw_MOV(p, b, brw_imm_ud(location)); + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + + /* cast dest to a uword[8] vector */ + dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(p->brw, + insn, + bind_table_index, + 0, /* msg_control (0 means 1 Oword) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + +/** + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. + */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + GLuint oword, + GLboolean relAddr, + struct brw_reg addrReg, + GLuint location, + GLuint bind_table_index) +{ + GLuint msg_reg_nr = 1; + + assert(oword < 2); + /* + printf("vs const read msg, location %u, msg_reg_nr %d\n", + location, msg_reg_nr); + */ + + /* Setup MRF[1] with location/offset into const buffer */ + { + struct brw_reg b; + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + /*brw_set_access_mode(p, BRW_ALIGN_16);*/ + + /* XXX I think we're setting all the dwords of MRF[1] to 'location'. + * when the docs say only dword[2] should be set. Hmmm. But it works. 
+ */ + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + /*b = get_element_ud(b, 2);*/ + if (relAddr) { + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + } + else { + brw_MOV(p, b, brw_imm_ud(location)); + } + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + /*insn->header.access_mode = BRW_ALIGN_16;*/ + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(p->brw, + insn, + bind_table_index, + oword, /* 0 = lower Oword, 1 = upper Oword */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint msg_length, + GLuint response_length, + GLboolean eot) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_dp_write_message(p->brw, + insn, + binding_table_index, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + msg_length, + 1, /* pixel scoreboard */ + response_length, + eot); +} + + +/** + * Texture sample instruction. + * Note: the msg_type plus msg_length values determine exactly what kind + * of sampling operation is performed. See volume 4, page 161 of docs. 
+ */ +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + GLuint msg_reg_nr, + struct brw_reg src0, + GLuint binding_table_index, + GLuint sampler, + GLuint writemask, + GLuint msg_type, + GLuint response_length, + GLuint msg_length, + GLboolean eot, + GLuint header_present, + GLuint simd_mode) +{ + GLboolean need_stall = 0; + + if (writemask == 0) { + /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ + return; + } + + /* Hardware doesn't do destination dependency checking on send + * instructions properly. Add a workaround which generates the + * dependency by other means. In practice it seems like this bug + * only crops up for texture samples, and only where registers are + * written by the send and then written again later without being + * read in between. Luckily for us, we already track that + * information and use it to modify the writemask for the + * instruction, so that is a guide for whether a workaround is + * needed. + */ + if (writemask != WRITEMASK_XYZW) { + GLuint dst_offset = 0; + GLuint i, newmask = 0, len = 0; + + for (i = 0; i < 4; i++) { + if (writemask & (1<header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditionalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_sampler_message(p->brw, insn, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + eot, + header_present, + simd_mode); + } + + if (need_stall) { + struct brw_reg reg = vec8(offset(dest, response_length-1)); + + /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, reg, reg); + brw_pop_insn_state(p); + } + +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. 
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+                   struct brw_reg dest,
+                   GLuint msg_reg_nr,
+                   struct brw_reg src0,
+                   GLboolean allocate,
+                   GLboolean used,
+                   GLuint msg_length,
+                   GLuint response_length,
+                   GLboolean eot,
+                   GLboolean writes_complete,
+                   GLuint offset,
+                   GLuint swizzle)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);   /* the URB write is emitted as one SEND */
+
+   assert(msg_length < BRW_MAX_MRF);
+
+   brw_set_dest(insn, dest);
+   brw_set_src0(insn, src0);
+   brw_set_src1(insn, brw_imm_d(0));   /* zero immediate; presumably filled in by brw_set_urb_message() below -- TODO confirm */
+
+   insn->header.destreg__conditionalmod = msg_reg_nr;   /* this header field carries the message register number here */
+
+   /* All message parameters are forwarded unchanged. */
+   brw_set_urb_message(p->brw,
+                       insn,
+                       allocate,
+                       used,
+                       msg_length,
+                       response_length,
+                       eot,
+                       writes_complete,
+                       offset,
+                       swizzle);
+}
+/**
+ * Emit a SEND carrying an FF_SYNC message.
+ *
+ * Operand and header setup is identical to brw_urb_WRITE(); the only
+ * differences are the msg_length assertion and that the message is
+ * filled in by brw_set_ff_sync_message(), which receives every parameter
+ * unchanged.
+ */
+void brw_ff_sync(struct brw_compile *p,
+                 struct brw_reg dest,
+                 GLuint msg_reg_nr,
+                 struct brw_reg src0,
+                 GLboolean allocate,
+                 GLboolean used,
+                 GLuint msg_length,
+                 GLuint response_length,
+                 GLboolean eot,
+                 GLboolean writes_complete,
+                 GLuint offset,
+                 GLuint swizzle)
+{
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+   assert(msg_length < 16);   /* NOTE(review): brw_urb_WRITE() bounds this with BRW_MAX_MRF -- confirm 16 is meant to be the same limit */
+
+   brw_set_dest(insn, dest);
+   brw_set_src0(insn, src0);
+   brw_set_src1(insn, brw_imm_d(0));
+
+   insn->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_ff_sync_message(p->brw,
+                           insn,
+                           allocate,
+                           used,
+                           msg_length,
+                           response_length,
+                           eot,
+                           writes_complete,
+                           offset,
+                           swizzle);
+}
diff --git a/src/gallium/drivers/i965/brw_eu_util.c b/src/gallium/drivers/i965/brw_eu_util.c
new file mode 100644
index 0000000000..5405cf17a4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_util.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+/**
+ * Emit a math instruction applying BRW_MATH_FUNCTION_INV to 'src' and
+ * writing the result to 'dst'.  Saturation is off, precision is full and
+ * the data type is BRW_MATH_DATA_VECTOR.
+ */
+void brw_math_invert( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg src)
+{
+   brw_math( p,
+             dst,
+             BRW_MATH_FUNCTION_INV,
+             BRW_MATH_SATURATE_NONE,
+             0,
+             src,
+             BRW_MATH_PRECISION_FULL,
+             BRW_MATH_DATA_VECTOR );
+}
+
+
+/**
+ * Emit MOVs that copy 'count' 32-byte chunks from 'src' to 'dst'.
+ *
+ * Both registers are first narrowed with vec4(); each chunk is then moved
+ * as two MOVs, at byte offsets i*32 and i*32+16.
+ */
+void brw_copy4(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               GLuint count)
+{
+   GLuint i;
+
+   dst = vec4(dst);
+   src = vec4(src);
+
+   for (i = 0; i < count; i++)
+   {
+      GLuint delta = i*32;   /* byte offset of chunk i */
+      brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+      brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16));
+   }
+}
+
+/**
+ * Emit MOVs that copy 'count' 32-byte chunks from 'src' to 'dst'.
+ *
+ * Same stepping as brw_copy4(), but the registers are widened with vec8()
+ * so each chunk needs a single MOV.
+ */
+void brw_copy8(struct brw_compile *p,
+               struct brw_reg dst,
+               struct brw_reg src,
+               GLuint count)
+{
+   GLuint i;
+
+   dst = vec8(dst);
+   src = vec8(src);
+
+   for (i = 0; i < count; i++)
+   {
+      GLuint delta = i*32;   /* byte offset of chunk i */
+      brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta));
+   }
+}
+
+/**
+ * Emit MOVs that copy 'count' 32-byte chunks between two indirectly
+ * addressed locations.
+ *
+ * Each chunk is moved as two MOVs whose operands are deref_4f() views of
+ * 'dst_ptr' and 'src_ptr' at byte offsets i*32 and i*32+16.
+ */
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+                                   struct brw_indirect dst_ptr,
+                                   struct brw_indirect src_ptr,
+                                   GLuint count)
+{
+   GLuint i;
+
+   for (i = 0; i < count; i++)
+   {
+      GLuint delta = i*32;   /* byte offset of chunk i */
+      brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta));
+      brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16));
+   }
+}
+
+/**
+ * Emit MOVs that copy 'count' 32-byte chunks from the indirectly
+ * addressed location 'ptr' into the register 'dst'.
+ *
+ * 'dst' is narrowed with vec4(); each chunk is moved as two MOVs, at byte
+ * offsets i*32 and i*32+16.
+ */
+void brw_copy_from_indirect(struct brw_compile *p,
+                            struct brw_reg dst,
+                            struct brw_indirect ptr,
+                            GLuint count)
+{
+   GLuint i;
+
+   dst = vec4(dst);
+
+   for (i = 0; i < count; i++)
+   {
+      GLuint delta = i*32;   /* byte offset of chunk i */
+      brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta));
+      brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16));
+   }
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
new file mode 100644
index 0000000000..48c2b9a41c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -0,0 
+1,201 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_gs.h"
+
+
+/**
+ * Build the GS program for 'key' and upload it into the program cache.
+ *
+ * Vertex size is derived from the number of bits set in key->attrs.  The
+ * generator matching key->primitive is then run, and the resulting program
+ * is uploaded with brw_upload_cache(), replacing brw->gs.prog_bo.
+ *
+ * Returns without uploading anything when the primitive has no generator:
+ * GL_LINES, GL_TRIANGLES and GL_POINTS unless key->hint_gs_always is set,
+ * and every primitive not listed in the switch.
+ */
+static void compile_gs_prog( struct brw_context *brw,
+                             struct brw_gs_prog_key *key )
+{
+   struct brw_gs_compile c;
+   const GLuint *program;
+   GLuint program_size;
+
+   memset(&c, 0, sizeof(c));
+
+   c.key = *key;
+   c.need_ff_sync = BRW_IS_IGDNG(brw);
+   /* Need to locate the two positions present in vertex + header.
+    * These are currently hardcoded:
+    */
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+
+   if (BRW_IS_IGDNG(brw))
+      c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
+   else
+      c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
+
+   c.nr_bytes = c.nr_regs * REG_SIZE;
+
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.func.single_program_flow = 1;
+
+   /* For some reason the thread is spawned with only 4 channels
+    * unmasked.
+    */
+   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+   /* Note that primitives which don't require a GS program have
+    * already been weeded out by this stage:
+    */
+   switch (key->primitive) {
+   case GL_QUADS:
+      brw_gs_quads( &c );
+      break;
+   case GL_QUAD_STRIP:
+      brw_gs_quad_strip( &c );
+      break;
+   case GL_LINE_LOOP:
+      brw_gs_lines( &c );
+      break;
+   case GL_LINES:
+      if (key->hint_gs_always)
+         brw_gs_lines( &c );
+      else {
+         return;
+      }
+      break;
+   case GL_TRIANGLES:
+      if (key->hint_gs_always)
+         brw_gs_tris( &c );
+      else {
+         return;
+      }
+      break;
+   case GL_POINTS:
+      if (key->hint_gs_always)
+         brw_gs_points( &c );
+      else {
+         return;
+      }
+      break;
+   default:
+      return;
+   }
+
+   /* get the program
+    */
+   program = brw_get_program(&c.func, &program_size);
+
+   /* Upload
+    */
+   dri_bo_unreference(brw->gs.prog_bo);
+   brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG,
+                                       &c.key, sizeof(c.key),
+                                       NULL, 0,
+                                       program, program_size,
+                                       &c.prog_data,
+                                       &brw->gs.prog_data );
+}
+/* Maps a GL primitive enum (the index; values per the OpenGL headers) to
+ * the primitive class stored in brw_gs_prog_key::primitive.
+ */
+static const GLenum gs_prim[GL_POLYGON+1] = {
+   GL_POINTS,       /* GL_POINTS */
+   GL_LINES,        /* GL_LINES */
+   GL_LINE_LOOP,    /* GL_LINE_LOOP */
+   GL_LINES,        /* GL_LINE_STRIP */
+   GL_TRIANGLES,    /* GL_TRIANGLES */
+   GL_TRIANGLES,    /* GL_TRIANGLE_STRIP */
+   GL_TRIANGLES,    /* GL_TRIANGLE_FAN */
+   GL_QUADS,        /* GL_QUADS */
+   GL_QUAD_STRIP,   /* GL_QUAD_STRIP */
+   GL_TRIANGLES     /* GL_POLYGON */
+};
+/* Fill 'key' from current state: the VS outputs mask, the primitive class
+ * looked up in gs_prim[], and whether a GS program is needed at all
+ * (quads, quad strips, line loops, or when hint_gs_always is set).
+ */
+static void populate_key( struct brw_context *brw,
+                          struct brw_gs_prog_key *key )
+{
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_VS_PROG */
+   key->attrs = brw->vs.prog_data->outputs_written;
+
+   /* BRW_NEW_PRIMITIVE */
+   key->primitive = gs_prim[brw->primitive];
+
+   key->hint_gs_always = 0; /* debug code? */
+
+   key->need_gs_prog = (key->hint_gs_always ||
+                        brw->primitive == GL_QUADS ||
+                        brw->primitive == GL_QUAD_STRIP ||
+                        brw->primitive == GL_LINE_LOOP);
+}
+
+/* Select the GS program for the current primitive.
+ *
+ * Flags CACHE_NEW_GS_PROG whenever the "GS program needed" state flips.
+ * When a program is needed, the reference to the current program buffer
+ * is dropped, the cache is searched for the key, and compile_gs_prog() is
+ * called on a miss.
+ */
+static void prepare_gs_prog(struct brw_context *brw)
+{
+   struct brw_gs_prog_key key;
+   /* Populate the key:
+    */
+   populate_key(brw, &key);
+
+   if (brw->gs.prog_active != key.need_gs_prog) {
+      brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
+      brw->gs.prog_active = key.need_gs_prog;
+   }
+
+   if (brw->gs.prog_active) {
+      dri_bo_unreference(brw->gs.prog_bo);
+      brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
+                                         &key, sizeof(key),
+                                         NULL, 0,
+                                         &brw->gs.prog_data);
+      if (brw->gs.prog_bo == NULL)
+         compile_gs_prog( brw, &key );
+   }
+}
+
+/* State atom for the GS program.  Its dirty bits match the state that
+ * populate_key() reads (see the BRW_NEW_PRIMITIVE and CACHE_NEW_VS_PROG
+ * markers there).
+ */
+const struct brw_tracked_state brw_gs_prog = {
+   .dirty = {
+      .mesa  = 0,
+      .brw   = BRW_NEW_PRIMITIVE,
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .prepare = prepare_gs_prog
+};
diff --git a/src/gallium/drivers/i965/brw_gs.h b/src/gallium/drivers/i965/brw_gs.h
new file mode 100644
index 0000000000..bbb991ea2e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+#ifndef BRW_GS_H
+#define BRW_GS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_GS_VERTS (4)   /* size of brw_gs_compile::reg.vertex[] */
+
+struct brw_gs_prog_key {   /* cache key for a compiled GS program; filled by populate_key() in brw_gs.c */
+   GLuint attrs:32;            /* copied from the VS prog_data outputs_written mask */
+   GLuint primitive:4;         /* primitive class taken from the gs_prim[] table */
+   GLuint hint_gs_always:1;    /* when set, lines/tris/points also get a GS program */
+   GLuint need_gs_prog:1;      /* whether a GS program is required at all */
+   GLuint pad:26;              /* pads the flag word to 32 bits */
+};
+
+struct brw_gs_compile {   /* scratch state for one GS compile */
+   struct brw_compile func;            /* instruction stream being built */
+   struct brw_gs_prog_key key;         /* copy of the key being compiled */
+   struct brw_gs_prog_data prog_data;  /* output handed to the program cache on upload */
+
+   struct {
+      struct brw_reg R0;                    /* first GRF, typed UD (see brw_gs_alloc_regs) */
+      struct brw_reg vertex[MAX_GS_VERTS];  /* one register view per incoming vertex */
+   } reg;
+
+   /* 3 different ways of expressing vertex size:
+    */
+   GLuint nr_attrs;          /* number of bits set in key.attrs */
+   GLuint nr_regs;           /* registers per vertex */
+   GLuint nr_bytes;          /* nr_regs * REG_SIZE */
+   GLboolean need_ff_sync;   /* set from BRW_IS_IGDNG(); generators emit an FF_SYNC first when true */
+};
+
+#define ATTR_SIZE (4*4)   /* presumably bytes per attribute (4 components x 4 bytes) -- TODO confirm */
+
+void brw_gs_quads( struct brw_gs_compile *c );
+void brw_gs_quad_strip( struct brw_gs_compile *c );
+void brw_gs_tris( struct brw_gs_compile *c );
+void brw_gs_lines( struct brw_gs_compile *c );
+void brw_gs_points( struct brw_gs_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c
new file mode 100644
index 0000000000..a9b2aa2eac
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_emit.c
@@ -0,0 +1,186 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "shader/program.h"
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_gs.h"
+/* Assign the fixed GRF layout for a program that reads 'nr_verts'
+ * vertices: R0 is register 0 and vertex j starts at register
+ * 1 + j * nr_regs.  Also records urb_read_length (nr_regs) and total_grf
+ * (1 + nr_verts * nr_regs) in prog_data.
+ */
+static void brw_gs_alloc_regs( struct brw_gs_compile *c,
+                               GLuint nr_verts )
+{
+   GLuint i = 0,j;
+
+   /* Register usage is static, precompute here:
+    */
+   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (j = 0; j < nr_verts; j++) {
+      c->reg.vertex[j] = brw_vec4_grf(i, 0);
+      i += c->nr_regs;
+   }
+
+   c->prog_data.urb_read_length = c->nr_regs;
+   c->prog_data.total_grf = i;
+}
+
+/* Emit one vertex as its own URB write.
+ *
+ * 'header' is written to dword 2 of R0, and the vertex registers starting
+ * at 'vert' are copied to the message registers from m1 onwards.  Unless
+ * 'last' is set, the write also allocates the next URB entry (destination
+ * R0, response length 1).  When 'last' is set the destination is the null
+ * register, no response is requested and EOT is set.
+ */
+static void brw_gs_emit_vue(struct brw_gs_compile *c,
+                            struct brw_reg vert,
+                            GLboolean last,
+                            GLuint header)
+{
+   struct brw_compile *p = &c->func;
+   GLboolean allocate = !last;
+
+   /* Overwrite PrimType and PrimStart in the message header, for
+    * each vertex in turn:
+    */
+   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+   /* Copy the vertex from vertn into m1..mN+1:
+    */
+   brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
+
+   /* Send each vertex as a separate write to the urb.  This is
+    * different to the concept in brw_sf_emit.c, where subsequent
+    * writes are used to build up a single urb entry.  Each of these
+    * writes instantiates a separate urb entry, and a new one must be
+    * allocated each time.
+    */
+   brw_urb_WRITE(p,
+                 allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+                 0,                /* msg_reg_nr */
+                 c->reg.R0,
+                 allocate,
+                 1,                /* used */
+                 c->nr_regs + 1,   /* msg length */
+                 allocate ? 1 : 0, /* response length */
+                 allocate ? 0 : 1, /* eot */
+                 1,                /* writes_complete */
+                 0,                /* urb offset */
+                 BRW_URB_SWIZZLE_NONE);
+}
+/* Emit an FF_SYNC message: 'num_prim' is written to dword 1 of R0, which
+ * is then used as both source and destination of the message.
+ */
+static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
+{
+   struct brw_compile *p = &c->func;
+   brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
+   brw_ff_sync(p,
+               c->reg.R0,
+               0,    /* msg_reg_nr */
+               c->reg.R0,
+               1,    /* allocate */
+               1,    /* used */
+               1,    /* msg length */
+               1,    /* response length */
+               0,    /* eot */
+               1,    /* write complete */
+               0,    /* urb offset */
+               BRW_URB_SWIZZLE_NONE);
+}
+
+/* Emit a four-vertex quad as one _3DPRIM_POLYGON, in the order 3, 0, 1, 2. */
+void brw_gs_quads( struct brw_gs_compile *c )
+{
+   brw_gs_alloc_regs(c, 4);
+
+   /* Use polygons for correct edgeflag behaviour.  Note that vertex 3
+    * is the PV for quads, but vertex 0 for polygons:
+    */
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+   brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+   brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2));
+   brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+}
+/* Emit one quad of a quad strip as a _3DPRIM_POLYGON, in the order
+ * 2, 3, 0, 1.
+ */
+void brw_gs_quad_strip( struct brw_gs_compile *c )
+{
+   brw_gs_alloc_regs(c, 4);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+   brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START));
+   brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2));
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2));
+   brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END));
+}
+/* Pass a three-vertex triangle through unchanged as a _3DPRIM_TRILIST. */
+void brw_gs_tris( struct brw_gs_compile *c )
+{
+   brw_gs_alloc_regs(c, 3);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START));
+   brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2));
+   brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END));
+}
+/* Emit a two-vertex line as a _3DPRIM_LINESTRIP of length two. */
+void brw_gs_lines( struct brw_gs_compile *c )
+{
+   brw_gs_alloc_regs(c, 2);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START));
+   brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END));
+}
+/* Emit a single vertex as a _3DPRIM_POINTLIST that both starts and ends
+ * the primitive.
+ */
+void brw_gs_points( struct brw_gs_compile *c )
+{
+   brw_gs_alloc_regs(c, 1);
+
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END));
+}
+
+
+
+
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
new file mode 100644
index 0000000000..ed9d2ffe60
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -0,0 +1,149 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "main/macros.h" + +struct brw_gs_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + + unsigned int curbe_offset; + + unsigned int nr_urb_entries, urb_size; + GLboolean prog_active; +}; + +static void +gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_GS_PROG */ + key->prog_active = brw->gs.prog_active; + if (key->prog_active) { + key->total_grf = brw->gs.prog_data->total_grf; + key->urb_entry_read_length = brw->gs.prog_data->urb_read_length; + } else { + key->total_grf = 1; + key->urb_entry_read_length = 1; + } + + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.clip_start; + + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_gs_entries; + key->urb_size = brw->urb.vsize; +} + +static dri_bo * +gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) +{ + struct brw_gs_unit_state gs; + dri_bo *bo; + + memset(&gs, 0, sizeof(gs)); + + gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + if (key->prog_active) /* reloc */ + gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; + + gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + gs.thread1.single_program_flow = 1; + + gs.thread3.dispatch_grf_start_reg = 1; + gs.thread3.const_urb_entry_read_offset = 0; + gs.thread3.const_urb_entry_read_length = 0; + gs.thread3.urb_entry_read_offset = 0; + gs.thread3.urb_entry_read_length = key->urb_entry_read_length; + + gs.thread4.nr_urb_entries = key->nr_urb_entries; + gs.thread4.urb_entry_allocation_size = key->urb_size - 1; + + if (key->nr_urb_entries >= 8) + gs.thread4.max_threads = 1; + else + gs.thread4.max_threads = 0; + + if (BRW_IS_IGDNG(brw)) + gs.thread4.rendering_enable = 1; + + if 
(INTEL_DEBUG & DEBUG_STATS) + gs.thread4.stats_enable = 1; + + bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, + key, sizeof(*key), + &brw->gs.prog_bo, 1, + &gs, sizeof(gs), + NULL, NULL); + + if (key->prog_active) { + /* Emit GS program relocation */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + gs.thread0.grf_reg_count << 1, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); + } + + return bo; +} + +static void prepare_gs_unit(struct brw_context *brw) +{ + struct brw_gs_unit_key key; + + gs_unit_populate_key(brw, &key); + + dri_bo_unreference(brw->gs.state_bo); + brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT, + &key, sizeof(key), + &brw->gs.prog_bo, 1, + NULL); + if (brw->gs.state_bo == NULL) { + brw->gs.state_bo = gs_unit_create_from_key(brw, &key); + } +} + +const struct brw_tracked_state brw_gs_unit = { + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_GS_PROG + }, + .prepare = prepare_gs_unit, +}; diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c new file mode 100644 index 0000000000..ea71857548 --- /dev/null +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -0,0 +1,545 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+
+
+
+/***********************************************************************
+ * Blend color
+ *
+ * Builds a CMD_BLEND_CONSTANT_COLOR packet from the four components of
+ * ctx->Color.BlendColor and emits it with BRW_CACHED_BATCH_STRUCT().
+ */
+
+static void upload_blend_constant_color(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_blend_constant_color bcc;
+
+   memset(&bcc, 0, sizeof(bcc));
+   bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
+   bcc.header.length = sizeof(bcc)/4-2;   /* packet size in dwords, minus two */
+   bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
+   bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
+   bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
+   bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+}
+
+/* State atom: emits the blend constant color, keyed on _NEW_COLOR. */
+const struct brw_tracked_state brw_blend_constant_color = {
+   .dirty = {
+      .mesa = _NEW_COLOR,
+      .brw = 0,
+      .cache = 0
+   },
+   .emit = upload_blend_constant_color
+};
+
+/* Constant single cliprect for framebuffer object or DRI2 drawing.
+ *
+ * Emits a _3DSTATE_DRAWRECT_INFO_I965 packet covering the whole draw
+ * buffer; does nothing unless intel->constant_cliprect is set.
+ */
+static void upload_drawing_rect(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+
+   if (!intel->constant_cliprect)
+      return;
+
+   BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
+   OUT_BATCH(0); /* xmin, ymin */
+   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
+             ((ctx->DrawBuffer->Height - 1) << 16));   /* width-1 in the low 16 bits, height-1 above */
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_drawing_rect = {   /* state atom for the drawing rectangle */
+   .dirty = {
+      .mesa = _NEW_BUFFERS,
+      .brw = 0,
+      .cache = 0
+   },
+   .emit = upload_drawing_rect
+};
+/* Register the VS and WM binding table buffers with
+ * brw_add_validated_bo() ahead of upload_binding_table_pointers().
+ */
+static void prepare_binding_table_pointers(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->vs.bind_bo);
+   brw_add_validated_bo(brw, brw->wm.bind_bo);
+}
+
+/**
+ * Upload the binding table pointers, which point each stage's array of surface
+ * state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is 0.
+ */
+static void upload_binding_table_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));   /* header: opcode plus packet length minus two */
+   if (brw->vs.bind_bo != NULL)
+      OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+   else
+      OUT_BATCH(0);
+   OUT_BATCH(0); /* gs */
+   OUT_BATCH(0); /* clip */
+   OUT_BATCH(0); /* sf */
+   OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */   /* NOTE(review): no NULL check here, unlike vs.bind_bo -- confirm wm.bind_bo is always set */
+   ADVANCE_BATCH();
+}
+/* State atom for the binding table pointers. */
+const struct brw_tracked_state brw_binding_table_pointers = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_SURF_BIND,
+   },
+   .prepare = prepare_binding_table_pointers,
+   .emit = upload_binding_table_pointers,
+};
+
+
+/**
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
+ */
+static void upload_pipelined_state_pointers(struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+
+   BEGIN_BATCH(7, IGNORE_CLIPRECTS);
+   OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));   /* header: opcode plus packet length minus two */
+   OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   if (brw->gs.prog_active)
+      OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);   /* delta 1: presumably the unit-enable bit -- TODO confirm */
+   else
+      OUT_BATCH(0);
+   OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);    /* delta 1, as for GS above */
+   OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   ADVANCE_BATCH();
+
+   brw->state.dirty.brw |= BRW_NEW_PSP;   /* flag that the pipelined state pointers were re-emitted */
+}
+
+/* Register every per-stage unit state buffer with brw_add_validated_bo().
+ * gs.state_bo is added unconditionally, although the upload only
+ * references it when a GS program is active.
+ */
+static void prepare_psp_urb_cbs(struct brw_context *brw)
+{
+   brw_add_validated_bo(brw, brw->vs.state_bo);
+   brw_add_validated_bo(brw, brw->gs.state_bo);
+   brw_add_validated_bo(brw, brw->clip.state_bo);
+   brw_add_validated_bo(brw, brw->sf.state_bo);
+   brw_add_validated_bo(brw, brw->wm.state_bo);
+   brw_add_validated_bo(brw, brw->cc.state_bo);
+}
+/* Emit, in this order, the pipelined state pointers, the URB fence and
+ * the CS URB state.
+ */
+static void upload_psp_urb_cbs(struct brw_context *brw )
+{
+   upload_pipelined_state_pointers(brw);
+   brw_upload_urb_fence(brw);
+   brw_upload_cs_urb_state(brw);
+}
+/* State atom combining the pipelined state pointers, URB fence and CS URB
+ * state.
+ */
+const struct brw_tracked_state brw_psp_urb_cbs = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH,
+      .cache = (CACHE_NEW_VS_UNIT |
+                CACHE_NEW_GS_UNIT |
+                CACHE_NEW_GS_PROG |
+                CACHE_NEW_CLIP_UNIT |
+                CACHE_NEW_SF_UNIT |
+                CACHE_NEW_WM_UNIT |
+                CACHE_NEW_CC_UNIT)
+   },
+   .prepare = prepare_psp_urb_cbs,
+   .emit = upload_psp_urb_cbs,
+};
+/* Register the depth region's buffer, if a depth region is bound. */
+static void prepare_depthbuffer(struct brw_context *brw)
+{
+   struct intel_region *region = brw->state.depth_region;
+
+   if (region != NULL)
+      brw_add_validated_bo(brw, region->buffer);
+}
+/* Emit the CMD_DEPTH_BUFFER packet (6 dwords on G4X/IGDNG, 5 otherwise).
+ *
+ * With no depth region a null surface is programmed.  Otherwise the depth
+ * format follows region->cpp: 2 bytes selects D16_UNORM, 4 bytes selects
+ * D32_FLOAT when intel->depth_buffer_is_float is set and
+ * D24_UNORM_S8_UINT if not.  Any other cpp asserts and emits nothing.
+ */
+static void emit_depthbuffer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct intel_region *region = brw->state.depth_region;
+   unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
+
+   if (region == NULL) {
+      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+                (BRW_SURFACE_NULL << 29));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+
+      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+         OUT_BATCH(0);   /* the extra sixth dword on these chipsets */
+
+      ADVANCE_BATCH();
+   } else {
+      unsigned int format;
+
+      switch (region->cpp) {
+      case 2:
+         format = BRW_DEPTHFORMAT_D16_UNORM;
+         break;
+      case 4:
+         if (intel->depth_buffer_is_float)
+            format = BRW_DEPTHFORMAT_D32_FLOAT;
+         else
+            format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+         break;
+      default:
+         assert(0);
+         return;
+      }
+
+      assert(region->tiling != I915_TILING_X);
+
+      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+      OUT_BATCH(((region->pitch * region->cpp) - 1) |      /* pitch in bytes, minus one */
+                (format << 18) |
+                (BRW_TILEWALK_YMAJOR << 26) |
+                ((region->tiling != I915_TILING_NONE) << 27) |   /* 1 when the region is tiled */
+                (BRW_SURFACE_2D << 29));
+      OUT_RELOC(region->buffer,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                0);
+      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+                ((region->pitch - 1) << 6) |
+                ((region->height - 1) << 19));
+      OUT_BATCH(0);
+
+      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+         OUT_BATCH(0);   /* the extra sixth dword on these chipsets */
+
+      ADVANCE_BATCH();
+   }
+}
+/* State atom for the depth buffer packet. */
+const struct brw_tracked_state brw_depthbuffer = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
+      .cache = 0,
+   },
+   .prepare = prepare_depthbuffer,
+   .emit = emit_depthbuffer,
+};
+
+
+
+/***********************************************************************
+ * Polygon stipple packet
+ *
+ * Copies the 32 rows of ctx->PolygonStipple into a
+ * CMD_POLY_STIPPLE_PATTERN packet, reversing the row order when drawing
+ * to the window system framebuffer.
+ */
+
+static void upload_polygon_stipple(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_polygon_stipple bps;
+   GLuint i;
+
+   memset(&bps, 0, sizeof(bps));
+   bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
+   bps.header.length = sizeof(bps)/4-2;   /* packet size in dwords, minus two */
+
+   /* Polygon stipple is provided in OpenGL order, i.e. bottom
+    * row first.  If we're rendering to a window (i.e. the
+    * default frame buffer object, 0), then we need to invert
+    * it to match our pixel layout.  But if we're rendering
+    * to a FBO (i.e. any named frame buffer object), we *don't*
+    * need to invert - we already match the layout.
+    */
+   if (ctx->DrawBuffer->Name == 0) {
+      for (i = 0; i < 32; i++)
+         bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */
+   }
+   else {
+      for (i = 0; i < 32; i++)
+         bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */
+   }
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bps);
+}
+/* State atom for the polygon stipple pattern. */
+const struct brw_tracked_state brw_polygon_stipple = {
+   .dirty = {
+      .mesa = _NEW_POLYGONSTIPPLE,
+      .brw = 0,
+      .cache = 0
+   },
+   .emit = upload_polygon_stipple
+};
+
+
+/***********************************************************************
+ * Polygon stipple offset packet
+ *
+ * Emits CMD_POLY_STIPPLE_OFFSET.  The offsets are derived from the
+ * drawable position for window rendering and are zero for FBOs.
+ */
+
+static void upload_polygon_stipple_offset(struct brw_context *brw)
+{
+   __DRIdrawablePrivate *dPriv = brw->intel.driDrawable;
+   struct brw_polygon_stipple_offset bpso;
+
+   memset(&bpso, 0, sizeof(bpso));
+   bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+   bpso.header.length = sizeof(bpso)/4-2;   /* packet size in dwords, minus two */
+
+   /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
+    * we have to invert the Y axis in order to match the OpenGL
+    * pixel coordinate system, and our offset must be matched
+    * to the window position.  If we're drawing to a FBO
+    * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
+    * system works just fine, and there's no window system to
+    * worry about.
+    */
+   if (brw->intel.ctx.DrawBuffer->Name == 0) {
+      bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31;                /* window left edge, modulo 32 */
+      bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31;   /* window bottom edge (y + h), modulo 32 */
+   }
+   else {
+      bpso.bits0.y_offset = 0;
+      bpso.bits0.x_offset = 0;
+   }
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bpso);
+}
+
+#define _NEW_WINDOW_POS 0x40000000   /* flag value defined locally in this file; where it gets raised is not visible here */
+/* State atom for the polygon stipple offset. */
+const struct brw_tracked_state brw_polygon_stipple_offset = {
+   .dirty = {
+      .mesa = _NEW_WINDOW_POS,
+      .brw = 0,
+      .cache = 0
+   },
+   .emit = upload_polygon_stipple_offset
+};
+
+/**********************************************************************
+ * AA Line parameters
+ *
+ * Emits an all-zero CMD_AA_LINE_PARAMETERS packet; skipped entirely when
+ * BRW_IS_965() is true.
+ */
+static void upload_aa_line_parameters(struct brw_context *brw)
+{
+   struct brw_aa_line_parameters balp;
+
+   if (BRW_IS_965(brw))
+      return;
+
+   /* use legacy aa line coverage computation */
+   memset(&balp, 0, sizeof(balp));
+   balp.header.opcode = CMD_AA_LINE_PARAMETERS;
+   balp.header.length = sizeof(balp) / 4 - 2;   /* packet size in dwords, minus two */
+
+   BRW_CACHED_BATCH_STRUCT(brw, &balp);
+}
+/* State atom for the AA line parameters. */
+const struct brw_tracked_state brw_aa_line_parameters = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .emit = upload_aa_line_parameters
+};
+
+/***********************************************************************
+ * Line stipple packet
+ *
+ * Emits CMD_LINE_STIPPLE_PATTERN with the GL stipple pattern, the repeat
+ * factor, and the reciprocal of the factor scaled by 1<<13.
+ */
+
+static void upload_line_stipple(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_line_stipple bls;
+   GLfloat tmp;
+   GLint tmpi;
+
+   memset(&bls, 0, sizeof(bls));
+   bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
+   bls.header.length = sizeof(bls)/4 - 2;   /* packet size in dwords, minus two */
+
+   bls.bits0.pattern = ctx->Line.StipplePattern;
+   bls.bits1.repeat_count = ctx->Line.StippleFactor;
+
+   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;   /* assumes StippleFactor is never 0 -- TODO confirm it is clamped upstream */
+   tmpi = tmp * (1<<13);                            /* reciprocal as fixed point with 13 fractional bits */
+
+
+   bls.bits1.inverse_repeat_count = tmpi;
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bls);
+}
+/* State atom for the line stipple pattern. */
+const struct brw_tracked_state brw_line_stipple = {
+   .dirty = {
+      .mesa = _NEW_LINE,
+      .brw = 0,
+      .cache = 0
+   },
+   .emit = upload_line_stipple
+};
+
+
+/***********************************************************************
+ * Misc invariant state packets
+ */
+
+/* Emits four fixed packets through BRW_BATCH_STRUCT(), in this order:
+ * pipeline select, global depth offset clamp, system instruction pointer
+ * and VF statistics.
+ */
+static void upload_invarient_state( struct brw_context *brw )
+{
+   {
+      /* 0x61040000  Pipeline Select */
+      /*     PipelineSelect            : 0 */
+      struct brw_pipeline_select ps;
+
+      memset(&ps, 0, sizeof(ps));
+      ps.header.opcode = CMD_PIPELINE_SELECT(brw);
+      ps.header.pipeline_select = 0;
+      BRW_BATCH_STRUCT(brw, &ps);
+   }
+
+   {
+      struct brw_global_depth_offset_clamp gdo;
+      memset(&gdo, 0, sizeof(gdo));
+
+      /* Disable depth offset clamping.
+       */
+      gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+      gdo.header.length = sizeof(gdo)/4 - 2;
+      gdo.depth_offset_clamp = 0.0;
+
+      BRW_BATCH_STRUCT(brw, &gdo);
+   }
+
+
+   /* 0x61020000  State Instruction Pointer */
+   {
+      struct brw_system_instruction_pointer sip;
+      memset(&sip, 0, sizeof(sip));
+
+      sip.header.opcode = CMD_STATE_INSN_POINTER;
+      sip.header.length = 0;
+      sip.bits0.pad = 0;
+      sip.bits0.system_instruction_pointer = 0;
+      BRW_BATCH_STRUCT(brw, &sip);
+   }
+
+
+   {
+      struct brw_vf_statistics vfs;
+      memset(&vfs, 0, sizeof(vfs));
+
+      vfs.opcode = CMD_VF_STATISTICS(brw);
+      /* Statistics are enabled only when the DEBUG_STATS debug flag is
+       * set.
+       */
+      if (INTEL_DEBUG & DEBUG_STATS)
+         vfs.statistics_enable = 1;
+
+      BRW_BATCH_STRUCT(brw, &vfs);
+   }
+}
+
+/* Tracked-state entry for the invariant packets; keyed on BRW_NEW_CONTEXT
+ * only.
+ */
+const struct brw_tracked_state brw_invarient_state = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .emit = upload_invarient_state
+};
+
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools.  This comes at the expense of memory, and more expensive cache
+ * misses.
+ */
+static void upload_state_base_address( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+
+   /* Output the structure (brw_state_base_address) directly to the
+    * batchbuffer, so we can emit relocations inline.
+    *
+    * NOTE(review): as written no relocation is emitted here; every
+    * address and bound dword is the literal value 1.  Presumably bit 0 is
+    * a "modify enable" flag with a zero base/bound -- confirm against the
+    * hardware documentation.
+    */
+   if (BRW_IS_IGDNG(brw)) {
+       /* IGDNG form: 8 dwords, adding an instruction base address and an
+        * instruction access upper bound.
+        */
+       BEGIN_BATCH(8, IGNORE_CLIPRECTS);
+       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
+       OUT_BATCH(1); /* General state base address */
+       OUT_BATCH(1); /* Surface state base address */
+       OUT_BATCH(1); /* Indirect object base address */
+       OUT_BATCH(1); /* Instruction base address */
+       OUT_BATCH(1); /* General state upper bound */
+       OUT_BATCH(1); /* Indirect object upper bound */
+       OUT_BATCH(1); /* Instruction access upper bound */
+       ADVANCE_BATCH();
+   } else {
+       BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+       OUT_BATCH(1); /* General state base address */
+       OUT_BATCH(1); /* Surface state base address */
+       OUT_BATCH(1); /* Indirect object base address */
+       OUT_BATCH(1); /* General state upper bound */
+       OUT_BATCH(1); /* Indirect object upper bound */
+       ADVANCE_BATCH();
+   }
+}
+
+/* Tracked-state entry for the state base address packet. */
+const struct brw_tracked_state brw_state_base_address = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0,
+   },
+   .emit = upload_state_base_address
+};
diff --git a/src/gallium/drivers/i965/brw_program.c b/src/gallium/drivers/i965/brw_program.c
new file mode 100644
index 0000000000..bac69187c1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_program.c
@@ -0,0 +1,166 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+#include "main/imports.h"
+#include "main/enums.h"
+#include "shader/prog_parameter.h"
+#include "shader/program.h"
+#include "shader/programopt.h"
+#include "tnl/tnl.h"
+
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+
+/* BindProgram hook: flags the matching BRW_NEW_*_PROGRAM dirty bit for
+ * vertex and fragment program targets.  Other targets are ignored and the
+ * prog argument itself is not examined.
+ */
+static void brwBindProgram( GLcontext *ctx,
+                            GLenum target,
+                            struct gl_program *prog )
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   switch (target) {
+   case GL_VERTEX_PROGRAM_ARB:
+      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+      break;
+   case GL_FRAGMENT_PROGRAM_ARB:
+      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+      break;
+   }
+}
+
+/* NewProgram hook: allocates the driver's wrapper struct for vertex and
+ * fragment programs, stamps it with the next brw->program_id, and lets the
+ * core initialise the embedded program.  Returns NULL if the allocation
+ * fails.  Any other target is delegated to _mesa_new_program().
+ */
+static struct gl_program *brwNewProgram( GLcontext *ctx,
+                                         GLenum target,
+                                         GLuint id )
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   switch (target) {
+   case GL_VERTEX_PROGRAM_ARB: {
+      struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
+      if (prog) {
+         prog->id = brw->program_id++;
+
+         return _mesa_init_vertex_program( ctx, &prog->program,
+                                           target, id );
+      }
+      else
+         return NULL;
+   }
+
+   case GL_FRAGMENT_PROGRAM_ARB: {
+      struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
+      if (prog) {
+         prog->id = brw->program_id++;
+
+         return _mesa_init_fragment_program( ctx, &prog->program,
+                                             target, id );
+      }
+      else
+         return NULL;
+   }
+
+   default:
+      return _mesa_new_program(ctx, target, id);
+   }
+}
+
+/* DeleteProgram hook: for fragment programs, drops the reference on the
+ * driver's const_buffer before handing the program to
+ * _mesa_delete_program().
+ */
+static void brwDeleteProgram( GLcontext *ctx,
+                              struct gl_program *prog )
+{
+   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+      struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
+      dri_bo_unreference(brw_fprog->const_buffer);
+   }
+
+   _mesa_delete_program( ctx, prog );
+}
+
+
+/* IsProgramNative hook: unconditionally reports GL_TRUE. */
+static GLboolean brwIsProgramNative( GLcontext *ctx,
+                                     GLenum target,
+                                     struct gl_program *prog )
+{
+   return GL_TRUE;
+}
+
+/* ProgramStringNotify hook, called with a program whose text has changed.
+ *
+ * Fragment programs: fog code is appended when FogOption is set (and the
+ * option is then cleared), the fragment-program dirty bit is raised if
+ * this is the currently bound program, a fresh id is assigned, and isGLSL
+ * is recomputed.
+ *
+ * Vertex programs: the vertex-program dirty bit is raised if this is the
+ * currently bound program, MVP code is inserted for position-invariant
+ * programs, a fresh id is assigned, and the notification is forwarded to
+ * _tnl_program_string().
+ */
+static void brwProgramStringNotify( GLcontext *ctx,
+                                    GLenum target,
+                                    struct gl_program *prog )
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+      struct brw_fragment_program *newFP = brw_fragment_program(fprog);
+      const struct brw_fragment_program *curFP =
+         brw_fragment_program_const(brw->fragment_program);
+
+      if (fprog->FogOption) {
+         _mesa_append_fog_code(ctx, fprog);
+         fprog->FogOption = GL_NONE;
+      }
+
+      if (newFP == curFP)
+         brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+      newFP->id = brw->program_id++;
+      newFP->isGLSL = brw_wm_is_glsl(fprog);
+   }
+   else if (target == GL_VERTEX_PROGRAM_ARB) {
+      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
+      struct brw_vertex_program *newVP = brw_vertex_program(vprog);
+      const struct brw_vertex_program *curVP =
+         brw_vertex_program_const(brw->vertex_program);
+
+      if (newVP == curVP)
+         brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+      if (newVP->program.IsPositionInvariant) {
+         _mesa_insert_mvp_code(ctx, &newVP->program);
+      }
+      newVP->id = brw->program_id++;
+
+      /* Also tell tnl about it:
+       */
+      _tnl_program_string(ctx, target, prog);
+   }
+}
+
+/* Installs the program hooks above into the driver function table.  The
+ * assert documents that the slot being replaced is expected to hold
+ * _tnl_program_string, which brwProgramStringNotify() chains to for vertex
+ * programs.
+ */
+void brwInitFragProgFuncs( struct dd_function_table *functions )
+{
+   assert(functions->ProgramStringNotify == _tnl_program_string);
+
+   functions->BindProgram = brwBindProgram;
+   functions->NewProgram = brwNewProgram;
+   functions->DeleteProgram = brwDeleteProgram;
+   functions->IsProgramNative = brwIsProgramNative;
+   functions->ProgramStringNotify = brwProgramStringNotify;
+}
+
diff --git a/src/gallium/drivers/i965/brw_queryobj.c b/src/gallium/drivers/i965/brw_queryobj.c
new file mode 100644
index 0000000000..a195bc32b0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_queryobj.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files
(the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +/** @file support for ARB_query_object + * + * ARB_query_object is implemented by using the PIPE_CONTROL command to stall + * execution on the completion of previous depth tests, and write the + * current PS_DEPTH_COUNT to a buffer object. + * + * We use before and after counts when drawing during a query so that + * we don't pick up other clients' query data in ours. To reduce overhead, + * a single BO is used to record the query data for all active queries at + * once. This also gives us a simple bound on how much batchbuffer space is + * required for handling queries, so that we can be sure that we won't + * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT. 
+ */ +#include "main/simple_list.h" +#include "main/imports.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "intel_batchbuffer.h" +#include "intel_reg.h" + +/** Waits on the query object's BO and totals the results for this query */ +static void +brw_queryobj_get_results(struct brw_query_object *query) +{ + int i; + uint64_t *results; + + if (query->bo == NULL) + return; + + /* Map and count the pixels from the current query BO */ + dri_bo_map(query->bo, GL_FALSE); + results = query->bo->virtual; + for (i = query->first_index; i <= query->last_index; i++) { + query->Base.Result += results[i * 2 + 1] - results[i * 2]; + } + dri_bo_unmap(query->bo); + + dri_bo_unreference(query->bo); + query->bo = NULL; +} + +static struct gl_query_object * +brw_new_query_object(GLcontext *ctx, GLuint id) +{ + struct brw_query_object *query; + + query = _mesa_calloc(sizeof(struct brw_query_object)); + + query->Base.Id = id; + query->Base.Result = 0; + query->Base.Active = GL_FALSE; + query->Base.Ready = GL_TRUE; + + return &query->Base; +} + +static void +brw_delete_query(GLcontext *ctx, struct gl_query_object *q) +{ + struct brw_query_object *query = (struct brw_query_object *)q; + + dri_bo_unreference(query->bo); + _mesa_free(query); +} + +static void +brw_begin_query(GLcontext *ctx, struct gl_query_object *q) +{ + struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = intel_context(ctx); + struct brw_query_object *query = (struct brw_query_object *)q; + + /* Reset our driver's tracking of query state. */ + dri_bo_unreference(query->bo); + query->bo = NULL; + query->first_index = -1; + query->last_index = -1; + + insert_at_head(&brw->query.active_head, query); + intel->stats_wm++; +} + +/** + * Begin the ARB_occlusion_query query on a query object. 
+ */
+static void
+brw_end_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   /* Installed as the driver's EndQuery hook by
+    * brw_init_queryobj_functions().
+    */
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_context *intel = intel_context(ctx);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   /* Flush the batchbuffer in case it has writes to our query BO.
+    * Have later queries write to a new query BO so that further rendering
+    * doesn't delay the collection of our results.
+    */
+   if (query->bo) {
+      brw_emit_query_end(brw);
+      intel_batchbuffer_flush(intel->batch);
+
+      /* Drop the context's shared query BO; this query keeps its own
+       * reference in query->bo until its results are collected.
+       */
+      dri_bo_unreference(brw->query.bo);
+      brw->query.bo = NULL;
+   }
+
+   /* Take the query off the active list and undo the stats_wm increment
+    * made when the query began.
+    */
+   remove_from_list(query);
+
+   intel->stats_wm--;
+}
+
+/* WaitQuery hook: collects the results and marks the query ready. */
+static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   brw_queryobj_get_results(query);
+   query->Base.Ready = GL_TRUE;
+}
+
+/* CheckQuery hook: collects the results and marks the query ready only if
+ * there is no buffer to wait on or drm_intel_bo_busy() reports it idle;
+ * otherwise the query is left untouched.
+ */
+static void brw_check_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
+      brw_queryobj_get_results(query);
+      query->Base.Ready = GL_TRUE;
+   }
+}
+
+/** Called to set up the query BO and account for its aperture space */
+void
+brw_prepare_query_begin(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   /* Skip if we're not doing any queries. */
+   if (is_empty_list(&brw->query.active_head))
+      return;
+
+   /* Get a new query BO if we're going to need it. */
+   /* The BO is 4096 bytes of 64-bit counters and each index uses slots
+    * 2*index and 2*index+1, so a new BO is needed once the second slot
+    * would fall outside the buffer.
+    */
+   if (brw->query.bo == NULL ||
+       brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
+      dri_bo_unreference(brw->query.bo);
+      brw->query.bo = NULL;
+
+      /* NOTE(review): the result of dri_bo_alloc() is not checked before
+       * being passed on below -- confirm how allocation failure is meant
+       * to be handled.
+       */
+      brw->query.bo = dri_bo_alloc(intel->bufmgr, "query", 4096, 1);
+      brw->query.index = 0;
+   }
+
+   brw_add_validated_bo(brw, brw->query.bo);
+}
+
+/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT.
*/
+void
+brw_emit_query_begin(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_query_object *query;
+
+   /* Skip if we're not doing any queries, or we've emitted the start. */
+   if (brw->query.active || is_empty_list(&brw->query.active_head))
+      return;
+
+   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+             PIPE_CONTROL_DEPTH_STALL |
+             PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   /* This object could be mapped cacheable, but we don't have an exposed
+    * mechanism to support that.  Since it's going uncached, tell GEM that
+    * we're writing to it.  The usual clflush should be all that's required
+    * to pick up the results.
+    */
+   /* The beginning count goes to the even slot (2 * index) of the pair. */
+   OUT_RELOC(brw->query.bo,
+             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+             PIPE_CONTROL_GLOBAL_GTT_WRITE |
+             ((brw->query.index * 2) * sizeof(uint64_t)));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* Point every active query at the current shared BO.  A query still
+    * holding a different BO has its results collected first, then takes a
+    * reference on the current one and restarts its index range here.
+    */
+   foreach(query, &brw->query.active_head) {
+      if (query->bo != brw->query.bo) {
+         if (query->bo != NULL)
+            brw_queryobj_get_results(query);
+         dri_bo_reference(brw->query.bo);
+         query->bo = brw->query.bo;
+         query->first_index = brw->query.index;
+      }
+      query->last_index = brw->query.index;
+   }
+   brw->query.active = GL_TRUE;
+}
+
+/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
+void
+brw_emit_query_end(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   /* Nothing to close unless a beginning count was emitted. */
+   if (!brw->query.active)
+      return;
+
+   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+             PIPE_CONTROL_DEPTH_STALL |
+             PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   /* The ending count goes to the odd slot (2 * index + 1) of the pair. */
+   OUT_RELOC(brw->query.bo,
+             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+             PIPE_CONTROL_GLOBAL_GTT_WRITE |
+             ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* The pair is complete; the next begin/end uses the next index. */
+   brw->query.active = GL_FALSE;
+   brw->query.index++;
+}
+
+/* Installs the query object hooks defined in this file into the driver
+ * function table.
+ */
+void brw_init_queryobj_functions(struct dd_function_table *functions)
+{
+   functions->NewQueryObject = brw_new_query_object;
+   functions->DeleteQuery = brw_delete_query;
+   functions->BeginQuery = brw_begin_query;
+   functions->EndQuery = brw_end_query;
+   functions->CheckQuery = brw_check_query;
+   functions->WaitQuery = brw_wait_query;
+}
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
new file mode 100644
index 0000000000..e1c2c7777b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -0,0 +1,200 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+#include "brw_state.h"
+
+/* Compiles the SF program described by *key and stores the result in the
+ * program cache, leaving the new buffer in brw->sf.prog_bo.
+ *
+ * Two attributes share one register: both register counts below are
+ * computed as (count + 1) / 2.
+ */
+static void compile_sf_prog( struct brw_context *brw,
+                             struct brw_sf_prog_key *key )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_sf_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint i, idx;
+
+   memset(&c, 0, sizeof(c));
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.key = *key;
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_attr_regs = (c.nr_attrs+1)/2;
+   c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+   c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
+
+   c.prog_data.urb_read_length = c.nr_attr_regs;
+   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+
+   /* Construct map from attribute number to position in the vertex.
+    */
+   /* NOTE(review): the "if" line below is not valid C as it stands; text
+    * between "(1<" and ">= VERT_RESULT_TEX0" appears to have been lost
+    * when this patch was extracted.  Restore it from the original commit
+    * before applying.
+    */
+   for (i = idx = 0; i < VERT_RESULT_MAX; i++)
+      if (c.key.attrs & (1<= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
+            c.point_attrs[i].CoordReplace =
+               ctx->Point.CoordReplace[i - VERT_RESULT_TEX0];
+         }
+         else {
+            c.point_attrs[i].CoordReplace = GL_FALSE;
+         }
+         idx++;
+      }
+
+   /* Which primitive?  Or all three?
+    */
+   switch (key->primitive) {
+   case SF_TRIANGLES:
+      c.nr_verts = 3;
+      brw_emit_tri_setup( &c, GL_TRUE );
+      break;
+   case SF_LINES:
+      c.nr_verts = 2;
+      brw_emit_line_setup( &c, GL_TRUE );
+      break;
+   case SF_POINTS:
+      c.nr_verts = 1;
+      if (key->do_point_sprite)
+          brw_emit_point_sprite_setup( &c, GL_TRUE );
+      else
+          brw_emit_point_setup( &c, GL_TRUE );
+      break;
+   case SF_UNFILLED_TRIS:
+      c.nr_verts = 3;
+      brw_emit_anyprim_setup( &c );
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+   /* get the program
+    */
+   program = brw_get_program(&c.func, &program_size);
+
+   /* Upload
+    */
+   dri_bo_unreference(brw->sf.prog_bo);
+   brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
+                                       &c.key, sizeof(c.key),
+                                       NULL, 0,
+                                       program, program_size,
+                                       &c.prog_data,
+                                       &brw->sf.prog_data );
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ *
+ * Builds the SF program key from current state, looks it up in the
+ * program cache and compiles a new program on a miss.
+ */
+static void upload_sf_prog(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_sf_prog_key key;
+
+   /* Zero the whole key, padding included, since it is hashed and
+    * compared by size below.
+    */
+   memset(&key, 0, sizeof(key));
+
+   /* Populate the key, noting state dependencies:
+    */
+   /* CACHE_NEW_VS_PROG */
+   key.attrs = brw->vs.prog_data->outputs_written;
+
+   /* BRW_NEW_REDUCED_PRIMITIVE */
+   switch (brw->intel.reduced_primitive) {
+   case GL_TRIANGLES:
+      /* NOTE: We just use the edgeflag attribute as an indicator that
+       * unfilled triangles are active.  We don't actually do the
+       * edgeflag testing here, it is already done in the clip
+       * program.
+       */
+      /* NOTE(review): the line below is not valid C as it stands; the
+       * remainder of this switch (text between "(1<" and
+       * "Point.PointSprite") appears to have been lost when this patch
+       * was extracted.  Restore it from the original commit before
+       * applying.
+       */
+      if (key.attrs & (1<Point.PointSprite;
+   key.SpriteOrigin = ctx->Point.SpriteOrigin;
+   /* _NEW_LIGHT */
+   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
+   key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
+
+   /* _NEW_HINT */
+   key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+
+   /* _NEW_POLYGON */
+   if (key.do_twoside_color) {
+      /* If we're rendering to a FBO, we have to invert the polygon
+       * face orientation, just as we invert the viewport in
+       * sf_unit_create_from_key().  ctx->DrawBuffer->Name will be
+       * nonzero if we're rendering to such an FBO.
+       */
+      key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0);
+   }
+
+   /* Release the previous program, then try the cache before compiling. */
+   dri_bo_unreference(brw->sf.prog_bo);
+   brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
+                                      &key, sizeof(key),
+                                      NULL, 0,
+                                      &brw->sf.prog_data);
+   if (brw->sf.prog_bo == NULL)
+      compile_sf_prog( brw, &key );
+}
+
+
+/* Tracked-state entry for the SF program; the dirty flags mirror the
+ * state dependencies noted in upload_sf_prog().
+ */
+const struct brw_tracked_state brw_sf_prog = {
+   .dirty = {
+      .mesa  = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT),
+      .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .prepare = upload_sf_prog
+};
+
diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h
new file mode 100644
index 0000000000..6426b6df9f
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.h
@@ -0,0 +1,113 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+#ifndef BRW_SF_H
+#define BRW_SF_H
+
+
+#include "shader/program.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+/* Values for brw_sf_prog_key::primitive (a 2-bit field). */
+#define SF_POINTS         0
+#define SF_LINES          1
+#define SF_TRIANGLES      2
+#define SF_UNFILLED_TRIS  3
+
+/* Cache key identifying one SF program variant.  The flag bits plus pad
+ * add up to a full 32-bit word; pad exists so the key has no unnamed
+ * bits.
+ */
+struct brw_sf_prog_key {
+   GLuint attrs:32;             /* bitmask of vertex results present */
+   GLuint primitive:2;          /* one of the SF_* values above */
+   GLuint do_twoside_color:1;
+   GLuint do_flat_shading:1;
+   GLuint frontface_ccw:1;
+   GLuint do_point_sprite:1;
+   GLuint linear_color:1;  /**< linear interp vs. perspective interp */
+   GLuint pad:25;
+   GLenum SpriteOrigin;
+};
+
+/* Per-attribute point sprite setting. */
+struct brw_sf_point_tex {
+   GLboolean CoordReplace;
+};
+
+/* Working state for compiling one SF program. */
+struct brw_sf_compile {
+   struct brw_compile func;
+   struct brw_sf_prog_key key;
+   struct brw_sf_prog_data prog_data;
+
+   struct brw_reg pv;
+   struct brw_reg det;
+   struct brw_reg dx0;
+   struct brw_reg dx2;
+   struct brw_reg dy0;
+   struct brw_reg dy2;
+
+   /* z and 1/w passed in separately:
+    */
+   struct brw_reg z[3];
+   struct brw_reg inv_w[3];
+
+   /* The vertices:
+    */
+   struct brw_reg vert[3];
+
+    /* Temporaries, allocated after last vertex reg.
+    */
+   struct brw_reg inv_det;
+   struct brw_reg a1_sub_a0;
+   struct brw_reg a2_sub_a0;
+   struct brw_reg tmp;
+
+   struct brw_reg m1Cx;
+   struct brw_reg m2Cy;
+   struct brw_reg m3C0;
+
+   GLuint nr_verts;
+   GLuint nr_attrs;
+   GLuint nr_attr_regs;
+   GLuint nr_setup_attrs;
+   GLuint nr_setup_regs;
+
+   /* Both maps and point_attrs have one entry per possible vertex
+    * result.
+    */
+   GLubyte attr_to_idx[VERT_RESULT_MAX];
+   GLubyte idx_to_attr[VERT_RESULT_MAX];
+   struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
+};
+
+
+/* Program emitters, one per primitive kind. */
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_anyprim_setup( struct brw_sf_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
new file mode 100644
index 0000000000..ca8f97f9f9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -0,0 +1,739 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+
+
+/* Returns the register holding attribute `attr` of the vertex whose first
+ * register is `vert`.  Two attributes share a register: the attribute's
+ * index selects the register (index / 2) and the half within it
+ * (index % 2, at a sub-offset of 4 elements).
+ */
+static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
+                                    struct brw_reg vert,
+                                    GLuint attr)
+{
+   GLuint off = c->attr_to_idx[attr] / 2;
+   GLuint sub = c->attr_to_idx[attr] % 2;
+
+   return brw_vec4_grf(vert.nr + off, sub * 4);
+}
+
+/* NOTE(review): the return statement below is not valid C as it stands.
+ * Text between "(1<" and "func;" appears to have been lost when this patch
+ * was extracted -- apparently the rest of have_attr() and the opening of
+ * the next function, which the calls further down name copy_bfc(c, vert).
+ * Restore it from the original commit before applying.
+ */
+static GLboolean have_attr(struct brw_sf_compile *c,
+                           GLuint attr)
+{
+   return (c->key.attrs & (1<func;
+   GLuint i;
+
+   /* For each of the two colors, overwrite the front color with the back
+    * color when the vertex carries both.
+    */
+   for (i = 0; i < 2; i++) {
+      if (have_attr(c, VERT_RESULT_COL0+i) &&
+          have_attr(c, VERT_RESULT_BFC0+i))
+         brw_MOV(p,
+                 get_vert_attr(c, vert, VERT_RESULT_COL0+i),
+                 get_vert_attr(c, vert, VERT_RESULT_BFC0+i));
+   }
+}
+
+
+/* Emits code that, when the primitive's determinant has the back-facing
+ * sign, replaces front colors with back colors on every vertex.
+ */
+static void do_twoside_color( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+   /* The sign of det that means "back facing" depends on the winding. */
+   GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   /* XXX: What happens if BFC isn't present?  This could only happen
+    * for user-supplied vertex programs, as t_vp_build.c always does
+    * the right thing.
+    */
+   if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
+       !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
+      return;
+
+   /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
+    * to get all channels active inside the IF.  In the clipping code
+    * we run with NoMask, so it's not an option and we can use
+    * BRW_EXECUTE_1 for all comparisons.
+    */
+   brw_push_insn_state(p);
+   brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
+   if_insn = brw_IF(p, BRW_EXECUTE_4);
+   {
+      /* Deliberate fall-through: a primitive with N vertices patches
+       * vertices N-1 down to 0.
+       */
+      switch (c->nr_verts) {
+      case 3: copy_bfc(c, c->vert[2]);
+      case 2: copy_bfc(c, c->vert[1]);
+      case 1: copy_bfc(c, c->vert[0]);
+      }
+   }
+   brw_ENDIF(p, if_insn);
+   brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Flat shading
+ */
+
+/* NOTE(review): the #define below is truncated.  Text between "(1<" and
+ * "func;" appears to have been lost when this patch was extracted --
+ * apparently the rest of the macro and the opening of the function that
+ * later code calls as copy_colors(c, dst, src).  Restore it from the
+ * original commit before applying.
+ */
+#define VERT_RESULT_COLOR_BITS ((1<func;
+   GLuint i;
+
+   /* One MOV per color attribute that the vertex actually carries. */
+   for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
+      if (have_attr(c,i))
+         brw_MOV(p,
+                 get_vert_attr(c, dst, i),
+                 get_vert_attr(c, src, i));
+   }
+}
+
+
+
+/* Need to use a computed jump to copy flatshaded attributes as the
+ * vertices are ordered according to y-coordinate before reaching this
+ * point, so the PV could be anywhere.
+ */
+static void do_flatshade_triangle( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg ip = brw_ip_reg();
+   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); /* attrs bits selected by VERT_RESULT_COLOR_BITS */
+   GLuint jmpi = 1; /* scale factor applied to every jump distance below */
+
+   if (!nr)
+      return;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   if (BRW_IS_IGDNG(p->brw))
+       jmpi = 2; /* NOTE(review): presumably IGDNG counts jump distance in different units -- confirm */
+
+   brw_push_insn_state(p);
+
+   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); /* pv *= size of one case: 2 copy_colors() + 1 JMPI, assuming copy_colors() emits nr MOVs */
+   brw_JMPI(p, ip, ip, c->pv); /* computed jump: skips pv cases */
+
+   copy_colors(c, c->vert[1], c->vert[0]); /* pv == 0: vert[0] supplies the colors */
+   copy_colors(c, c->vert[2], c->vert[0]);
+   brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1))); /* done: skip the pv == 1 case (2*nr+1) and the pv == 2 case (2*nr) */
+
+   copy_colors(c, c->vert[0], c->vert[1]); /* pv == 1 */
+   copy_colors(c, c->vert[2], c->vert[1]);
+   brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2)); /* done: skip the pv == 2 case */
+
+   copy_colors(c, c->vert[0], c->vert[2]); /* pv == 2 */
+   copy_colors(c, c->vert[1], c->vert[2]);
+
+   brw_pop_insn_state(p);
+}
+
+
+static void do_flatshade_line( struct brw_sf_compile *c ) /* two-vertex variant of the computed jump used for triangles */
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg ip = brw_ip_reg();
+   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); /* attrs bits selected by VERT_RESULT_COLOR_BITS */
+   GLuint jmpi = 1; /* scale factor applied to every jump distance below */
+
+   if (!nr)
+      return;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   if (BRW_IS_IGDNG(p->brw))
+       jmpi = 2; /* NOTE(review): same IGDNG scaling as the triangle path -- confirm */
+
+   brw_push_insn_state(p);
+
+   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); /* pv *= size of the first case: 1 copy_colors() + 1 JMPI */
+   brw_JMPI(p, ip, ip, c->pv); /* computed jump: skips pv cases */
+   copy_colors(c, c->vert[1], c->vert[0]); /* pv == 0 */
+
+   brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); /* done: skip the pv == 1 case */
+   copy_colors(c, c->vert[0], c->vert[1]); /* pv == 1 */
+
+   brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Triangle setup.
+ */ + + +static void alloc_regs( struct brw_sf_compile *c ) +{ + GLuint reg, i; + + /* Values computed by fixed function unit: + */ + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D); + c->det = brw_vec1_grf(1, 2); + c->dx0 = brw_vec1_grf(1, 3); + c->dx2 = brw_vec1_grf(1, 4); + c->dy0 = brw_vec1_grf(1, 5); + c->dy2 = brw_vec1_grf(1, 6); + + /* z and 1/w passed in seperately: + */ + c->z[0] = brw_vec1_grf(2, 0); + c->inv_w[0] = brw_vec1_grf(2, 1); + c->z[1] = brw_vec1_grf(2, 2); + c->inv_w[1] = brw_vec1_grf(2, 3); + c->z[2] = brw_vec1_grf(2, 4); + c->inv_w[2] = brw_vec1_grf(2, 5); + + /* The vertices: + */ + reg = 3; + for (i = 0; i < c->nr_verts; i++) { + c->vert[i] = brw_vec8_grf(reg, 0); + reg += c->nr_attr_regs; + } + + /* Temporaries, allocated after last vertex reg. + */ + c->inv_det = brw_vec1_grf(reg, 0); reg++; + c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->tmp = brw_vec8_grf(reg, 0); reg++; + + /* Note grf allocation: + */ + c->prog_data.total_grf = reg; + + + /* Outputs of this program - interpolation coefficients for + * rasterization: + */ + c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); + c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); + c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); +} + + +static void copy_z_inv_w( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + GLuint i; + + brw_push_insn_state(p); + + /* Copy both scalars with a single MOV: + */ + for (i = 0; i < c->nr_verts; i++) + brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); + + brw_pop_insn_state(p); +} + + +static void invert_det( struct brw_sf_compile *c) +{ + /* Looks like we invert all 8 elements just to get 1/det in + * position 2 !?! 
+ */ + brw_math(&c->func, + c->inv_det, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->det, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + +} + + +static GLboolean calculate_masks( struct brw_sf_compile *c, + GLuint reg, + GLushort *pc, + GLushort *pc_persp, + GLushort *pc_linear) +{ + GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); + GLuint persp_mask; + GLuint linear_mask; + + if (c->key.do_flat_shading || c->key.linear_color) + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | + FRAG_BIT_COL0 | + FRAG_BIT_COL1); + else + persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS); + + if (c->key.do_flat_shading) + linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1); + else + linear_mask = c->key.attrs; + + *pc_persp = 0; + *pc_linear = 0; + *pc = 0xf; + + if (persp_mask & (1 << c->idx_to_attr[reg*2])) + *pc_persp = 0xf; + + if (linear_mask & (1 << c->idx_to_attr[reg*2])) + *pc_linear = 0xf; + + /* Maybe only processs one attribute on the final round: + */ + if (reg*2+1 < c->nr_setup_attrs) { + *pc |= 0xf0; + + if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) + *pc_persp |= 0xf0; + + if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) + *pc_linear |= 0xf0; + } + + return is_last_attr; +} + + + +void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 3; + + if (allocate) + alloc_regs(c); + + invert_det(c); + copy_z_inv_w(c); + + if (c->key.do_twoside_color) + do_twoside_color(c); + + if (c->key.do_flat_shading) + do_flatshade_triangle(c); + + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + struct brw_reg a2 = offset(c->vert[2], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, 
c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + brw_MUL(p, a2, a2, c->inv_w[2]); + } + + + /* Calculate coefficients for interpolated values: + */ + if (pc_linear) + { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); + + /* calculate dA/dx + */ + brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); + brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + /* calculate dA/dy + */ + brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); + brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. m0 is implicitly copied from r0 in + * the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ + } + } +} + + + +void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + + c->nr_verts = 2; + + if (allocate) + alloc_regs(c); + + invert_det(c); + copy_z_inv_w(c); + + if (c->key.do_flat_shading) + do_flatshade_line(c); + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + } + + /* Calculate coefficients for position, color: + */ + if (pc_linear) { + 
brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + +void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 1; + + if (allocate) + alloc_regs(c); + + copy_z_inv_w(c); + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + if (!tex->CoordReplace) { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + } + + if (tex->CoordReplace) { + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, + c->tmp, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->dx0, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + + if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } else { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), 
brw_imm_f(0.0)); + } + } else { + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + } + + { + brw_set_predicate_control_flag_value(p, pc); + if (tex->CoordReplace) { + if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); + brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); + } + else + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + } else { + brw_MOV(p, c->m3C0, a0); /* constant value */ + } + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + +/* Points setup - several simplifications as all attributes are + * constant across the face of the point (point sprites excluded!) + */ +void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate) +{ + struct brw_compile *p = &c->func; + GLuint i; + + c->nr_verts = 1; + + if (allocate) + alloc_regs(c); + + copy_z_inv_w(c); + + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ + + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_reg a0 = offset(c->vert[0], i); + GLushort pc, pc_persp, pc_linear; + GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + /* This seems odd as the values are all constant, but the + * fragment shader will be expecting it: + */ + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + + + /* The delta values are always zero, just send the starting + * coordinate. Again, this is to fit in with the interpolation + * code in the fragment shader. + */ + { + brw_set_predicate_control_flag_value(p, pc); + + brw_MOV(p, c->m3C0, a0); /* constant value */ + + /* Copy m0..m3 to URB. 
+ */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + +void brw_emit_anyprim_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg ip = brw_ip_reg(); + struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0); + struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); + struct brw_reg primmask; + struct brw_instruction *jmp; + struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); + + GLuint saveflag; + + c->nr_verts = 3; + alloc_regs(c); + + primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD); + + brw_MOV(p, primmask, brw_imm_ud(1)); + brw_SHL(p, primmask, primmask, payload_prim); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) | + (1<<_3DPRIM_TRISTRIP) | + (1<<_3DPRIM_TRIFAN) | + (1<<_3DPRIM_TRISTRIP_REVERSE) | + (1<<_3DPRIM_POLYGON) | + (1<<_3DPRIM_RECTLIST) | + (1<<_3DPRIM_TRIFAN_NOSTIPPLE))); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + { + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_tri_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + /* note - thread killed in subroutine, so must + * restore the flag which is changed when building + * the subroutine. 
fix #13240 + */ + } + brw_land_fwd_jump(p, jmp); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) | + (1<<_3DPRIM_LINESTRIP) | + (1<<_3DPRIM_LINELOOP) | + (1<<_3DPRIM_LINESTRIP_CONT) | + (1<<_3DPRIM_LINESTRIP_BF) | + (1<<_3DPRIM_LINESTRIP_CONT_BF))); + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + { + saveflag = p->flag_value; + brw_push_insn_state(p); + brw_emit_line_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + /* note - thread killed in subroutine */ + } + brw_land_fwd_jump(p, jmp); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<flag_value; + brw_push_insn_state(p); + brw_emit_point_sprite_setup( c, GL_FALSE ); + brw_pop_insn_state(p); + p->flag_value = saveflag; + } + brw_land_fwd_jump(p, jmp); + + brw_emit_point_setup( c, GL_FALSE ); +} + + + + diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c new file mode 100644 index 0000000000..bc0f076073 --- /dev/null +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -0,0 +1,365 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "main/macros.h"
+#include "intel_fbo.h"
+/* Build the SF viewport + scissor state and store its cached bo in brw->sf.vp_bo. */
+static void upload_sf_vp(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   struct brw_sf_viewport sfv;
+   GLfloat y_scale, y_bias;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+   memset(&sfv, 0, sizeof(sfv));
+
+   if (render_to_fbo) {
+      y_scale = 1.0;
+      y_bias = 0;
+   }
+   else {
+      y_scale = -1.0; /* window rendering: flip Y */
+      y_bias = ctx->DrawBuffer->Height;
+   }
+
+   /* _NEW_VIEWPORT */
+
+   sfv.viewport.m00 = v[MAT_SX];
+   sfv.viewport.m11 = v[MAT_SY] * y_scale;
+   sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
+   sfv.viewport.m30 = v[MAT_TX];
+   sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+   sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+
+   /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
+    * for DrawBuffer->_[XY]{min,max}
+    */
+
+   /* The scissor only needs to handle the intersection of drawable and
+    * scissor rect.  Clipping to the boundaries of static shared buffers
+    * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+    *
+    * Note that the hardware's coordinates are inclusive, while Mesa's min is
+    * inclusive but max is exclusive.
+    */
+   if (render_to_fbo) {
+      /* texmemory: Y=0=bottom */
+      sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+      sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
+      sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
+   }
+   else {
+      /* memory: Y=0=top */
+      sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+      sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+      sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+   }
+
+   dri_bo_unreference(brw->sf.vp_bo);
+   brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
+}
+
+const struct brw_tracked_state brw_sf_vp = {
+   .dirty = {
+      .mesa = (_NEW_VIEWPORT |
+               _NEW_SCISSOR |
+               _NEW_BUFFERS),
+      .brw = 0,
+      .cache = 0
+   },
+   .prepare = upload_sf_vp
+};
+/* GL, URB and SF-program inputs to the SF unit state; used as the state-cache lookup key. */
+struct brw_sf_unit_key {
+   unsigned int total_grf;
+   unsigned int urb_entry_read_length;
+
+   unsigned int nr_urb_entries, urb_size, sfsize;
+
+   GLenum front_face, cull_face, provoking_vertex;
+   unsigned scissor:1;
+   unsigned line_smooth:1;
+   unsigned point_sprite:1;
+   unsigned point_attenuated:1;
+   unsigned render_to_fbo:1;
+   float line_width;
+   float point_size;
+};
+/* Zero the key (padding included) and fill every field from current state. */
+static void
+sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_SF_PROG */
+   key->total_grf = brw->sf.prog_data->total_grf;
+   key->urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_sf_entries;
+   key->urb_size = brw->urb.vsize;
+   key->sfsize = brw->urb.sfsize;
+
+   key->scissor = ctx->Scissor.Enabled;
+   key->front_face = ctx->Polygon.FrontFace;
+
+   if (ctx->Polygon.CullFlag)
+      key->cull_face = ctx->Polygon.CullFaceMode;
+   else
+      key->cull_face = GL_NONE;
+
+   key->line_width = ctx->Line.Width;
+   key->line_smooth = ctx->Line.SmoothFlag;
+
+   key->point_sprite = ctx->Point.PointSprite;
+   key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
+   key->point_attenuated = ctx->Point._Attenuated;
+
+   /* _NEW_LIGHT */
+   key->provoking_vertex = ctx->Light.ProvokingVertex;
+
+   key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+}
+/* Build the SF unit state for 'key', upload it to the cache and emit its two relocations. */
+static dri_bo *
+sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
+                        dri_bo **reloc_bufs)
+{
+   struct brw_sf_unit_state sf;
+   dri_bo *bo;
+   int chipset_max_threads;
+   memset(&sf, 0, sizeof(sf));
+
+   sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
+
+   sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+   sf.thread3.dispatch_grf_start_reg = 3;
+
+   if (BRW_IS_IGDNG(brw))
+      sf.thread3.urb_entry_read_offset = 3;
+   else
+      sf.thread3.urb_entry_read_offset = 1;
+
+   sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+   sf.thread4.nr_urb_entries = key->nr_urb_entries;
+   sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
+
+   /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or
+    * 48(IGDNG) threads
+    */
+   if (BRW_IS_IGDNG(brw))
+      chipset_max_threads = 48;
+   else
+      chipset_max_threads = 24;
+
+   sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
+
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      sf.thread4.max_threads = 0;
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      sf.thread4.stats_enable = 1;
+
+   /* CACHE_NEW_SF_VP */
+   sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */
+
+   sf.sf5.viewport_transform = 1;
+
+   /* _NEW_SCISSOR */
+   if (key->scissor)
+      sf.sf6.scissor = 1;
+
+   /* _NEW_POLYGON */
+   if (key->front_face == GL_CCW)
+      sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+   else
+      sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
+   /* The viewport is inverted for rendering to a FBO, and that inverts
+    * polygon front/back orientation.
+    */
+   sf.sf5.front_winding ^= key->render_to_fbo;
+
+   switch (key->cull_face) {
+   case GL_FRONT:
+      sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
+      break;
+   case GL_BACK:
+      sf.sf6.cull_mode = BRW_CULLMODE_BACK;
+      break;
+   case GL_FRONT_AND_BACK:
+      sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+      break;
+   case GL_NONE:
+      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   /* _NEW_LINE */
+   /* XXX use ctx->Const.Min/MaxLineWidth here */
+   sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1);
+
+   sf.sf6.line_endcap_aa_region_width = 1;
+   if (key->line_smooth)
+      sf.sf6.aa_enable = 1;
+   else if (sf.sf6.line_width <= 0x2)
+      sf.sf6.line_width = 0;
+
+   /* _NEW_BUFFERS */
+   /* key->render_to_fbo was filled in by sf_unit_populate_key(); the key is only read here. */
+   if (!key->render_to_fbo) {
+      /* Rendering to an OpenGL window */
+      sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
+   }
+   else {
+      /* If rendering to an FBO, the pixel coordinate system is
+       * inverted with respect to the normal OpenGL coordinate
+       * system, so BRW_RASTRULE_LOWER_RIGHT is correct.
+       * But this value is listed as "Reserved, but not seen as useful"
+       * in Intel documentation (page 212, "Point Rasterization Rule",
+       * section 7.4 "SF Pipeline State Summary", of document
+       * "Intel® 965 Express Chipset Family and Intel® G35 Express
+       * Chipset Graphics Controller Programmer's Reference Manual,
+       * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+       * available at
+       *     http://intellinuxgraphics.org/documentation.html
+       * at the time of this writing).
+       *
+       * It does work on at least some devices, if not all;
+       * if devices that don't support it can be identified,
+       * the likely failure case is that points are rasterized
+       * incorrectly, which is no worse than occurs without
+       * the value, so we're using it here.
+       */
+      sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
+   }
+   /* XXX clamp max depends on AA vs. non-AA */
+
+   /* _NEW_POINT */
+   sf.sf7.sprite_point = key->point_sprite;
+   sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
+   sf.sf7.use_point_size_state = !key->point_attenuated;
+   sf.sf7.aa_line_distance_mode = 0;
+
+   /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
+    */
+   if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) {
+      sf.sf7.trifan_pv = 2;
+      sf.sf7.linestrip_pv = 1;
+      sf.sf7.tristrip_pv = 2;
+   } else {
+      sf.sf7.trifan_pv = 1;
+      sf.sf7.linestrip_pv = 0;
+      sf.sf7.tristrip_pv = 0;
+   }
+   sf.sf7.line_last_pixel_enable = 0;
+
+   /* Set bias for OpenGL rasterization rules:
+    */
+   sf.sf6.dest_org_vbias = 0x8;
+   sf.sf6.dest_org_hbias = 0x8;
+
+   bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
+                         key, sizeof(*key),
+                         reloc_bufs, 2,
+                         &sf, sizeof(sf),
+                         NULL, NULL);
+
+   /* STATE_PREFETCH command description describes this state as being
+    * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
+    */
+   /* Emit SF program relocation */
+   dri_bo_emit_reloc(bo,
+                     I915_GEM_DOMAIN_INSTRUCTION, 0,
+                     sf.thread0.grf_reg_count << 1,
+                     offsetof(struct brw_sf_unit_state, thread0),
+                     brw->sf.prog_bo);
+
+   /* Emit SF viewport relocation */
+   dri_bo_emit_reloc(bo,
+                     I915_GEM_DOMAIN_INSTRUCTION, 0,
+                     sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
+                     offsetof(struct brw_sf_unit_state, sf5),
+                     brw->sf.vp_bo);
+
+   return bo;
+}
+/* Look the SF unit state up in the cache by key + reloc bos; build it on a miss. */
+static void upload_sf_unit( struct brw_context *brw )
+{
+   struct brw_sf_unit_key key;
+   dri_bo *reloc_bufs[2];
+
+   sf_unit_populate_key(brw, &key);
+
+   reloc_bufs[0] = brw->sf.prog_bo;
+   reloc_bufs[1] = brw->sf.vp_bo;
+
+   dri_bo_unreference(brw->sf.state_bo);
+   brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT,
+                                       &key, sizeof(key),
+                                       reloc_bufs, 2,
+                                       NULL);
+   if (brw->sf.state_bo == NULL) {
+      brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
+   }
+}
+
+const struct brw_tracked_state brw_sf_unit = {
+   .dirty = {
+      .mesa = (_NEW_POLYGON |
+               _NEW_LIGHT |
+               _NEW_LINE |
+               _NEW_POINT |
+               _NEW_SCISSOR |
+               _NEW_BUFFERS),
+      .brw = BRW_NEW_URB_FENCE,
+      .cache = (CACHE_NEW_SF_VP |
+                CACHE_NEW_SF_PROG)
+   },
+   .prepare = upload_sf_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
new file mode 100644
index 0000000000..d639656b9d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -0,0 +1,173 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+#ifndef BRW_STATE_H
+#define BRW_STATE_H
+
+#include "brw_context.h"
+/* Take a reference on bo (when non-NULL) and append it to brw->state.validated_bos. */
+static inline void
+brw_add_validated_bo(struct brw_context *brw, dri_bo *bo)
+{
+   assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
+
+   if (bo != NULL) {
+      dri_bo_reference(bo);
+      brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
+   }
+}
+/* Declarations only: each atom is defined in a .c file (e.g. brw_sf_vp and brw_sf_unit in brw_sf_state.c). */
+extern const struct brw_tracked_state brw_blend_constant_color;
+extern const struct brw_tracked_state brw_cc_unit;
+extern const struct brw_tracked_state brw_cc_vp;
+extern const struct brw_tracked_state brw_check_fallback;
+extern const struct brw_tracked_state brw_clip_prog;
+extern const struct brw_tracked_state brw_clip_unit;
+extern const struct brw_tracked_state brw_constant_buffer;
+extern const struct brw_tracked_state brw_curbe_offsets;
+extern const struct brw_tracked_state brw_invarient_state;
+extern const struct brw_tracked_state brw_gs_prog;
+extern const struct brw_tracked_state brw_gs_unit;
+extern const struct brw_tracked_state brw_line_stipple;
+extern const struct brw_tracked_state brw_aa_line_parameters;
+extern const struct brw_tracked_state brw_pipelined_state_pointers;
+extern const struct brw_tracked_state brw_binding_table_pointers;
+extern const struct brw_tracked_state brw_depthbuffer;
+extern const struct brw_tracked_state brw_polygon_stipple_offset;
+extern const struct brw_tracked_state brw_polygon_stipple;
+extern const struct brw_tracked_state brw_program_parameters;
+extern const struct brw_tracked_state brw_recalculate_urb_fence;
+extern const struct brw_tracked_state brw_sf_prog;
+extern const struct brw_tracked_state brw_sf_unit;
+extern const struct brw_tracked_state brw_sf_vp;
+extern const struct brw_tracked_state brw_state_base_address;
+extern const struct brw_tracked_state brw_urb_fence;
+extern const struct brw_tracked_state brw_vertex_state;
+extern const struct brw_tracked_state brw_vs_surfaces;
+extern const struct brw_tracked_state brw_vs_prog;
+extern const struct brw_tracked_state brw_vs_unit;
+extern const struct brw_tracked_state brw_wm_input_sizes;
+extern const struct brw_tracked_state brw_wm_prog;
+extern const struct brw_tracked_state brw_wm_samplers;
+extern const struct brw_tracked_state brw_wm_constant_surface;
+extern const struct brw_tracked_state brw_wm_surfaces;
+extern const struct brw_tracked_state brw_wm_unit;
+
+extern const struct brw_tracked_state brw_psp_urb_cbs;
+
+extern const struct brw_tracked_state brw_pipe_control;
+
+extern const struct brw_tracked_state brw_drawing_rect;
+extern const struct brw_tracked_state brw_indices;
+extern const struct brw_tracked_state brw_vertices;
+extern const struct brw_tracked_state brw_index_buffer;
+
+/**
+ * Use same key for WM and VS surfaces.
+ */
+struct brw_surface_key {
+   GLenum target, depthmode;
+   dri_bo *bo;
+   GLint format, internal_format;
+   GLint first_level, last_level;
+   GLint width, height, depth;
+   GLint pitch, cpp;
+   uint32_t tiling;
+   GLuint offset;
+};
+
+/***********************************************************************
+ * brw_state.c
+ */
+void brw_validate_state(struct brw_context *brw);
+void brw_upload_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
+/***********************************************************************
+ * brw_state_cache.c
+ */
+dri_bo *brw_cache_data(struct brw_cache *cache,
+                       enum brw_cache_id cache_id,
+                       const void *data,
+                       dri_bo **reloc_bufs,
+                       GLuint nr_reloc_bufs);
+
+dri_bo *brw_cache_data_sz(struct brw_cache *cache,
+                          enum brw_cache_id cache_id,
+                          const void *data,
+                          GLuint data_size,
+                          dri_bo **reloc_bufs,
+                          GLuint nr_reloc_bufs);
+
+dri_bo *brw_upload_cache( struct brw_cache *cache,
+                          enum brw_cache_id cache_id,
+                          const void *key,
+                          GLuint key_sz,
+                          dri_bo **reloc_bufs,
+                          GLuint nr_reloc_bufs,
+                          const void *data,
+                          GLuint data_sz,
+                          const void *aux,
+                          void *aux_return );
+
+dri_bo *brw_search_cache( struct brw_cache *cache,
+                          enum brw_cache_id cache_id,
+                          const void *key,
+                          GLuint key_size,
+                          dri_bo **reloc_bufs,
+                          GLuint nr_reloc_bufs,
+                          void *aux_return);
+void brw_state_cache_check_size( struct brw_context *brw );
+
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
+void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo);
+
+/***********************************************************************
+ * brw_state_batch.c
+ */
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
+#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
+
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+                                   const void *data,
+                                   GLuint sz );
+void brw_destroy_batch_cache( struct brw_context *brw );
+void brw_clear_batch_cache( struct brw_context *brw );
+
+/* brw_wm_surface_state.c */
+dri_bo *
+brw_create_constant_surface( struct brw_context *brw,
+                             struct brw_surface_key *key );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c
new file mode 100644
index 0000000000..7821898cf9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_batch.c
@@ -0,0 +1,99 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "main/imports.h"
+
+
+
+/**
+ * Emit a command packet into the batch unless an identical packet is
+ * already recorded for the same header opcode.
+ *
+ * One copy of the most recently recorded packet is kept per opcode in
+ * brw->cached_batch_items.  When brw->emit_state_always is set the packet
+ * is emitted unconditionally and the recorded copies are left untouched.
+ *
+ * \param brw   context owning the batch and the list of recorded packets
+ * \param data  packet to emit; it is read through a struct header pointer
+ * \param sz    size of the packet in bytes
+ * \return GL_TRUE if the packet was written to the batch, GL_FALSE if it
+ *         was skipped because it matched the recorded copy.
+ *
+ * If memory for the recorded copy cannot be allocated the packet is still
+ * emitted; it is simply not remembered.
+ */
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+                                   const void *data,
+                                   GLuint sz )
+{
+   struct brw_cached_batch_item *item = brw->cached_batch_items;
+   const struct header *newheader = (const struct header *)data;
+
+   if (brw->emit_state_always) {
+      intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+      return GL_TRUE;
+   }
+
+   while (item) {
+      if (item->header->opcode == newheader->opcode) {
+         if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
+            return GL_FALSE;
+         if (item->sz != sz) {
+            void *resized = _mesa_malloc(sz);
+
+            if (resized == NULL) {
+               /* Keep the old storage so the opcode can still be matched,
+                * but mark its contents as invalid: a size of zero never
+                * compares equal to a real packet, so the next packet with
+                * this opcode is emitted again.
+                */
+               item->sz = 0;
+               goto emit_uncached;
+            }
+
+            _mesa_free(item->header);
+            item->header = resized;
+            item->sz = sz;
+         }
+         goto emit;
+      }
+      item = item->next;
+   }
+
+   /* No entry for this opcode yet: create one at the head of the list. */
+   assert(!item);
+   item = CALLOC_STRUCT(brw_cached_batch_item);
+   if (item == NULL)
+      goto emit_uncached;
+
+   item->header = _mesa_malloc(sz);
+   if (item->header == NULL) {
+      _mesa_free(item);
+      goto emit_uncached;
+   }
+
+   item->sz = sz;
+   item->next = brw->cached_batch_items;
+   brw->cached_batch_items = item;
+
+ emit:
+   memcpy(item->header, newheader, sz);
+ emit_uncached:
+   intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+   return GL_TRUE;
+}
+
+/**
+ * Drop every recorded packet so that the next packet of each opcode is
+ * emitted again.
+ */
+void brw_clear_batch_cache( struct brw_context *brw )
+{
+   struct brw_cached_batch_item *item = brw->cached_batch_items;
+
+   while (item) {
+      struct brw_cached_batch_item *next = item->next;
+
+      /* Release through the same allocator wrappers used to allocate. */
+      _mesa_free((void *)item->header);
+      _mesa_free(item);
+      item = next;
+   }
+
+   brw->cached_batch_items = NULL;
+}
+
+/**
+ * Release all memory held by the recorded-packet list.
+ */
+void brw_destroy_batch_cache( struct brw_context *brw )
+{
+   brw_clear_batch_cache(brw);
+}
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
new file mode 100644
index 0000000000..c262e1db8b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -0,0 +1,597 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+/** @file brw_state_cache.c
+ *
+ * This file implements a simple static state cache for 965.  Consumers
+ * query the hash table using a cache_id, opaque key data, and the list of
+ * buffers that will be used in relocations, and receive the matching
+ * state buffer object (plus any associated auxiliary data) in return.
+ *
+ * The inner workings are a simple chained hash table.  hash_key() folds
+ * the key words and the relocation buffer pointers together with a
+ * rotate-and-xor.  The cache_id is not part of the hash value; it is
+ * compared explicitly during lookup (this could be improved by making
+ * consumers use structured keys).
+ *
+ * Replacement is not implemented.  Instead, when the cache gets too big, at
+ * a safe point (unlock) we throw out all of the cache data and let it
+ * regenerate for the next rendering operation.
+ *
+ * The reloc_buf pointers need to be included as key data, otherwise the
+ * non-unique values stuffed in the offset in key data through
+ * brw_cache_data() may result in a successful probe for state buffers
+ * even when the buffer being referenced doesn't match.  The result would be
+ * that the same state cache entry is used twice for different buffers,
+ * only one of the two buffers referenced gets put into the offset, and the
+ * incorrect program is run for the other instance.
+ */
+
+#include "main/imports.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+
+/* XXX: Fixme - have to include these to get the sizes of the prog_key
+ * structs:
+ */
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_clip.h"
+#include "brw_sf.h"
+#include "brw_gs.h"
+
+
+/**
+ * Fold the key words and the relocation buffer pointers into one hash.
+ *
+ * \param key            key data, read as an array of GLuint
+ * \param key_size       size of the key in bytes; must be a multiple of 4
+ * \param reloc_bufs     relocation target buffers (may be NULL when
+ *                       nr_reloc_bufs is 0)
+ * \param nr_reloc_bufs  number of entries in reloc_bufs
+ */
+static GLuint
+hash_key(const void *key, GLuint key_size,
+         dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+{
+   const GLuint *ikey = (const GLuint *)key;
+   GLuint hash = 0, i;
+
+   assert(key_size % 4 == 0);
+
+   /* I'm sure this can be improved on:
+    */
+   for (i = 0; i < key_size/4; i++) {
+      hash ^= ikey[i];
+      hash = (hash << 5) | (hash >> 27);
+   }
+
+   /* Include the BO pointers as key data as well */
+   ikey = (const GLuint *)reloc_bufs;
+   key_size = nr_reloc_bufs * sizeof(dri_bo *);
+   for (i = 0; i < key_size/4; i++) {
+      hash ^= ikey[i];
+      hash = (hash << 5) | (hash >> 27);
+   }
+
+   return hash;
+}
+
+
+/**
+ * Marks a new buffer as being chosen for the given cache id.
+ *
+ * When the buffer differs from the previous choice, the reference held in
+ * cache->last_bo[] is moved to the new buffer and the matching bit is set
+ * in brw->state.dirty.cache.
+ */
+static void
+update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
+                  dri_bo *bo)
+{
+   if (bo == cache->last_bo[cache_id])
+      return; /* no change */
+
+   dri_bo_unreference(cache->last_bo[cache_id]);
+   cache->last_bo[cache_id] = bo;
+   dri_bo_reference(cache->last_bo[cache_id]);
+   cache->brw->state.dirty.cache |= 1 << cache_id;
+}
+
+
+/**
+ * Walk the bucket selected by hash and return the item whose cache_id,
+ * hash, key bytes and relocation buffer list all match, or NULL.
+ */
+static struct brw_cache_item *
+search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
+             GLuint hash, const void *key, GLuint key_size,
+             dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+{
+   struct brw_cache_item *c;
+
+#if 0
+   int bucketcount = 0;
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next)
+      bucketcount++;
+
+   fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
+           cache->size, bucketcount, cache->n_items);
+#endif
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next) {
+      /* The relocation list is only compared when it is non-empty:
+       * callers may pass a NULL reloc_bufs together with a count of 0,
+       * and memcmp() must not be handed a null pointer even for length 0.
+       */
+      if (c->cache_id == cache_id &&
+          c->hash == hash &&
+          c->key_size == key_size &&
+          memcmp(c->key, key, key_size) == 0 &&
+          c->nr_reloc_bufs == nr_reloc_bufs &&
+          (nr_reloc_bufs == 0 ||
+           memcmp(c->reloc_bufs, reloc_bufs,
+                  nr_reloc_bufs * sizeof(dri_bo *)) == 0))
+         return c;
+   }
+
+   return NULL;
+}
+
+
+/**
+ * Grow the bucket array to three times its size and re-link every item
+ * into the bucket selected by its stored hash.
+ *
+ * If the larger array cannot be allocated the table is left as it is;
+ * it keeps working, only with longer chains.
+ */
+static void
+rehash(struct brw_cache *cache)
+{
+   struct brw_cache_item **items;
+   struct brw_cache_item *c, *next;
+   GLuint size, i;
+
+   size = cache->size * 3;
+   items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items));
+   if (items == NULL)
+      return;
+
+   for (i = 0; i < cache->size; i++)
+      for (c = cache->items[i]; c; c = next) {
+         next = c->next;
+         c->next = items[c->hash % size];
+         items[c->hash % size] = c;
+      }
+
+   FREE(cache->items);
+   cache->items = items;
+   cache->size = size;
+}
+
+
+/**
+ * Returns the buffer object matching cache_id and key, or NULL.
+ *
+ * On a hit a reference is taken on the returned buffer for the caller,
+ * and, if aux_return is non-NULL, *aux_return is pointed at the bytes
+ * stored directly after the key inside the cache item.
+ */
+dri_bo *
+brw_search_cache(struct brw_cache *cache,
+                 enum brw_cache_id cache_id,
+                 const void *key,
+                 GLuint key_size,
+                 dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
+                 void *aux_return)
+{
+   struct brw_cache_item *item;
+   GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+
+   item = search_cache(cache, cache_id, hash, key, key_size,
+                       reloc_bufs, nr_reloc_bufs);
+
+   if (item == NULL)
+      return NULL;
+
+   if (aux_return)
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+
+   update_cache_last(cache, cache_id, item->bo);
+
+   dri_bo_reference(item->bo);
+   return item->bo;
+}
+
+
+/**
+ * Create a buffer object holding data and enter it into the cache.
+ *
+ * The key, the auxiliary data and the relocation buffer list are copied
+ * into one allocation owned by the new cache item, laid out in that order.
+ * A reference is taken on every non-NULL relocation buffer and one more on
+ * the new buffer object for the cache item itself.
+ *
+ * \param key, key_size   lookup key
+ * \param reloc_bufs, nr_reloc_bufs  buffers referenced by the state
+ * \param data, data_size contents uploaded into the new buffer object
+ * \param aux             auxiliary data of cache->aux_size[cache_id] bytes;
+ *                        if NULL the stored auxiliary bytes are zeroed
+ * \param aux_return      if non-NULL, receives a pointer to the stored
+ *                        copy of the auxiliary data
+ * \return the new buffer object, or NULL if the buffer object or the
+ *         bookkeeping memory could not be allocated.
+ */
+dri_bo *
+brw_upload_cache( struct brw_cache *cache,
+                  enum brw_cache_id cache_id,
+                  const void *key,
+                  GLuint key_size,
+                  dri_bo **reloc_bufs,
+                  GLuint nr_reloc_bufs,
+                  const void *data,
+                  GLuint data_size,
+                  const void *aux,
+                  void *aux_return )
+{
+   struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+   GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+   GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *);
+   GLuint aux_size = cache->aux_size[cache_id];
+   GLuint total_size = key_size + aux_size + relocs_size;
+   char *tmp;
+   dri_bo *bo;
+   GLuint i;
+
+   /* Create the buffer object to contain the data */
+   bo = dri_bo_alloc(cache->brw->intel.bufmgr,
+                     cache->name[cache_id], data_size, 1 << 6);
+
+
+   /* Set up the memory containing the key, aux_data, and reloc_bufs.
+    * At least one byte is requested so that an empty key cannot be
+    * mistaken for an allocation failure.
+    */
+   tmp = _mesa_malloc(total_size ? total_size : 1);
+
+   if (item == NULL || bo == NULL || tmp == NULL) {
+      if (bo)
+         dri_bo_unreference(bo);
+      if (tmp)
+         _mesa_free(tmp);
+      if (item)
+         FREE(item);
+      return NULL;
+   }
+
+   /* Zero-length copies are skipped: the source pointers are allowed to
+    * be NULL in that case and memcpy() must not be given a null pointer.
+    */
+   if (key_size)
+      memcpy(tmp, key, key_size);
+   if (aux_size) {
+      if (aux)
+         memcpy(tmp + key_size, aux, aux_size);
+      else
+         memset(tmp + key_size, 0, aux_size);
+   }
+   if (relocs_size)
+      memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
+   for (i = 0; i < nr_reloc_bufs; i++) {
+      if (reloc_bufs[i] != NULL)
+         dri_bo_reference(reloc_bufs[i]);
+   }
+
+   item->cache_id = cache_id;
+   item->key = tmp;
+   item->hash = hash;
+   item->key_size = key_size;
+   item->reloc_bufs = (dri_bo **)(tmp + key_size + aux_size);
+   item->nr_reloc_bufs = nr_reloc_bufs;
+
+   item->bo = bo;
+   dri_bo_reference(bo);
+   item->data_size = data_size;
+
+   if (cache->n_items > cache->size * 1.5)
+      rehash(cache);
+
+   hash %= cache->size;
+   item->next = cache->items[hash];
+   cache->items[hash] = item;
+   cache->n_items++;
+
+   if (aux_return) {
+      assert(cache->aux_size[cache_id]);
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+   }
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("upload %s: %d bytes to cache id %d\n",
+                   cache->name[cache_id],
+                   data_size, cache_id);
+
+   /* Copy data to the buffer */
+   dri_bo_subdata(bo, 0, data_size, data);
+
+   update_cache_last(cache, cache_id, bo);
+
+   return bo;
+}
+
+
+/**
+ * This doesn't really work with aux data.
Use search/upload instead
+ *
+ * Looks up data (used as its own key) and returns the cached buffer object
+ * with a new reference; on a miss the data is uploaded with
+ * brw_upload_cache() and that result is returned.
+ */
+dri_bo *
+brw_cache_data_sz(struct brw_cache *cache,
+                  enum brw_cache_id cache_id,
+                  const void *data,
+                  GLuint data_size,
+                  dri_bo **reloc_bufs,
+                  GLuint nr_reloc_bufs)
+{
+   dri_bo *bo;
+   struct brw_cache_item *item;
+   GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs);
+
+   item = search_cache(cache, cache_id, hash, data, data_size,
+                       reloc_bufs, nr_reloc_bufs);
+   if (item) {
+      update_cache_last(cache, cache_id, item->bo);
+      dri_bo_reference(item->bo);
+      return item->bo;
+   }
+
+   bo = brw_upload_cache(cache, cache_id,
+                         data, data_size,
+                         reloc_bufs, nr_reloc_bufs,
+                         data, data_size,
+                         NULL, NULL);
+
+   return bo;
+}
+
+
+/**
+ * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
+ *
+ * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
+ * better to use, as the potentially changing offsets in the data-used-as-key
+ * will result in excessive cache misses.
+ */
+dri_bo *
+brw_cache_data(struct brw_cache *cache,
+               enum brw_cache_id cache_id,
+               const void *data,
+               dri_bo **reloc_bufs,
+               GLuint nr_reloc_bufs)
+{
+   return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
+                            reloc_bufs, nr_reloc_bufs);
+}
+
+enum pool_type {
+   DW_SURFACE_STATE,
+   DW_GENERAL_STATE
+};
+
+
+/**
+ * Record the debug name, canonical key size and auxiliary data size for
+ * one cache id.  The name is duplicated with strdup() and is released in
+ * brw_destroy_cache().
+ */
+static void
+brw_init_cache_id(struct brw_cache *cache,
+                  const char *name,
+                  enum brw_cache_id id,
+                  GLuint key_size,
+                  GLuint aux_size)
+{
+   cache->name[id] = strdup(name);
+   cache->key_size[id] = key_size;
+   cache->aux_size[id] = aux_size;
+}
+
+
+/**
+ * Set up brw->cache: an empty 7-bucket table plus the per-id key and
+ * auxiliary sizes for unit state, viewports, samplers and programs.
+ */
+static void
+brw_init_non_surface_cache(struct brw_context *brw)
+{
+   struct brw_cache *cache = &brw->cache;
+
+   cache->brw = brw;
+
+   cache->size = 7;
+   cache->n_items = 0;
+   /* The table is an array of bucket head pointers, so size it by the
+    * pointer type rather than by the item structure.
+    */
+   cache->items = (struct brw_cache_item **)
+      _mesa_calloc(cache->size * sizeof(*cache->items));
+
+   brw_init_cache_id(cache,
+                     "CC_VP",
+                     BRW_CC_VP,
+                     sizeof(struct brw_cc_viewport),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "CC_UNIT",
+                     BRW_CC_UNIT,
+                     sizeof(struct brw_cc_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "WM_PROG",
+                     BRW_WM_PROG,
+                     sizeof(struct brw_wm_prog_key),
+                     sizeof(struct brw_wm_prog_data));
+
+   brw_init_cache_id(cache,
+                     "SAMPLER_DEFAULT_COLOR",
+                     BRW_SAMPLER_DEFAULT_COLOR,
+                     sizeof(struct brw_sampler_default_color),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "SAMPLER",
+                     BRW_SAMPLER,
+                     0,		/* variable key/data size */
+                     0);
+
+   brw_init_cache_id(cache,
+                     "WM_UNIT",
+                     BRW_WM_UNIT,
+                     sizeof(struct brw_wm_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "SF_PROG",
+                     BRW_SF_PROG,
+                     sizeof(struct brw_sf_prog_key),
+                     sizeof(struct brw_sf_prog_data));
+
+   brw_init_cache_id(cache,
+                     "SF_VP",
+                     BRW_SF_VP,
+                     sizeof(struct brw_sf_viewport),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "SF_UNIT",
+                     BRW_SF_UNIT,
+                     sizeof(struct brw_sf_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "VS_UNIT",
+                     BRW_VS_UNIT,
+                     sizeof(struct brw_vs_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "VS_PROG",
+                     BRW_VS_PROG,
+                     sizeof(struct brw_vs_prog_key),
+                     sizeof(struct brw_vs_prog_data));
+
+   brw_init_cache_id(cache,
+                     "CLIP_UNIT",
+                     BRW_CLIP_UNIT,
+                     sizeof(struct brw_clip_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "CLIP_PROG",
+                     BRW_CLIP_PROG,
+                     sizeof(struct brw_clip_prog_key),
+                     sizeof(struct brw_clip_prog_data));
+
+   brw_init_cache_id(cache,
+                     "GS_UNIT",
+                     BRW_GS_UNIT,
+                     sizeof(struct brw_gs_unit_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "GS_PROG",
+                     BRW_GS_PROG,
+                     sizeof(struct brw_gs_prog_key),
+                     sizeof(struct brw_gs_prog_data));
+}
+
+
+/**
+ * Set up brw->surface_cache: an empty 7-bucket table plus the per-id
+ * sizes for surface state and surface binding tables.
+ */
+static void
+brw_init_surface_cache(struct brw_context *brw)
+{
+   struct brw_cache *cache = &brw->surface_cache;
+
+   cache->brw = brw;
+
+   cache->size = 7;
+   cache->n_items = 0;
+   /* Array of bucket head pointers: size by the pointer type. */
+   cache->items = (struct brw_cache_item **)
+      _mesa_calloc(cache->size * sizeof(*cache->items));
+
+   brw_init_cache_id(cache,
+                     "SS_SURFACE",
+                     BRW_SS_SURFACE,
+                     sizeof(struct brw_surface_state),
+                     0);
+
+   brw_init_cache_id(cache,
+                     "SS_SURF_BIND",
+                     BRW_SS_SURF_BIND,
+                     0,
+                     0);
+}
+
+
+/**
+ * Initialize both state caches of the context.
+ */
+void
+brw_init_caches(struct brw_context *brw)
+{
+   brw_init_non_surface_cache(brw);
+   brw_init_surface_cache(brw);
+}
+
+
+/**
+ * Remove every item from the cache, dropping the references the items
+ * hold on their relocation buffers and buffer objects.  Also discards
+ * brw->curbe.last_buf and marks all state dirty so that everything is
+ * regenerated.
+ */
+static void
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+   struct brw_cache_item *c, *next;
+   GLuint i;
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+   for (i = 0; i < cache->size; i++) {
+      for (c = cache->items[i]; c; c = next) {
+         GLuint j;
+
+         next = c->next;
+         for (j = 0; j < c->nr_reloc_bufs; j++)
+            dri_bo_unreference(c->reloc_bufs[j]);
+         dri_bo_unreference(c->bo);
+         /* Key storage and the item come from the Mesa allocator
+          * wrappers, so release them through the matching wrappers.
+          */
+         _mesa_free((void *)c->key);
+         FREE(c);
+      }
+      cache->items[i] = NULL;
+   }
+
+   cache->n_items = 0;
+
+   if (brw->curbe.last_buf) {
+      _mesa_free(brw->curbe.last_buf);
+      brw->curbe.last_buf = NULL;
+   }
+
+   brw->state.dirty.mesa |= ~0;
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+}
+
+/* Clear all entries from the cache that point to the given bo.
+ *
+ * This lets us release memory for reuse earlier for known-dead buffers,
+ * at the cost of walking the entire hash table.
+ */
+void
+brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo)
+{
+   struct brw_cache_item **prev;
+   GLuint i;
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+   for (i = 0; i < cache->size; i++) {
+      /* prev always points at the link that leads to the current item,
+       * so an item can be unlinked without tracking its predecessor.
+       */
+      for (prev = &cache->items[i]; *prev;) {
+         struct brw_cache_item *c = *prev;
+
+         if (drm_intel_bo_references(c->bo, bo)) {
+            GLuint j;
+
+            *prev = c->next;
+
+            for (j = 0; j < c->nr_reloc_bufs; j++)
+               dri_bo_unreference(c->reloc_bufs[j]);
+            dri_bo_unreference(c->bo);
+            _mesa_free((void *)c->key);
+            FREE(c);
+            cache->n_items--;
+         } else {
+            prev = &c->next;
+         }
+      }
+   }
+}
+
+/**
+ * Flush either cache completely once it holds more than 1000 items.
+ */
+void
+brw_state_cache_check_size(struct brw_context *brw)
+{
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+
+   /* un-tuned guess.  We've got around 20 state objects for a total of around
+    * 32k, so 1000 of them is around 1.5MB.
+    */
+   if (brw->cache.n_items > 1000)
+      brw_clear_cache(brw, &brw->cache);
+
+   if (brw->surface_cache.n_items > 1000)
+      brw_clear_cache(brw, &brw->surface_cache);
+}
+
+
+/**
+ * Empty the cache and release everything it owns: the per-id "last bo"
+ * references, the duplicated names and the bucket array.  The released
+ * pointers are reset so that nothing is left dangling.
+ */
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+   GLuint i;
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+   brw_clear_cache(brw, cache);
+   for (i = 0; i < BRW_MAX_CACHE; i++) {
+      dri_bo_unreference(cache->last_bo[i]);
+      cache->last_bo[i] = NULL;
+      /* Names come from strdup(), hence plain free(). */
+      free(cache->name[i]);
+      cache->name[i] = NULL;
+   }
+   FREE(cache->items);
+   cache->items = NULL;
+   cache->size = 0;
+}
+
+
+/**
+ * Destroy both state caches of the context.
+ */
+void
+brw_destroy_caches(struct brw_context *brw)
+{
+   brw_destroy_cache(brw, &brw->cache);
+   brw_destroy_cache(brw, &brw->surface_cache);
+}
diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c
new file mode 100644
index 0000000000..e94fa7d2b4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_dump.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt
+ *
+ */
+
+#include "main/mtypes.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/**
+ * Prints out a header, the contents, and the message associated with
+ * the hardware state data given.
+ *
+ * \param name Name of the state object
+ * \param data Pointer to the base of the state object
+ * \param hw_offset Hardware offset of the base of the state data.
+ * \param index Index of the DWORD being output.
+ * \param fmt printf-style format for the message, followed by its arguments
+ */
+static void
+state_out(const char *name, void *data, uint32_t hw_offset, int index,
+          const char *fmt, ...)
+{
+   va_list va;
+
+   fprintf(stderr, "%8s: 0x%08x: 0x%08x: ",
+           name, hw_offset + index * 4, ((uint32_t *)data)[index]);
+   va_start(va, fmt);
+   vfprintf(stderr, fmt, va);
+   va_end(va);
+}
+
+/** Generic, undecoded state buffer debug printout */
+static void
+state_struct_out(const char *name, dri_bo *buffer, unsigned int state_size)
+{
+   const int ndwords = (int)(state_size / 4);
+   int i;
+
+   if (buffer == NULL)
+      return;
+
+   dri_bo_map(buffer, GL_FALSE);
+   for (i = 0; i < ndwords; i++) {
+      state_out(name, buffer->virtual, buffer->offset, i,
+                "dword %d\n", i);
+   }
+   dri_bo_unmap(buffer);
+}
+
+/** Name of a surface type field value, or "unknown". */
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+   switch (surfacetype) {
+   case 0: return "1D";
+   case 1: return "2D";
+   case 2: return "3D";
+   case 3: return "CUBE";
+   case 4: return "BUFFER";
+   case 7: return "NULL";
+   default: return "unknown";
+   }
+}
+
+/** Name of a surface format field value, or "unknown". */
+static const char *
+get_965_surface_format(unsigned int surface_format)
+{
+   switch (surface_format) {
+   case 0x000: return "r32g32b32a32_float";
+   case 0x0c1: return "b8g8r8a8_unorm";
+   case 0x100: return "b5g6r5_unorm";
+   case 0x102: return "b5g5r5a1_unorm";
+   case 0x104: return "b4g4r4a4_unorm";
+   default: return "unknown";
+   }
+}
+
+/**
+ * Decode and print the first six dwords of every WM surface state buffer.
+ */
+static void dump_wm_surface_state(struct brw_context *brw)
+{
+   int i;
+
+   for (i = 0; i < brw->wm.nr_surfaces; i++) {
+      dri_bo *surf_bo = brw->wm.surf_bo[i];
+      unsigned int surfoff;
+      struct brw_surface_state *surf;
+      char name[20];
+
+      if (surf_bo == NULL) {
+         fprintf(stderr, "  WM SS%d: NULL\n", i);
+         continue;
+      }
+      dri_bo_map(surf_bo, GL_FALSE);
+      surfoff = surf_bo->offset;
+      surf = (struct brw_surface_state *)(surf_bo->virtual);
+
+      /* Bounded formatting: the label can never overrun name[]. */
+      snprintf(name, sizeof(name), "WM SS%d", i);
+      state_out(name, surf, surfoff, 0, "%s %s\n",
+                get_965_surfacetype(surf->ss0.surface_type),
+                get_965_surface_format(surf->ss0.surface_format));
+      state_out(name, surf, surfoff, 1, "offset\n");
+      state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n",
+                surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
+      state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n",
+                surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
+      state_out(name, surf, surfoff, 4, "mip base %d\n",
+                surf->ss4.min_lod);
+      state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n",
+                surf->ss5.x_offset, surf->ss5.y_offset);
+
+      dri_bo_unmap(surf_bo);
+   }
+}
+
+/**
+ * Decode and print the SF viewport: six matrix terms and the scissor
+ * rectangle.
+ */
+static void dump_sf_viewport_state(struct brw_context *brw)
+{
+   const char *name = "SF VP";
+   struct brw_sf_viewport *vp;
+   uint32_t vp_off;
+
+   if (brw->sf.vp_bo == NULL)
+      return;
+
+   dri_bo_map(brw->sf.vp_bo, GL_FALSE);
+
+   vp = brw->sf.vp_bo->virtual;
+   vp_off = brw->sf.vp_bo->offset;
+
+   state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
+   state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11);
+   state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22);
+   state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30);
+   state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31);
+   state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32);
+
+   state_out(name, vp, vp_off, 6, "top left = %d,%d\n",
+             vp->scissor.xmin, vp->scissor.ymin);
+   state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n",
+             vp->scissor.xmax, vp->scissor.ymax);
+
+   dri_bo_unmap(brw->sf.vp_bo);
+}
+
+/**
+ * Hex-dump a program buffer four dwords per line, stopping after the
+ * first all-zero group.
+ */
+static void brw_debug_prog(const char *name, dri_bo *prog)
+{
+   unsigned int i;
+   uint32_t *data;
+
+   if (prog == NULL)
+      return;
+
+   dri_bo_map(prog, GL_FALSE);
+
+   data = prog->virtual;
+
+   for (i = 0; i < prog->size / 4 / 4; i++) {
+      fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+              name, (unsigned int)prog->offset + i * 4 * 4,
+              data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
+      /* Stop at the end of the program.  It'd be nice to keep track of the actual
+       * intended program size instead of guessing like this.
+       */
+      if (data[i * 4 + 0] == 0 &&
+          data[i * 4 + 1] == 0 &&
+          data[i * 4 + 2] == 0 &&
+          data[i * 4 + 3] == 0)
+         break;
+   }
+
+   dri_bo_unmap(prog);
+}
+
+
+/**
+ * Print additional debug information associated with the batchbuffer
+ * when DEBUG_BATCH is set.
+ *
+ * For 965, this means mapping the state buffers that would have been referenced
+ * by the batchbuffer and dumping them.
+ *
+ * The buffer offsets printed rely on the buffer containing the last offset
+ * it was validated at.
+ */
+void brw_debug_batch(struct intel_context *intel)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+
+   state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
+   dump_wm_surface_state(brw);
+
+   state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
+   brw_debug_prog("VS prog", brw->vs.prog_bo);
+
+   state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state));
+   brw_debug_prog("GS prog", brw->gs.prog_bo);
+
+   state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state));
+   dump_sf_viewport_state(brw);
+   brw_debug_prog("SF prog", brw->sf.prog_bo);
+
+   state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state));
+   brw_debug_prog("WM prog", brw->wm.prog_bo);
+}
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
new file mode 100644
index 0000000000..b817b741e7
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -0,0 +1,416 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+
+/* This is used to initialize brw->state.atoms[].  We could use this
+ * list directly except for a single atom, brw_constant_buffer, which
+ * has a .dirty value which changes according to the parameters of the
+ * current fragment and vertex programs, and so cannot be a static
+ * value.
+ *
+ * The order of the entries matters: brw_validate_state() and
+ * brw_upload_state() walk the list front to back.
+ */
+const struct brw_tracked_state *atoms[] =
+{
+   &brw_check_fallback,
+
+   &brw_wm_input_sizes,
+   &brw_vs_prog,
+   &brw_gs_prog,
+   &brw_clip_prog,
+   &brw_sf_prog,
+   &brw_wm_prog,
+
+   /* Once all the programs are done, we know how large urb entry
+    * sizes need to be and can decide if we need to change the urb
+    * layout.
+    */
+   &brw_curbe_offsets,
+   &brw_recalculate_urb_fence,
+
+   &brw_cc_vp,
+   &brw_cc_unit,
+
+   &brw_vs_surfaces,          /* must do before unit */
+   &brw_wm_constant_surface,  /* must do before wm surfaces/bind bo */
+   &brw_wm_surfaces,          /* must do before samplers and unit */
+   &brw_wm_samplers,
+
+   &brw_wm_unit,
+   &brw_sf_vp,
+   &brw_sf_unit,
+   &brw_vs_unit,              /* always required, enabled or not */
+   &brw_clip_unit,
+   &brw_gs_unit,
+
+   /* Command packets:
+    */
+   &brw_invarient_state,
+   &brw_state_base_address,
+
+   &brw_binding_table_pointers,
+   &brw_blend_constant_color,
+
+   &brw_depthbuffer,
+
+   &brw_polygon_stipple,
+   &brw_polygon_stipple_offset,
+
+   &brw_line_stipple,
+   &brw_aa_line_parameters,
+
+   &brw_psp_urb_cbs,
+
+   &brw_drawing_rect,
+   &brw_indices,
+   &brw_index_buffer,
+   &brw_vertices,
+
+   &brw_constant_buffer
+};
+
+
+/** Set up the state caches used by the tracked-state atoms. */
+void brw_init_state( struct brw_context *brw )
+{
+   brw_init_caches(brw);
+}
+
+
+/** Tear down the state caches and the cached-batch-packet list. */
+void brw_destroy_state( struct brw_context *brw )
+{
+   brw_destroy_caches(brw);
+   brw_destroy_batch_cache(brw);
+}
+
+/***********************************************************************
+ */
+
+/** True if the two flag sets share a bit in any of the three groups. */
+static GLboolean check_state( const struct brw_state_flags *a,
+                              const struct brw_state_flags *b )
+{
+   return ((a->mesa & b->mesa) ||
+           (a->brw & b->brw) ||
+           (a->cache & b->cache));
+}
+
+/** OR the flags of b into a. */
+static void accumulate_state( struct brw_state_flags *a,
+                              const struct brw_state_flags *b )
+{
+   a->mesa |= b->mesa;
+   a->brw |= b->brw;
+   a->cache |= b->cache;
+}
+
+
+/** Store in result the bits that differ between a and b. */
+static void xor_states( struct brw_state_flags *result,
+                        const struct brw_state_flags *a,
+                        const struct brw_state_flags *b )
+{
+   result->mesa = a->mesa ^ b->mesa;
+   result->brw = a->brw ^ b->brw;
+   result->cache = a->cache ^ b->cache;
+}
+
+/** Drop the references held on the buffers validated in the last round. */
+static void
+brw_clear_validated_bos(struct brw_context *brw)
+{
+   int i;
+
+   /* Clear the last round of validated bos */
+   for (i = 0; i < brw->state.validated_bo_count; i++) {
+      dri_bo_unreference(brw->state.validated_bos[i]);
+      brw->state.validated_bos[i] = NULL;
+   }
+   brw->state.validated_bo_count = 0;
+}
+
+/* One row of the dirty-bit statistics tables below.  Each table ends
+ * with an all-zero row.
+ */
+struct dirty_bit_map {
+   uint32_t bit;
+   const char *name;	/* points at a string literal */
+   uint32_t count;
+};
+
+#define DEFINE_BIT(name) {name, #name, 0}
+
+static struct dirty_bit_map mesa_bits[] = {
+   DEFINE_BIT(_NEW_MODELVIEW),
+   DEFINE_BIT(_NEW_PROJECTION),
+   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
+   DEFINE_BIT(_NEW_COLOR_MATRIX),
+   DEFINE_BIT(_NEW_ACCUM),
+   DEFINE_BIT(_NEW_COLOR),
+   DEFINE_BIT(_NEW_DEPTH),
+   DEFINE_BIT(_NEW_EVAL),
+   DEFINE_BIT(_NEW_FOG),
+   DEFINE_BIT(_NEW_HINT),
+   DEFINE_BIT(_NEW_LIGHT),
+   DEFINE_BIT(_NEW_LINE),
+   DEFINE_BIT(_NEW_PIXEL),
+   DEFINE_BIT(_NEW_POINT),
+   DEFINE_BIT(_NEW_POLYGON),
+   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
+   DEFINE_BIT(_NEW_SCISSOR),
+   DEFINE_BIT(_NEW_STENCIL),
+   DEFINE_BIT(_NEW_TEXTURE),
+   DEFINE_BIT(_NEW_TRANSFORM),
+   DEFINE_BIT(_NEW_VIEWPORT),
+   DEFINE_BIT(_NEW_PACKUNPACK),
+   DEFINE_BIT(_NEW_ARRAY),
+   DEFINE_BIT(_NEW_RENDERMODE),
+   DEFINE_BIT(_NEW_BUFFERS),
+   DEFINE_BIT(_NEW_MULTISAMPLE),
+   DEFINE_BIT(_NEW_TRACK_MATRIX),
+   DEFINE_BIT(_NEW_PROGRAM),
+   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
+   {0, 0, 0}
+};
+
+static struct dirty_bit_map brw_bits[] = {
+   DEFINE_BIT(BRW_NEW_URB_FENCE),
+   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
+   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
+   DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
+   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
+   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
+   DEFINE_BIT(BRW_NEW_PRIMITIVE),
+   DEFINE_BIT(BRW_NEW_CONTEXT),
+   DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
+   DEFINE_BIT(BRW_NEW_PSP),
+   DEFINE_BIT(BRW_NEW_FENCE),
+   DEFINE_BIT(BRW_NEW_INDICES),
+   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
+   DEFINE_BIT(BRW_NEW_VERTICES),
+   DEFINE_BIT(BRW_NEW_BATCH),
+   DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
+   {0, 0, 0}
+};
+
+static struct dirty_bit_map cache_bits[] = {
+   DEFINE_BIT(CACHE_NEW_CC_VP),
+   DEFINE_BIT(CACHE_NEW_CC_UNIT),
+   DEFINE_BIT(CACHE_NEW_WM_PROG),
+   DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
+   DEFINE_BIT(CACHE_NEW_SAMPLER),
+   DEFINE_BIT(CACHE_NEW_WM_UNIT),
+   DEFINE_BIT(CACHE_NEW_SF_PROG),
+   DEFINE_BIT(CACHE_NEW_SF_VP),
+   DEFINE_BIT(CACHE_NEW_SF_UNIT),
+   DEFINE_BIT(CACHE_NEW_VS_UNIT),
+   DEFINE_BIT(CACHE_NEW_VS_PROG),
+   DEFINE_BIT(CACHE_NEW_GS_UNIT),
+   DEFINE_BIT(CACHE_NEW_GS_PROG),
+   DEFINE_BIT(CACHE_NEW_CLIP_VP),
+   DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
+   DEFINE_BIT(CACHE_NEW_CLIP_PROG),
+   DEFINE_BIT(CACHE_NEW_SURFACE),
+   DEFINE_BIT(CACHE_NEW_SURF_BIND),
+   {0, 0, 0}
+};
+
+
+/**
+ * Increment the counter of every table row whose bit is set in bits.
+ * The walk ends at the all-zero terminator row.
+ */
+static void
+brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+   int i;
+
+   for (i = 0; bit_map[i].bit != 0; i++) {
+      if (bit_map[i].bit & bits)
+         bit_map[i].count++;
+   }
+}
+
+/**
+ * Print bit value, accumulated count and name for every table row.
+ * The bits argument is not used; it is kept so that the signature
+ * mirrors brw_update_dirty_count().
+ */
+static void
+brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+   int i;
+
+   (void) bits;
+
+   for (i = 0; bit_map[i].bit != 0; i++) {
+      /* count is unsigned, so print it with an unsigned conversion. */
+      fprintf(stderr, "0x%08x: %12u (%s)\n",
+              bit_map[i].bit, bit_map[i].count, bit_map[i].name);
+   }
+}
+
+/***********************************************************************
+ * Validate state: collect the dirty flags and run the prepare() hook
+ * of every atom that is affected by them.
+ */
+void brw_validate_state( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   struct brw_state_flags *state = &brw->state.dirty;
+   GLuint i;
+
+   brw_clear_validated_bos(brw);
+
+   state->mesa |= brw->intel.NewGLState;
+   brw->intel.NewGLState = 0;
+
+   brw_add_validated_bo(brw, intel->batch->buf);
+
+   if (brw->emit_state_always) {
+      state->mesa |= ~0;
+      state->brw |= ~0;
+      state->cache |= ~0;
+   }
+
+   if (brw->fragment_program != ctx->FragmentProgram._Current) {
+      brw->fragment_program = ctx->FragmentProgram._Current;
+      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+   }
+
+   if (brw->vertex_program != ctx->VertexProgram._Current) {
+      brw->vertex_program = ctx->VertexProgram._Current;
+      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+   }
+
+   /* Nothing changed since the last upload: no atom needs preparing. */
+   if (state->mesa == 0 &&
+       state->cache == 0 &&
+       state->brw == 0)
+      return;
+
+   if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
+      brw_clear_batch_cache(brw);
+
+   brw->intel.Fallback = 0;
+
+   /* do prepare stage for all atoms */
+   for (i = 0; i < Elements(atoms); i++) {
+      const struct brw_tracked_state *atom = atoms[i];
+
+      if (brw->intel.Fallback)
+         break;
+
+      if (check_state(state, &atom->dirty)) {
+         if (atom->prepare) {
+            atom->prepare(brw);
+         }
+      }
+   }
+
+   /* Make sure that the textures which are referenced by the current
+    * brw fragment program are actually present/valid.
+    * If this fails, we can experience GPU lock-ups.
+    */
+   {
+      const struct brw_fragment_program *fp;
+      fp = brw_fragment_program_const(brw->fragment_program);
+      if (fp) {
+         assert((fp->tex_units_used & ctx->Texture._EnabledUnits)
+                == fp->tex_units_used);
+      }
+   }
+}
+
+
+/**
+ * Emit all state: run the emit() hook of every atom affected by the
+ * current dirty flags, then clear the flags unless a fallback was
+ * triggered.
+ */
+void brw_upload_state(struct brw_context *brw)
+{
+   struct brw_state_flags *state = &brw->state.dirty;
+   GLuint i;
+   static int dirty_count = 0;
+
+   brw_clear_validated_bos(brw);
+
+   if (INTEL_DEBUG) {
+      /* Debug version which enforces various sanity checks on the
+       * state flags which are generated and checked to help ensure
+       * state atoms are ordered correctly in the list.
+       */
+      struct brw_state_flags examined, prev;
+      _mesa_memset(&examined, 0, sizeof(examined));
+      prev = *state;
+
+      for (i = 0; i < Elements(atoms); i++) {
+         const struct brw_tracked_state *atom = atoms[i];
+         struct brw_state_flags generated;
+
+         assert(atom->dirty.mesa ||
+                atom->dirty.brw ||
+                atom->dirty.cache);
+
+         if (brw->intel.Fallback)
+            break;
+
+         if (check_state(state, &atom->dirty)) {
+            if (atom->emit) {
+               atom->emit( brw );
+            }
+         }
+
+         accumulate_state(&examined, &atom->dirty);
+
+         /* generated = (prev ^ state)
+          * if (examined & generated)
+          *     fail;
+          */
+         xor_states(&generated, &prev, state);
+         assert(!check_state(&examined, &generated));
+         prev = *state;
+      }
+   }
+   else {
+      for (i = 0; i < Elements(atoms); i++) {
+         const struct brw_tracked_state *atom = atoms[i];
+
+         if (brw->intel.Fallback)
+            break;
+
+         if (check_state(state, &atom->dirty)) {
+            if (atom->emit) {
+               atom->emit( brw );
+            }
+         }
+      }
+   }
+
+   if (INTEL_DEBUG & DEBUG_STATE) {
+      brw_update_dirty_count(mesa_bits, state->mesa);
+      brw_update_dirty_count(brw_bits, state->brw);
+      brw_update_dirty_count(cache_bits, state->cache);
+      if (dirty_count++ % 1000 == 0) {
+         brw_print_dirty_count(mesa_bits, state->mesa);
+         brw_print_dirty_count(brw_bits, state->brw);
+         brw_print_dirty_count(cache_bits, state->cache);
+         fprintf(stderr, "\n");
+      }
+   }
+
+   if (!brw->intel.Fallback)
+      memset(state, 0, sizeof(*state));
+}
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
new file mode 100644
index 0000000000..66d4127271
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -0,0 +1,1575 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_STRUCTS_H +#define BRW_STRUCTS_H + + +/** Number of general purpose registers (VS, WM, etc) */ +#define BRW_MAX_GRF 128 + +/** Number of message register file registers */ +#define BRW_MAX_MRF 16 + + +/* Command packets: struct header is the generic form, a 16-bit length and a 16-bit opcode field. + */ +struct header +{ + GLuint length:16; + GLuint opcode:16; +}; + +/* The header viewed either as bitfields (bits) or as one GLuint (dword). */ +union header_union +{ + struct header bits; + GLuint dword; +}; + +struct brw_3d_control +{ + struct + { + GLuint length:8; + GLuint notify_enable:1; + GLuint pad:3; + GLuint wc_flush_enable:1; + GLuint depth_stall_enable:1; + GLuint operation:2; + GLuint opcode:16; + } header; + + struct + { + GLuint pad:2; + GLuint dest_addr_type:1; + GLuint dest_addr:29; + } dest; + + GLuint dword2; + GLuint dword3; +}; + + +struct brw_3d_primitive +{ + struct + { + GLuint length:8; + GLuint pad:2; + GLuint topology:5; + GLuint indexed:1; + GLuint opcode:16; + } header; + + GLuint verts_per_instance; + GLuint start_vert_location; + GLuint instance_count; + GLuint start_instance_location; + GLuint base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE 0x1 +#define BRW_FLUSH_STATE_CACHE 0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 +/* flags is 4 bits wide and the four values defined above are single bits that fit it; presumably that is where they are stored - confirm at the call sites. */ +struct brw_mi_flush +{ + GLuint flags:4; + GLuint pad:12; + GLuint opcode:16; +}; + +struct brw_vf_statistics +{ + GLuint statistics_enable:1; + GLuint pad:15; + GLuint opcode:16; +}; + + + +struct brw_binding_table_pointers +{ + struct header header; + GLuint vs; + GLuint gs; + GLuint clp; + GLuint sf; + GLuint wm; +}; + + +struct brw_blend_constant_color +{ + struct header header; + GLfloat blend_constant_color[4]; +}; + +/* Each dwordN union exposes its bitfields (bits) alongside a raw GLuint (dword). */ +struct brw_depthbuffer +{ + union header_union header; + + union { + struct { + GLuint pitch:18; + GLuint format:3; + GLuint pad:2; + GLuint 
software_tiled_rendering_mode:2; + GLuint depth_offset_disable:1; + GLuint tile_walk:1; + GLuint tiled_surface:1; + GLuint pad2:1; + GLuint surface_type:3; + } bits; + GLuint dword; + } dword1; + + GLuint dword2_base_addr; + + union { + struct { + GLuint pad:1; + GLuint mipmap_layout:1; + GLuint lod:4; + GLuint width:13; + GLuint height:13; + } bits; + GLuint dword; + } dword3; + + union { + struct { + GLuint pad:10; + GLuint min_array_element:11; + GLuint depth:11; + } bits; + GLuint dword; + } dword4; +}; +/* Same members as struct brw_depthbuffer plus a trailing dword5 with 16-bit xoffset and yoffset fields. */ +struct brw_depthbuffer_g4x +{ + union header_union header; + + union { + struct { + GLuint pitch:18; + GLuint format:3; + GLuint pad:2; + GLuint software_tiled_rendering_mode:2; + GLuint depth_offset_disable:1; + GLuint tile_walk:1; + GLuint tiled_surface:1; + GLuint pad2:1; + GLuint surface_type:3; + } bits; + GLuint dword; + } dword1; + + GLuint dword2_base_addr; + + union { + struct { + GLuint pad:1; + GLuint mipmap_layout:1; + GLuint lod:4; + GLuint width:13; + GLuint height:13; + } bits; + GLuint dword; + } dword3; + + union { + struct { + GLuint pad:10; + GLuint min_array_element:11; + GLuint depth:11; + } bits; + GLuint dword; + } dword4; + + union { + struct { + GLuint xoffset:16; + GLuint yoffset:16; + } bits; + GLuint dword; + } dword5; /* NEW in Integrated Graphics Device */ +}; + +struct brw_drawrect +{ + struct header header; + GLuint xmin:16; + GLuint ymin:16; + GLuint xmax:16; + GLuint ymax:16; + GLuint xorg:16; + GLuint yorg:16; +}; + + + + +struct brw_global_depth_offset_clamp +{ + struct header header; + GLfloat depth_offset_clamp; +}; + +struct brw_indexbuffer +{ + union { + struct + { + GLuint length:8; + GLuint index_format:2; + GLuint cut_index_enable:1; + GLuint pad:5; + GLuint opcode:16; + } bits; + GLuint dword; + + } header; + + GLuint buffer_start; + GLuint buffer_end; +}; + +/* NEW in Integrated Graphics Device */ +struct brw_aa_line_parameters +{ + struct header header; + + struct { + GLuint aa_coverage_scope:8; + GLuint pad0:8; + 
GLuint aa_coverage_bias:8; + GLuint pad1:8; + } bits0; + + struct { + GLuint aa_coverage_endcap_slope:8; + GLuint pad0:8; + GLuint aa_coverage_endcap_bias:8; + GLuint pad1:8; + } bits1; +}; + +struct brw_line_stipple +{ + struct header header; + + struct + { + GLuint pattern:16; + GLuint pad:16; + } bits0; + + struct + { + GLuint repeat_count:9; + GLuint pad:7; + GLuint inverse_repeat_count:16; + } bits1; +}; + +/* One group per unit (vs, gs, clp, sf, wm, cc), each with a 27-bit offset field; gs and clp also have a 1-bit enable field. */ +struct brw_pipelined_state_pointers +{ + struct header header; + + struct { + GLuint pad:5; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ + } vs; + + struct + { + GLuint enable:1; + GLuint pad:4; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ + } gs; + + struct + { + GLuint enable:1; + GLuint pad:4; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ + } clp; + + struct + { + GLuint pad:5; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ + } sf; + + struct + { + GLuint pad:5; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ + } wm; + + struct + { + GLuint pad:5; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! 
*/ + } cc; +}; + + +struct brw_polygon_stipple_offset +{ + struct header header; + + struct { + GLuint y_offset:5; + GLuint pad:3; + GLuint x_offset:5; + GLuint pad0:19; + } bits0; +}; + + + +struct brw_polygon_stipple +{ + struct header header; + GLuint stipple[32]; +}; + + + +struct brw_pipeline_select +{ + struct + { + GLuint pipeline_select:1; + GLuint pad:15; + GLuint opcode:16; + } header; +}; + + +struct brw_pipe_control +{ + struct + { + GLuint length:8; + GLuint notify_enable:1; + GLuint texture_cache_flush_enable:1; + GLuint indirect_state_pointers_disable:1; + GLuint instruction_state_cache_flush_enable:1; + GLuint write_cache_flush_enable:1; + GLuint depth_stall_enable:1; + GLuint post_sync_operation:2; + + GLuint opcode:16; + } header; + + struct + { + GLuint pad:2; + GLuint dest_addr_type:1; + GLuint dest_addr:29; + } bits1; + + GLuint data0; + GLuint data1; +}; + +/* The header has one *_realloc bit per unit; bits0 and bits1 hold the fence fields (10 bits each, cs_fence 11 bits). */ +struct brw_urb_fence +{ + struct + { + GLuint length:8; + GLuint vs_realloc:1; + GLuint gs_realloc:1; + GLuint clp_realloc:1; + GLuint sf_realloc:1; + GLuint vfe_realloc:1; + GLuint cs_realloc:1; + GLuint pad:2; + GLuint opcode:16; + } header; + + struct + { + GLuint vs_fence:10; + GLuint gs_fence:10; + GLuint clp_fence:10; + GLuint pad:2; + } bits0; + + struct + { + GLuint sf_fence:10; + GLuint vf_fence:10; + GLuint cs_fence:11; + GLuint pad:1; + } bits1; +}; + +struct brw_cs_urb_state +{ + struct header header; + + struct + { + GLuint nr_urb_entries:3; + GLuint pad:1; + GLuint urb_entry_size:5; + GLuint pad0:23; + } bits0; +}; + +struct brw_constant_buffer +{ + struct + { + GLuint length:8; + GLuint valid:1; + GLuint pad:7; + GLuint opcode:16; + } header; + + struct + { + GLuint buffer_length:6; + GLuint buffer_address:26; + } bits0; +}; +/* bits0..bits4 each pair a 1-bit modify_enable with a 27-bit address or a 20-bit upper bound. */ +struct brw_state_base_address +{ + struct header header; + + struct + { + GLuint modify_enable:1; + GLuint pad:4; + GLuint general_state_address:27; + } bits0; + + struct + { + GLuint modify_enable:1; + GLuint pad:4; + GLuint 
surface_state_address:27; + } bits1; + + struct + { + GLuint modify_enable:1; + GLuint pad:4; + GLuint indirect_object_state_address:27; + } bits2; + + struct + { + GLuint modify_enable:1; + GLuint pad:11; + GLuint general_state_upper_bound:20; + } bits3; + + struct + { + GLuint modify_enable:1; + GLuint pad:11; + GLuint indirect_object_state_upper_bound:20; + } bits4; +}; + +struct brw_state_prefetch +{ + struct header header; + + struct + { + GLuint prefetch_count:3; + GLuint pad:3; + GLuint prefetch_pointer:26; + } bits0; +}; + +struct brw_system_instruction_pointer +{ + struct header header; + + struct + { + GLuint pad:4; + GLuint system_instruction_pointer:28; + } bits0; +}; + + + + +/* State structs for the various fixed function units: thread0..thread3 are bitfield groups of 32 bits each that the per-unit structs below embed. + */ + + +struct thread0 +{ + GLuint pad0:1; + GLuint grf_reg_count:3; + GLuint pad1:2; + GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */ +}; + +struct thread1 +{ + GLuint ext_halt_exception_enable:1; + GLuint sw_exception_enable:1; + GLuint mask_stack_exception_enable:1; + GLuint timeout_exception_enable:1; + GLuint illegal_op_exception_enable:1; + GLuint pad0:3; + GLuint depth_coef_urb_read_offset:6; /* WM only */ + GLuint pad1:2; + GLuint floating_point_mode:1; + GLuint thread_priority:1; + GLuint binding_table_entry_count:8; + GLuint pad3:5; + GLuint single_program_flow:1; +}; + +struct thread2 +{ + GLuint per_thread_scratch_space:4; + GLuint pad0:6; + GLuint scratch_space_base_pointer:22; +}; + + +struct thread3 +{ + GLuint dispatch_grf_start_reg:4; + GLuint urb_entry_read_offset:6; + GLuint pad0:1; + GLuint urb_entry_read_length:6; + GLuint pad1:1; + GLuint const_urb_entry_read_offset:6; + GLuint pad2:1; + GLuint const_urb_entry_read_length:6; + GLuint pad3:1; +}; + + +/* Embeds the shared thread0, thread2 and thread3 groups but declares its own thread1 layout. */ +struct brw_clip_unit_state +{ + struct thread0 thread0; + struct + { + GLuint pad0:7; + GLuint sw_exception_enable:1; + GLuint pad1:3; + GLuint mask_stack_exception_enable:1; + GLuint pad2:1; + GLuint illegal_op_exception_enable:1; + 
GLuint pad3:2; + GLuint floating_point_mode:1; + GLuint thread_priority:1; + GLuint binding_table_entry_count:8; + GLuint pad4:5; + GLuint single_program_flow:1; + } thread1; + + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:9; + GLuint gs_output_stats:1; /* not always */ + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:5; /* may be less */ + GLuint pad3:2; + } thread4; + + struct + { + GLuint pad0:13; + GLuint clip_mode:3; + GLuint userclip_enable_flags:8; + GLuint userclip_must_clip:1; + GLuint negative_w_clip_test:1; + GLuint guard_band_enable:1; + GLuint viewport_z_clip_enable:1; + GLuint viewport_xy_clip_enable:1; + GLuint vertex_position_space:1; + GLuint api_mode:1; + GLuint pad2:1; + } clip5; + + struct + { + GLuint pad0:5; + GLuint clipper_viewport_state_ptr:27; + } clip6; + + + GLfloat viewport_xmin; + GLfloat viewport_xmax; + GLfloat viewport_ymin; + GLfloat viewport_ymax; +}; + + +/* cc0..cc6 are bitfield groups; cc7 stores the alpha reference as a float or as four bytes. */ +struct brw_cc_unit_state +{ + struct + { + GLuint pad0:3; + GLuint bf_stencil_pass_depth_pass_op:3; + GLuint bf_stencil_pass_depth_fail_op:3; + GLuint bf_stencil_fail_op:3; + GLuint bf_stencil_func:3; + GLuint bf_stencil_enable:1; + GLuint pad1:2; + GLuint stencil_write_enable:1; + GLuint stencil_pass_depth_pass_op:3; + GLuint stencil_pass_depth_fail_op:3; + GLuint stencil_fail_op:3; + GLuint stencil_func:3; + GLuint stencil_enable:1; + } cc0; + + + struct + { + GLuint bf_stencil_ref:8; + GLuint stencil_write_mask:8; + GLuint stencil_test_mask:8; + GLuint stencil_ref:8; + } cc1; + + + struct + { + GLuint logicop_enable:1; + GLuint pad0:10; + GLuint depth_write_enable:1; + GLuint depth_test_function:3; + GLuint depth_test:1; + GLuint bf_stencil_write_mask:8; + GLuint bf_stencil_test_mask:8; + } cc2; + + + struct + { + GLuint pad0:8; + GLuint alpha_test_func:3; + GLuint alpha_test:1; + GLuint blend_enable:1; + GLuint ia_blend_enable:1; + GLuint 
pad1:1; + GLuint alpha_test_format:1; + GLuint pad2:16; + } cc3; + + struct + { + GLuint pad0:5; + GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ + } cc4; + + struct + { + GLuint pad0:2; + GLuint ia_dest_blend_factor:5; + GLuint ia_src_blend_factor:5; + GLuint ia_blend_function:3; + GLuint statistics_enable:1; + GLuint logicop_func:4; + GLuint pad1:11; + GLuint dither_enable:1; + } cc5; + + struct + { + GLuint clamp_post_alpha_blend:1; + GLuint clamp_pre_alpha_blend:1; + GLuint clamp_range:2; + GLuint pad0:11; + GLuint y_dither_offset:2; + GLuint x_dither_offset:2; + GLuint dest_blend_factor:5; + GLuint src_blend_factor:5; + GLuint blend_function:3; + } cc6; + + struct { + union { + GLfloat f; + GLubyte ub[4]; + } alpha_ref; + } cc7; +}; + + +/* thread0..thread3 followed by the SF-specific thread4 and sf5..sf7 groups. */ +struct brw_sf_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:10; + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:6; + GLuint pad3:1; + } thread4; + + struct + { + GLuint front_winding:1; + GLuint viewport_transform:1; + GLuint pad0:3; + GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ + } sf5; + + struct + { + GLuint pad0:9; + GLuint dest_org_vbias:4; + GLuint dest_org_hbias:4; + GLuint scissor:1; + GLuint disable_2x2_trifilter:1; + GLuint disable_zero_pix_trifilter:1; + GLuint point_rast_rule:2; + GLuint line_endcap_aa_region_width:2; + GLuint line_width:4; + GLuint fast_scissor_disable:1; + GLuint cull_mode:2; + GLuint aa_enable:1; + } sf6; + + struct + { + GLuint point_size:11; + GLuint use_point_size_state:1; + GLuint subpixel_precision:1; + GLuint sprite_point:1; + GLuint pad0:10; + GLuint aa_line_distance_mode:1; + GLuint trifan_pv:2; + GLuint linestrip_pv:2; + GLuint tristrip_pv:2; + GLuint line_last_pixel_enable:1; + } sf7; + +}; + + +struct brw_gs_unit_state +{ + struct 
thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:8; + GLuint rendering_enable:1; /* for IGDNG */ + GLuint pad4:1; + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:5; + GLuint pad3:2; + } thread4; + + struct + { + GLuint sampler_count:3; + GLuint pad0:2; + GLuint sampler_state_pointer:27; + } gs5; + + + struct + { + GLuint max_vp_index:4; + GLuint pad0:12; + GLuint svbi_post_inc_value:10; + GLuint pad1:1; + GLuint svbi_post_inc_enable:1; + GLuint svbi_payload:1; + GLuint discard_adjaceny:1; + GLuint reorder_enable:1; + GLuint pad2:1; + } gs6; +}; + + +struct brw_vs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + GLuint pad0:10; + GLuint stats_enable:1; + GLuint nr_urb_entries:7; + GLuint pad1:1; + GLuint urb_entry_allocation_size:5; + GLuint pad2:1; + GLuint max_threads:6; + GLuint pad3:1; + } thread4; + + struct + { + GLuint sampler_count:3; + GLuint pad0:2; + GLuint sampler_state_pointer:27; + } vs5; + + struct + { + GLuint vs_enable:1; + GLuint vert_cache_disable:1; + GLuint pad0:30; + } vs6; +}; + +/* thread0..thread3, then wm4 and wm5, two GLfloat depth offset values and the wm8..wm10 groups. */ +struct brw_wm_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + GLuint stats_enable:1; + GLuint depth_buffer_clear:1; + GLuint sampler_count:3; + GLuint sampler_state_pointer:27; + } wm4; + + struct + { + GLuint enable_8_pix:1; + GLuint enable_16_pix:1; + GLuint enable_32_pix:1; + GLuint enable_con_32_pix:1; + GLuint enable_con_64_pix:1; + GLuint pad0:5; + GLuint legacy_global_depth_bias:1; + GLuint line_stipple:1; + GLuint depth_offset:1; + GLuint polygon_stipple:1; + GLuint line_aa_region_width:2; + GLuint line_endcap_aa_region_width:2; + GLuint early_depth_test:1; + GLuint thread_dispatch_enable:1; + GLuint program_uses_depth:1; 
+ GLuint program_computes_depth:1; + GLuint program_uses_killpixel:1; + GLuint legacy_line_rast: 1; + GLuint transposed_urb_read_enable:1; + GLuint max_threads:7; + } wm5; + + GLfloat global_depth_offset_constant; + GLfloat global_depth_offset_scale; + + /* for IGDNG only: wm8..wm10 repeat the field layout of struct thread0 with _1, _2 and _3 name suffixes */ + struct { + GLuint pad0:1; + GLuint grf_reg_count_1:3; + GLuint pad1:2; + GLuint kernel_start_pointer_1:26; + } wm8; + + struct { + GLuint pad0:1; + GLuint grf_reg_count_2:3; + GLuint pad1:2; + GLuint kernel_start_pointer_2:26; + } wm9; + + struct { + GLuint pad0:1; + GLuint grf_reg_count_3:3; + GLuint pad1:2; + GLuint kernel_start_pointer_3:26; + } wm10; +}; + +struct brw_sampler_default_color { + GLfloat color[4]; +}; + +struct brw_sampler_state +{ + + struct + { + GLuint shadow_function:3; + GLuint lod_bias:11; + GLuint min_filter:3; + GLuint mag_filter:3; + GLuint mip_filter:2; + GLuint base_level:5; + GLuint pad:1; + GLuint lod_preclamp:1; + GLuint default_color_mode:1; + GLuint pad0:1; + GLuint disable:1; + } ss0; + + struct + { + GLuint r_wrap_mode:3; + GLuint t_wrap_mode:3; + GLuint s_wrap_mode:3; + GLuint pad:3; + GLuint max_lod:10; + GLuint min_lod:10; + } ss1; + + + struct + { + GLuint pad:5; + GLuint default_color_pointer:27; + } ss2; + + struct + { + GLuint pad:19; + GLuint max_aniso:3; + GLuint chroma_key_mode:1; + GLuint chroma_key_index:2; + GLuint chroma_key_enable:1; + GLuint monochrome_filter_width:3; + GLuint monochrome_filter_height:3; + } ss3; +}; + + +struct brw_clipper_viewport +{ + GLfloat xmin; + GLfloat xmax; + GLfloat ymin; + GLfloat ymax; +}; + +struct brw_cc_viewport +{ + GLfloat min_depth; + GLfloat max_depth; +}; + +struct brw_sf_viewport +{ + struct { + GLfloat m00; + GLfloat m11; + GLfloat m22; + GLfloat m30; + GLfloat m31; + GLfloat m32; + } viewport; + + /* scissor coordinates are inclusive */ + struct { + GLshort xmin; + GLshort ymin; + GLshort xmax; + GLshort ymax; + } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler chapter... 
+ */ +struct brw_surface_state +{ + struct { + GLuint cube_pos_z:1; + GLuint cube_neg_z:1; + GLuint cube_pos_y:1; + GLuint cube_neg_y:1; + GLuint cube_pos_x:1; + GLuint cube_neg_x:1; + GLuint pad:4; + GLuint mipmap_layout_mode:1; + GLuint vert_line_stride_ofs:1; + GLuint vert_line_stride:1; + GLuint color_blend:1; + GLuint writedisable_blue:1; + GLuint writedisable_green:1; + GLuint writedisable_red:1; + GLuint writedisable_alpha:1; + GLuint surface_format:9; /**< BRW_SURFACEFORMAT_x */ + GLuint data_return_format:1; + GLuint pad0:1; + GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */ + } ss0; + + struct { + GLuint base_addr; + } ss1; + + struct { + GLuint pad:2; + GLuint mip_count:4; + GLuint width:13; + GLuint height:13; + } ss2; + + struct { + GLuint tile_walk:1; + GLuint tiled_surface:1; + GLuint pad:1; + GLuint pitch:18; + GLuint depth:11; + } ss3; + + struct { + GLuint multisample_position_palette_index:3; + GLuint pad1:1; + GLuint num_multisamples:3; + GLuint pad0:1; + GLuint render_target_view_extent:9; + GLuint min_array_elt:11; + GLuint min_lod:4; + } ss4; + + struct { + GLuint pad1:16; + GLuint llc_mapping:1; + GLuint mlc_mapping:1; + GLuint gfdt:1; + GLuint gfdt_src:1; + GLuint y_offset:4; + GLuint pad0:1; + GLuint x_offset:7; + } ss5; /* New in G4X */ + +}; + + +/* vb0 bitfields followed by the start_addr, max_index and instance_data_step_rate words. */ +struct brw_vertex_buffer_state +{ + struct { + GLuint pitch:11; + GLuint pad:15; + GLuint access_type:1; + GLuint vb_index:5; + } vb0; + + GLuint start_addr; + GLuint max_index; +#if 1 + GLuint instance_data_step_rate; /* not included for sequential/random vertices? 
*/ +#endif +}; + +#define BRW_VBP_MAX 17 +/* A header followed by BRW_VBP_MAX vertex buffer entries. */ +struct brw_vb_array_state { + struct header header; + struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; +}; + + +struct brw_vertex_element_state +{ + struct + { + GLuint src_offset:11; + GLuint pad:5; + GLuint src_format:9; + GLuint pad0:1; + GLuint valid:1; + GLuint vertex_buffer_index:5; + } ve0; + + struct + { + GLuint dst_offset:8; + GLuint pad:8; + GLuint vfcomponent3:4; + GLuint vfcomponent2:4; + GLuint vfcomponent1:4; + GLuint vfcomponent0:4; + } ve1; +}; + +#define BRW_VEP_MAX 18 +/* A header followed by BRW_VEP_MAX vertex element entries. */ +struct brw_vertex_element_packet { + struct header header; + struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ +}; + +/* Also embedded as the urb member of brw_instruction.bits3. */ +struct brw_urb_immediate { + GLuint opcode:4; + GLuint offset:6; + GLuint swizzle_control:2; + GLuint pad:1; + GLuint allocate:1; + GLuint used:1; + GLuint complete:1; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; +}; + +/* Instruction format for the execution units: a header group followed by the bits1, bits2 and bits3 unions, whose members are alternative layouts of the same storage (da1/ia1/da16/ia16 presumably select direct or indirect addressing in align1 or align16 mode - confirm against the hardware documentation). + */ + +struct brw_instruction +{ + struct + { + GLuint opcode:7; + GLuint pad:1; + GLuint access_mode:1; + GLuint mask_control:1; + GLuint dependency_control:2; + GLuint compression_control:2; + GLuint thread_control:2; + GLuint predicate_control:4; + GLuint predicate_inverse:1; + GLuint execution_size:3; + GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */ + GLuint pad0:2; + GLuint debug_control:1; + GLuint saturate:1; + } header; + + union { + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; + GLuint src1_reg_type:3; + GLuint pad:1; + GLuint dest_subreg_nr:5; + GLuint dest_reg_nr:8; + GLuint dest_horiz_stride:2; + GLuint dest_address_mode:1; + } da1; + + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; /* 0x00000c00 */ + GLuint 
src1_reg_type:3; /* 0x00007000 */ + GLuint pad:1; + GLint dest_indirect_offset:10; /* offset against the deref'd address reg */ + GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */ + GLuint dest_horiz_stride:2; + GLuint dest_address_mode:1; + } ia1; + + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint src1_reg_file:2; + GLuint src1_reg_type:3; + GLuint pad:1; + GLuint dest_writemask:4; + GLuint dest_subreg_nr:1; + GLuint dest_reg_nr:8; + GLuint pad1:2; + GLuint dest_address_mode:1; + } da16; + + struct + { + GLuint dest_reg_file:2; + GLuint dest_reg_type:3; + GLuint src0_reg_file:2; + GLuint src0_reg_type:3; + GLuint pad0:6; + GLuint dest_writemask:4; + GLint dest_indirect_offset:6; + GLuint dest_subreg_nr:3; + GLuint pad1:2; + GLuint dest_address_mode:1; + } ia16; + } bits1; + + + union { + struct + { + GLuint src0_subreg_nr:5; + GLuint src0_reg_nr:8; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_horiz_stride:2; + GLuint src0_width:3; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad:6; + } da1; + + struct + { + GLint src0_indirect_offset:10; + GLuint src0_subreg_nr:3; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_horiz_stride:2; + GLuint src0_width:3; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad:6; + } ia1; + + struct + { + GLuint src0_swz_x:2; + GLuint src0_swz_y:2; + GLuint src0_subreg_nr:1; + GLuint src0_reg_nr:8; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_swz_z:2; + GLuint src0_swz_w:2; + GLuint pad0:1; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad1:6; + } da16; + + struct + { + GLuint src0_swz_x:2; + GLuint src0_swz_y:2; + GLint src0_indirect_offset:6; + GLuint src0_subreg_nr:3; + GLuint src0_abs:1; + GLuint src0_negate:1; + GLuint src0_address_mode:1; + GLuint src0_swz_z:2; + GLuint 
src0_swz_w:2; + GLuint pad0:1; + GLuint src0_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad1:6; + } ia16; + + struct + { + GLuint pad:26; + GLuint end_of_thread:1; + GLuint pad1:1; + GLuint sfid:4; + } send_igdng; /* for IGDNG only */ + + } bits2; +/* bits3 overlays the src1 operand layouts, the if_else jump fields, the message layouts and the raw d/ud/f views. */ + union + { + struct + { + GLuint src1_subreg_nr:5; + GLuint src1_reg_nr:8; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint src1_address_mode:1; + GLuint src1_horiz_stride:2; + GLuint src1_width:3; + GLuint src1_vert_stride:4; + GLuint pad0:7; + } da1; + + struct + { + GLuint src1_swz_x:2; + GLuint src1_swz_y:2; + GLuint src1_subreg_nr:1; + GLuint src1_reg_nr:8; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint src1_address_mode:1; + GLuint src1_swz_z:2; + GLuint src1_swz_w:2; + GLuint pad1:1; + GLuint src1_vert_stride:4; + GLuint pad2:7; + } da16; + + struct + { + GLint src1_indirect_offset:10; + GLuint src1_subreg_nr:3; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint src1_address_mode:1; + GLuint src1_horiz_stride:2; + GLuint src1_width:3; + GLuint src1_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad1:6; + } ia1; + + struct + { + GLuint src1_swz_x:2; + GLuint src1_swz_y:2; + GLint src1_indirect_offset:6; + GLuint src1_subreg_nr:3; + GLuint src1_abs:1; + GLuint src1_negate:1; + GLuint pad0:1; + GLuint src1_swz_z:2; + GLuint src1_swz_w:2; + GLuint pad1:1; + GLuint src1_vert_stride:4; + GLuint flag_reg_nr:1; + GLuint pad2:6; + } ia16; + + + struct + { + GLint jump_count:16; /* note: signed */ + GLuint pop_count:4; + GLuint pad0:12; + } if_else; + + struct { + GLuint function:4; + GLuint int_type:1; + GLuint precision:1; + GLuint saturate:1; + GLuint data_type:1; + GLuint pad0:8; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } math; + + struct { + GLuint function:4; + GLuint int_type:1; + GLuint precision:1; + GLuint saturate:1; + GLuint data_type:1; + GLuint snapshot:1; + GLuint pad0:10; + GLuint header_present:1; + GLuint 
response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } math_igdng; + + struct { + GLuint binding_table_index:8; + GLuint sampler:4; + GLuint return_format:2; + GLuint msg_type:2; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } sampler; + + struct { + GLuint binding_table_index:8; + GLuint sampler:4; + GLuint msg_type:4; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } sampler_g4x; + + struct { + GLuint binding_table_index:8; + GLuint sampler:4; + GLuint msg_type:4; + GLuint simd_mode:2; + GLuint pad0:1; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } sampler_igdng; + + struct brw_urb_immediate urb; + + struct { + GLuint opcode:4; + GLuint offset:6; + GLuint swizzle_control:2; + GLuint pad:1; + GLuint allocate:1; + GLuint used:1; + GLuint complete:1; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } urb_igdng; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:4; + GLuint msg_type:2; + GLuint target_cache:2; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } dp_read; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint msg_type:3; + GLuint target_cache:2; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_read_igdng; + + struct { + GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint pixel_scoreboard_clear:1; + GLuint msg_type:3; + GLuint send_commit_msg:1; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } dp_write; + + struct { + GLuint 
binding_table_index:8; + GLuint msg_control:3; + GLuint pixel_scoreboard_clear:1; + GLuint msg_type:3; + GLuint send_commit_msg:1; + GLuint pad0:3; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } dp_write_igdng; + + struct { + GLuint pad:16; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } generic; + + struct { + GLuint pad:19; + GLuint header_present:1; + GLuint response_length:5; + GLuint msg_length:4; + GLuint pad1:2; + GLuint end_of_thread:1; + } generic_igdng; +/* Raw views of the same storage: signed, unsigned and float. */ + GLint d; + GLuint ud; + float f; + } bits3; +}; + + +#endif diff --git a/src/gallium/drivers/i965/brw_tex.c b/src/gallium/drivers/i965/brw_tex.c new file mode 100644 index 0000000000..e911b105b2 --- /dev/null +++ b/src/gallium/drivers/i965/brw_tex.c @@ -0,0 +1,59 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "main/glheader.h" +#include "main/mtypes.h" +#include "main/teximage.h" + +#include "intel_context.h" +#include "intel_regions.h" +#include "intel_tex.h" +#include "brw_context.h" + +/** + * Finalizes all textures, completing any rendering that needs to be done + * to prepare them. + */ +void brw_validate_textures( struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct intel_context *intel = &brw->intel; + int i; + + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; + + if (texUnit->_ReallyEnabled) { + intel_finalize_mipmap_tree(intel, i); + } + } +} diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c new file mode 100644 index 0000000000..5986cbffad --- /dev/null +++ b/src/gallium/drivers/i965/brw_tex_layout.c @@ -0,0 +1,222 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +/* Code to layout images in a mipmap tree for i965. 
+ */
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex_layout.h"
+#include "intel_context.h"
+#include "main/macros.h"
+#include "intel_chipset.h"
+
+#define FILE_DEBUG_FLAG DEBUG_MIPTREE
+
+/**
+ * Compute the image layout of a mipmap tree: fills in mt->pitch and
+ * mt->total_height and records per-level / per-image offsets through the
+ * intel_miptree_set_*() helpers.
+ *
+ * Three paths are taken depending on mt->target:
+ *  - GL_TEXTURE_CUBE_MAP on IGDNG: each face is placed at q * qpitch.
+ *  - GL_TEXTURE_3D, and GL_TEXTURE_CUBE_MAP on non-IGDNG parts (which
+ *    falls through from the cube case): slices/faces are packed in rows.
+ *  - everything else: delegated to i945_miptree_layout_2d().
+ *
+ * \param intel   context, used for the device id and pitch alignment
+ * \param mt      mipmap tree to lay out (modified in place)
+ * \param tiling  tiling mode, forwarded to the pitch/2D layout helpers
+ * \return always GL_TRUE
+ */
+GLboolean brw_miptree_layout(struct intel_context *intel,
+                             struct intel_mipmap_tree *mt,
+                             uint32_t tiling)
+{
+   /* XXX: these vary depending on image format: */
+   /* GLint align_w = 4; */
+
+   switch (mt->target) {
+   case GL_TEXTURE_CUBE_MAP:
+      if (IS_IGDNG(intel->intelScreen->deviceID)) {
+         GLuint align_h = 2, align_w = 4;
+         GLuint level;
+         GLuint x = 0;
+         GLuint y = 0;
+         GLuint width = mt->width0;
+         GLuint height = mt->height0;
+         GLuint qpitch = 0;
+         GLuint y_pitch = 0;
+
+         mt->pitch = mt->width0;
+         intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+         y_pitch = ALIGN(height, align_h);
+
+         if (mt->compressed) {
+            mt->pitch = ALIGN(mt->width0, align_w);
+         }
+
+         /* With more than one level, levels 1 and 2 sit side by side
+          * (see the x advance in the loop below), so the pitch must be
+          * wide enough to hold both.
+          */
+         if (mt->first_level != mt->last_level) {
+            GLuint mip1_width;
+
+            if (mt->compressed) {
+               mip1_width = ALIGN(minify(mt->width0), align_w) + ALIGN(minify(minify(mt->width0)), align_w);
+            } else {
+               mip1_width = ALIGN(minify(mt->width0), align_w) + minify(minify(mt->width0));
+            }
+
+            if (mip1_width > mt->pitch) {
+               mt->pitch = mip1_width;
+            }
+         }
+
+         mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
+
+         /* qpitch is the per-face stride handed to
+          * intel_miptree_set_image_offset_ex() as q * qpitch; it is scaled
+          * by pitch * cpp, so presumably a byte offset -- TODO confirm.
+          * total_height covers all six faces.
+          */
+         if (mt->compressed) {
+            qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
+            mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
+         } else {
+            qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
+            mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
+         }
+
+         for (level = mt->first_level; level <= mt->last_level; level++) {
+            GLuint img_height;
+            GLuint nr_images = 6;
+            GLuint q = 0;
+
+            intel_miptree_set_level_info(mt, level, nr_images, x, y, width,
+                                         height, 1);
+
+            for (q = 0; q < nr_images; q++)
+               intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch);
+
+            if (mt->compressed)
+               img_height = MAX2(1, height/4);
+            else
+               img_height = ALIGN(height, align_h);
+
+            /* The level after first_level + 1 is placed to the right of
+             * it; every other level is stacked below the previous one.
+             */
+            if (level == mt->first_level + 1) {
+               x += ALIGN(width, align_w);
+            }
+            else {
+               y += img_height;
+            }
+
+            width = minify(width);
+            height = minify(height);
+         }
+
+         break;
+      }
+      /* No break here: cube maps on non-IGDNG parts fall through to the
+       * packed layout below, which uses 6 images per level when the
+       * target is not GL_TEXTURE_3D.
+       */
+
+   case GL_TEXTURE_3D: {
+      GLuint width = mt->width0;
+      GLuint height = mt->height0;
+      GLuint depth = mt->depth0;
+      GLuint pack_x_pitch, pack_x_nr;
+      GLuint pack_y_pitch;
+      GLuint level;
+      GLuint align_h = 2;
+      GLuint align_w = 4;
+
+      mt->total_height = 0;
+      intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+
+      if (mt->compressed) {
+         mt->pitch = ALIGN(width, align_w);
+         pack_y_pitch = (height + 3) / 4;
+      } else {
+         mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
+         pack_y_pitch = ALIGN(mt->height0, align_h);
+      }
+
+      /* pack_x_nr images of width pack_x_pitch are placed per row; the
+       * first level has one image per row.
+       */
+      pack_x_pitch = width;
+      pack_x_nr = 1;
+
+      for (level = mt->first_level ; level <= mt->last_level ; level++) {
+         GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;
+         GLint x = 0;
+         GLint y = 0;
+         GLint q, j;
+
+         intel_miptree_set_level_info(mt, level, nr_images,
+                                      0, mt->total_height,
+                                      width, height, depth);
+
+         /* Lay the images of this level out in rows of pack_x_nr. */
+         for (q = 0; q < nr_images;) {
+            for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+               intel_miptree_set_image_offset(mt, level, q, x, y);
+               x += pack_x_pitch;
+            }
+
+            x = 0;
+            y += pack_y_pitch;
+         }
+
+
+         mt->total_height += y;
+         width = minify(width);
+         height = minify(height);
+         depth = minify(depth);
+
+         /* Smaller levels pack more images per row: halve the per-image
+          * pitch and double the images-per-row count.
+          */
+         if (mt->compressed) {
+            pack_y_pitch = (height + 3) / 4;
+
+            if (pack_x_pitch > ALIGN(width, align_w)) {
+               pack_x_pitch = ALIGN(width, align_w);
+               pack_x_nr <<= 1;
+            }
+         } else {
+            if (pack_x_pitch > 4) {
+               pack_x_pitch >>= 1;
+               pack_x_nr <<= 1;
+               assert(pack_x_pitch * pack_x_nr <= mt->pitch);
+            }
+
+            if (pack_y_pitch > 2) {
+               pack_y_pitch >>= 1;
+               pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+            }
+         }
+
+      }
+      /* The 965's sampler lays cachelines out according to how accesses
+       * in the texture surfaces run, so they may be "vertical" through
+       * memory. As a result, the docs say in Surface Padding Requirements:
+       * Sampling Engine Surfaces that two extra rows of padding are required.
+       * We don't know of similar requirements for pre-965, but given that
+       * those docs are silent on padding requirements in general, let's play
+       * it safe.
+       */
+      if (mt->target == GL_TEXTURE_CUBE_MAP)
+         mt->total_height += 2;
+      break;
+   }
+
+   default:
+      i945_miptree_layout_2d(intel, mt, tiling);
+      break;
+   }
+   /* Debug dump: pitch x total_height x cpp and the resulting size. */
+   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+       mt->pitch,
+       mt->total_height,
+       mt->cpp,
+       mt->pitch * mt->total_height * mt->cpp );
+
+   return GL_TRUE;
+}
+
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
new file mode 100644
index 0000000000..8c6f4355a6
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -0,0 +1,250 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + + +#include "intel_batchbuffer.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#define VS 0 +#define GS 1 +#define CLP 2 +#define SF 3 +#define CS 4 + +/** @file brw_urb.c + * + * Manages the division of the URB space between the various fixed-function + * units. + * + * See the Thread Initiation Management section of the GEN4 B-Spec, and + * the individual *_STATE structures for restrictions on numbers of + * entries and threads. + */ + +/* + * Generally, a unit requires a min_nr_entries based on how many entries + * it produces before the downstream unit gets unblocked and can use and + * dereference some of its handles. 
+ * + * The SF unit preallocates a PUE at the start of thread dispatch, and only + * uses that one. So it requires one entry per thread. + * + * For CLIP, the SF unit will hold the previous primitive while the + * next is getting assembled, meaning that linestrips require 3 CLIP VUEs + * (vertices) to ensure continued processing, trifans require 4, and tristrips + * require 5. There can be 1 or 2 threads, and each has the same requirement. + * + * GS has the same requirement as CLIP, but it never handles tristrips, + * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces. + * We only run it single-threaded. + * + * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X). + * Each thread processes 2 preallocated VUEs (vertices) at a time, and they + * get streamed down as soon as threads processing earlier vertices get + * theirs accepted. + * + * Each unit will take the number of URB entries we give it (based on the + * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs, + * and brw_curbe.c for the CURBEs) and decide the maximum number of + * threads it can support based on that, in brw_*_state.c. + * + * XXX: Are the min_entry_size numbers useful? + * XXX: Verify min_nr_entries, esp for VS. + * XXX: Verify SF min_entry_size. 
+ */
+/* Per-unit URB limits, indexed by the VS/GS/CLP/SF/CS macros above.
+ * Columns: minimum entry count, preferred entry count, minimum entry
+ * size, maximum entry size.
+ */
+static const struct {
+   GLuint min_nr_entries;
+   GLuint preferred_nr_entries;
+   GLuint min_entry_size;
+   GLuint max_entry_size;
+} limits[CS+1] = {
+   { 16, 32, 1, 5 }, /* vs */
+   { 4, 8, 1, 5 }, /* gs */
+   { 5, 10, 1, 5 }, /* clp */
+   { 1, 8, 1, 12 }, /* sf */
+   { 1, 4, 1, 32 } /* cs */
+};
+
+
+/* Recompute the start offset of every unit's URB section from the
+ * current entry counts and entry sizes in brw->urb.  The sections are
+ * laid out back to back in the order VS, GS, CLIP, SF, CS; the VS, GS
+ * and CLIP sections all use vsize as their entry size.
+ *
+ * Returns true when the end of the CS section fits within
+ * URB_SIZES(brw).  The *_start fields are written even on failure.
+ */
+static GLboolean check_urb_layout( struct brw_context *brw )
+{
+   brw->urb.vs_start = 0;
+   brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
+   brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;
+   brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
+   brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
+
+   return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw);
+}
+
+/* Most minimal update, forces re-emit of URB fence packet after GS
+ * unit turned on/off.
+ *
+ * Picks entry counts for each unit, trying in order: the larger
+ * chipset-specific counts (IGDNG / G4X), the preferred counts from
+ * limits[], and finally the minimum counts.  Sets BRW_NEW_URB_FENCE
+ * whenever the layout was recomputed.
+ */
+static void recalculate_urb_fence( struct brw_context *brw )
+{
+   GLuint csize = brw->curbe.total_size;
+   GLuint vsize = brw->vs.prog_data->urb_entry_size;
+   GLuint sfsize = brw->sf.prog_data->urb_entry_size;
+
+   /* Clamp the requested entry sizes up to the per-unit minimum. */
+   if (csize < limits[CS].min_entry_size)
+      csize = limits[CS].min_entry_size;
+
+   if (vsize < limits[VS].min_entry_size)
+      vsize = limits[VS].min_entry_size;
+
+   if (sfsize < limits[SF].min_entry_size)
+      sfsize = limits[SF].min_entry_size;
+
+   /* Recompute when any entry size has grown, or when we are running
+    * constrained and any entry size has shrunk (a better layout may
+    * now fit).
+    */
+   if (brw->urb.vsize < vsize ||
+       brw->urb.sfsize < sfsize ||
+       brw->urb.csize < csize ||
+       (brw->urb.constrained && (brw->urb.vsize > vsize ||
+                                 brw->urb.sfsize > sfsize ||
+                                 brw->urb.csize > csize))) {
+
+
+      brw->urb.csize = csize;
+      brw->urb.sfsize = sfsize;
+      brw->urb.vsize = vsize;
+
+      brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+      brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;
+      brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
+      brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+      brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;
+
+      brw->urb.constrained = 0;
+
+      /* First try the larger chipset-specific entry counts; if they do
+       * not fit, fall back to the preferred counts and remember that we
+       * are constrained.
+       */
+      if (BRW_IS_IGDNG(brw)) {
+         brw->urb.nr_vs_entries = 128;
+         brw->urb.nr_sf_entries = 48;
+         if (check_urb_layout(brw)) {
+            goto done;
+         } else {
+            brw->urb.constrained = 1;
+            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+            brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+         }
+      } else if (BRW_IS_G4X(brw)) {
+         brw->urb.nr_vs_entries = 64;
+         if (check_urb_layout(brw)) {
+            goto done;
+         } else {
+            brw->urb.constrained = 1;
+            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+         }
+      }
+
+      /* Preferred counts do not fit either: drop to the minimums. */
+      if (!check_urb_layout(brw)) {
+         brw->urb.nr_vs_entries = limits[VS].min_nr_entries;
+         brw->urb.nr_gs_entries = limits[GS].min_nr_entries;
+         brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+         brw->urb.nr_sf_entries = limits[SF].min_nr_entries;
+         brw->urb.nr_cs_entries = limits[CS].min_nr_entries;
+
+         /* Mark us as operating with constrained nr_entries, so that next
+          * time we recalculate we'll resize the fences in the hope of
+          * escaping constrained mode and getting back to normal performance.
+          */
+         brw->urb.constrained = 1;
+
+         if (!check_urb_layout(brw)) {
+            /* This is impossible, given the maximal sizes of urb
+             * entries and the values for minimum nr of entries
+             * provided above.
+             */
+            _mesa_printf("couldn't calculate URB layout!\n");
+            exit(1);
+         }
+
+         if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
+            _mesa_printf("URB CONSTRAINED\n");
+      }
+
+done:
+      if (INTEL_DEBUG & DEBUG_URB)
+         _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+                      brw->urb.vs_start,
+                      brw->urb.gs_start,
+                      brw->urb.clip_start,
+                      brw->urb.sf_start,
+                      brw->urb.cs_start,
+                      URB_SIZES(brw));
+
+      brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
+   }
+}
+
+
+/* Tracked-state atom: recalculate_urb_fence() is the prepare hook and is
+ * keyed on BRW_NEW_CURBE_OFFSETS and on new VS / SF programs in the cache.
+ */
+const struct brw_tracked_state brw_recalculate_urb_fence = {
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CURBE_OFFSETS,
+      .cache = (CACHE_NEW_VS_PROG |
+                CACHE_NEW_SF_PROG)
+   },
+   .prepare = recalculate_urb_fence
+};
+
+
+
+
+
+/* Build a URB_FENCE command from the section offsets computed by
+ * check_urb_layout() and emit it with BRW_BATCH_STRUCT().  All realloc
+ * bits are set.  Each unit's fence is the start of the following
+ * unit's section; the CS fence is URB_SIZES(brw).
+ */
+void brw_upload_urb_fence(struct brw_context *brw)
+{
+   struct brw_urb_fence uf;
+   memset(&uf, 0, sizeof(uf));
+
+   uf.header.opcode = CMD_URB_FENCE;
+   uf.header.length = sizeof(uf)/4-2;
+   uf.header.vs_realloc = 1;
+   uf.header.gs_realloc = 1;
+   uf.header.clp_realloc = 1;
+   uf.header.sf_realloc = 1;
+   uf.header.vfe_realloc = 1;
+   uf.header.cs_realloc = 1;
+
+   /* The ordering below is correct, not the layout in the
+    * instruction.
+    *
+    * There are 256/384 urb reg pairs in total.
+    */
+   uf.bits0.vs_fence = brw->urb.gs_start;
+   uf.bits0.gs_fence = brw->urb.clip_start;
+   uf.bits0.clp_fence = brw->urb.sf_start;
+   uf.bits1.sf_fence = brw->urb.cs_start;
+   uf.bits1.cs_fence = URB_SIZES(brw);
+
+   BRW_BATCH_STRUCT(brw, &uf);
+}
diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c
new file mode 100644
index 0000000000..ce21aa4869
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_util.c
@@ -0,0 +1,104 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "main/mtypes.h" +#include "shader/prog_parameter.h" +#include "brw_util.h" +#include "brw_defines.h" + +GLuint brw_count_bits( GLuint val ) +{ + GLuint i; + for (i = 0; val ; val >>= 1) + if (val & 1) + i++; + return i; +} + + +GLuint brw_translate_blend_equation( GLenum mode ) +{ + switch (mode) { + case GL_FUNC_ADD: + return BRW_BLENDFUNCTION_ADD; + case GL_MIN: + return BRW_BLENDFUNCTION_MIN; + case GL_MAX: + return BRW_BLENDFUNCTION_MAX; + case GL_FUNC_SUBTRACT: + return BRW_BLENDFUNCTION_SUBTRACT; + case GL_FUNC_REVERSE_SUBTRACT: + return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; + default: + assert(0); + return BRW_BLENDFUNCTION_ADD; + } +} + +GLuint brw_translate_blend_factor( GLenum factor ) +{ + switch(factor) { + case GL_ZERO: + return BRW_BLENDFACTOR_ZERO; + case GL_SRC_ALPHA: + return BRW_BLENDFACTOR_SRC_ALPHA; + case GL_ONE: + return BRW_BLENDFACTOR_ONE; + case GL_SRC_COLOR: + return BRW_BLENDFACTOR_SRC_COLOR; + case GL_ONE_MINUS_SRC_COLOR: + return BRW_BLENDFACTOR_INV_SRC_COLOR; + case GL_DST_COLOR: + return BRW_BLENDFACTOR_DST_COLOR; + case GL_ONE_MINUS_DST_COLOR: + return BRW_BLENDFACTOR_INV_DST_COLOR; + case GL_ONE_MINUS_SRC_ALPHA: + return BRW_BLENDFACTOR_INV_SRC_ALPHA; + case GL_DST_ALPHA: + return BRW_BLENDFACTOR_DST_ALPHA; + case GL_ONE_MINUS_DST_ALPHA: + return BRW_BLENDFACTOR_INV_DST_ALPHA; + case GL_SRC_ALPHA_SATURATE: + return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; + case GL_CONSTANT_COLOR: + return BRW_BLENDFACTOR_CONST_COLOR; + case GL_ONE_MINUS_CONSTANT_COLOR: + return BRW_BLENDFACTOR_INV_CONST_COLOR; + case GL_CONSTANT_ALPHA: + return BRW_BLENDFACTOR_CONST_ALPHA; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return BRW_BLENDFACTOR_INV_CONST_ALPHA; + default: + assert(0); + return BRW_BLENDFACTOR_ZERO; + } +} diff --git a/src/gallium/drivers/i965/brw_util.h b/src/gallium/drivers/i965/brw_util.h new file mode 100644 index 
0000000000..33e7cd87e4 --- /dev/null +++ b/src/gallium/drivers/i965/brw_util.h @@ -0,0 +1,45 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+#ifndef BRW_UTIL_H
+#define BRW_UTIL_H
+
+#include "main/mtypes.h"
+
+/* Number of bits set in val. */
+extern GLuint brw_count_bits( GLuint val );
+/* NOTE(review): no definition of this function is visible in the
+ * brw_util.c added alongside this header -- confirm one exists or drop
+ * the declaration.
+ */
+extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList);
+/* GL blend factor enum -> BRW_BLENDFACTOR_* value. */
+extern GLuint brw_translate_blend_factor( GLenum factor );
+/* GL blend equation enum -> BRW_BLENDFUNCTION_* value. */
+extern GLuint brw_translate_blend_equation( GLenum mode );
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
new file mode 100644
index 0000000000..f0c79efbd9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -0,0 +1,124 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_vs.h" +#include "brw_util.h" +#include "brw_state.h" +#include "shader/prog_print.h" + + + +static void do_vs_prog( struct brw_context *brw, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key ) +{ + GLuint program_size; + const GLuint *program; + struct brw_vs_compile c; + + memset(&c, 0, sizeof(c)); + memcpy(&c.key, key, sizeof(*key)); + + brw_init_compile(brw, &c.func); + c.vp = vp; + + c.prog_data.outputs_written = vp->program.Base.OutputsWritten; + c.prog_data.inputs_read = vp->program.Base.InputsRead; + + if (c.key.copy_edgeflag) { + c.prog_data.outputs_written |= 1<program.Base); + + + + /* Emit GEN4 code. + */ + brw_vs_emit(&c); + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + dri_bo_unreference(brw->vs.prog_bo); + brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->vs.prog_data ); +} + + +static void brw_upload_vs_prog(struct brw_context *brw) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_vs_prog_key key; + struct brw_vertex_program *vp = + (struct brw_vertex_program *)brw->vertex_program; + + memset(&key, 0, sizeof(key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. + */ + key.program_string_id = vp->id; + key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); + + /* Make an early check for the key. 
+ */ + dri_bo_unreference(brw->vs.prog_bo); + brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG, + &key, sizeof(key), + NULL, 0, + &brw->vs.prog_data); + if (brw->vs.prog_bo == NULL) + do_vs_prog(brw, vp, &key); +} + + +/* See brw_vs.c: + */ +const struct brw_tracked_state brw_vs_prog = { + .dirty = { + .mesa = _NEW_TRANSFORM | _NEW_POLYGON, + .brw = BRW_NEW_VERTEX_PROGRAM, + .cache = 0 + }, + .prepare = brw_upload_vs_prog +}; diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h new file mode 100644 index 0000000000..4a591365c9 --- /dev/null +++ b/src/gallium/drivers/i965/brw_vs.h @@ -0,0 +1,88 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+#ifndef BRW_VS_H
+#define BRW_VS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "shader/program.h"
+
+
+/* Key identifying one compiled variant of a vertex program in the
+ * program cache.
+ *
+ * NOTE(review): the bitfields below add up to 4 + 1 + 26 = 31 bits, not
+ * 32 -- confirm whether pad was meant to be 27.
+ */
+struct brw_vs_prog_key {
+   GLuint program_string_id;   /* id of the vertex program */
+   GLuint nr_userclip:4;       /* number of enabled user clip planes */
+   GLuint copy_edgeflag:1;     /* pass the edge flag through the VS */
+   GLuint pad:26;
+};
+
+
+/* Working state for compiling one vertex program. */
+struct brw_vs_compile {
+   struct brw_compile func;
+   struct brw_vs_prog_key key;
+   struct brw_vs_prog_data prog_data;
+
+   struct brw_vertex_program *vp;
+
+   GLuint nr_inputs;
+
+   GLuint first_output;
+   GLuint nr_outputs;
+   GLuint first_overflow_output; /**< VERT_RESULT_x */
+
+   GLuint first_tmp;
+   GLuint last_tmp;
+
+   struct brw_reg r0;
+   struct brw_reg r1;
+   struct brw_reg regs[PROGRAM_ADDRESS+1][128];   /* indexed [register file][index] */
+   struct brw_reg tmp;
+   struct brw_reg stack;
+
+   struct {
+      GLboolean used_in_src;
+      struct brw_reg reg;
+   } output_regs[128];
+
+   struct brw_reg userplane[6];
+
+   /** we may need up to 3 constants per instruction (if use_const_buffer) */
+   struct {
+      GLint index;
+      struct brw_reg reg;
+   } current_const[3];
+};
+
+/* Entry point of the VS code generator (defined in brw_vs_emit.c). */
+void brw_vs_emit( struct brw_vs_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
new file mode 100644
index 0000000000..1638ef8111
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -0,0 +1,1667 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell
+  */
+
+
+#include "main/macros.h"
+#include "shader/program.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "brw_context.h"
+#include "brw_vs.h"
+
+
+/**
+ * Hand out the next temporary GRF (register c->last_tmp) and advance
+ * the allocation pointer, growing prog_data.total_grf if needed.
+ */
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+   struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
+
+   if (++c->last_tmp > c->prog_data.total_grf)
+      c->prog_data.total_grf = c->last_tmp;
+
+   return tmp;
+}
+
+/**
+ * Give back a temporary.  Only the most recently allocated temporary is
+ * actually released; any other register is left allocated.
+ */
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+   if (tmp.nr == c->last_tmp-1)
+      c->last_tmp--;
+}
+
+/**
+ * Release every temporary at once by resetting the allocation pointer
+ * to c->first_tmp.
+ */
+static void release_tmps( struct brw_vs_compile *c )
+{
+   c->last_tmp = c->first_tmp;
+}
+
+
+/**
+ * Preallocate GRF register before code emit.
+ * Do things as simply as possible. Allocate and populate all regs
+ * ahead of time.
+ *
+ * Registers are assigned in this order: r0, user clip planes, program
+ * parameters (unless a constant buffer is used), inputs, outputs,
+ * program temporaries, address registers, constant-buffer staging
+ * registers, copies of outputs that are read as sources, the stack, and
+ * finally the pool of internal temporaries.  Also fills in
+ * curb_read_length, nr_params, urb_read_length, urb_entry_size and
+ * total_grf in c->prog_data.
+ */
+static void brw_vs_alloc_regs( struct brw_vs_compile *c )
+{
+   GLuint i, reg = 0, mrf;
+   int attributes_in_vue;
+
+   /* Determine whether to use a real constant buffer or use a block
+    * of GRF registers for constants. The latter is faster but only
+    * works if everything fits in the GRF.
+    * XXX this heuristic/check may need some fine tuning...
+    */
+   if (c->vp->program.Base.Parameters->NumParameters +
+       c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+      c->vp->use_const_buffer = GL_TRUE;
+   else
+      c->vp->use_const_buffer = GL_FALSE;
+
+   /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
+
+   /* r0 -- reserved as usual
+    */
+   c->r0 = brw_vec8_grf(reg, 0);
+   reg++;
+
+   /* User clip planes from curbe:
+    */
+   if (c->key.nr_userclip) {
+      /* Two planes are packed per register (4 floats each). */
+      for (i = 0; i < c->key.nr_userclip; i++) {
+         c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+      }
+
+      /* Deal with curbe alignment:
+       */
+      reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
+   }
+
+   /* Vertex program parameters from curbe:
+    */
+   if (c->vp->use_const_buffer) {
+      /* get constants from a real constant buffer */
+      c->prog_data.curb_read_length = 0;
+      c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+   }
+   else {
+      /* use a section of the GRF for constants */
+      GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
+      /* Two parameters are packed per register (4 floats each). */
+      for (i = 0; i < nr_params; i++) {
+         c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+      }
+      reg += (nr_params + 1) / 2;
+      c->prog_data.curb_read_length = reg - 1;
+
+      c->prog_data.nr_params = nr_params * 4;
+   }
+
+   /* Allocate input regs:
+    */
+   c->nr_inputs = 0;
+   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+      if (c->prog_data.inputs_read & (1 << i)) {
+         c->nr_inputs++;
+         c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
+         reg++;
+      }
+   }
+   /* If there are no inputs, we'll still be reading one attribute's worth
+    * because it's required -- see urb_read_length setting.
+    */
+   if (c->nr_inputs == 0)
+      reg++;
+
+   /* Allocate outputs. The non-position outputs go straight into message regs.
+    */
+   c->nr_outputs = 0;
+   c->first_output = reg;
+   c->first_overflow_output = 0;
+
+   /* First message register used for outputs differs on IGDNG. */
+   if (BRW_IS_IGDNG(c->func.brw))
+      mrf = 8;
+   else
+      mrf = 4;
+
+   for (i = 0; i < VERT_RESULT_MAX; i++) {
+      if (c->prog_data.outputs_written & (1 << i)) {
+         c->nr_outputs++;
+         assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
+         if (i == VERT_RESULT_HPOS) {
+            c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+            reg++;
+         }
+         else if (i == VERT_RESULT_PSIZ) {
+            c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+            reg++;
+            mrf++; /* just a placeholder? XXX fix later stages & remove this */
+         }
+         else {
+            if (mrf < 16) {
+               c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
+               mrf++;
+            }
+            else {
+               /* too many vertex results to fit in MRF, use GRF for overflow */
+               if (!c->first_overflow_output)
+                  c->first_overflow_output = i;
+               c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+               reg++;
+            }
+         }
+      }
+   }
+
+   /* Allocate program temporaries:
+    */
+   for (i = 0; i < c->vp->program.Base.NumTemporaries; i++) {
+      c->regs[PROGRAM_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+      reg++;
+   }
+
+   /* Address reg(s). Don't try to use the internal address reg until
+    * deref time.
+    */
+   for (i = 0; i < c->vp->program.Base.NumAddressRegs; i++) {
+      c->regs[PROGRAM_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
+                                            reg,
+                                            0,
+                                            BRW_REGISTER_TYPE_D,
+                                            BRW_VERTICAL_STRIDE_8,
+                                            BRW_WIDTH_8,
+                                            BRW_HORIZONTAL_STRIDE_1,
+                                            BRW_SWIZZLE_XXXX,
+                                            WRITEMASK_X);
+      reg++;
+   }
+
+   /* Staging registers for constants fetched from the constant buffer;
+    * index -1 marks a slot as holding no constant yet.
+    */
+   if (c->vp->use_const_buffer) {
+      for (i = 0; i < 3; i++) {
+         c->current_const[i].index = -1;
+         c->current_const[i].reg = brw_vec8_grf(reg, 0);
+         reg++;
+      }
+   }
+
+   /* Outputs flagged used_in_src get a GRF of their own.
+    * NOTE(review): used_in_src is not set anywhere in this function --
+    * presumably filled in by the caller before this runs; confirm.
+    */
+   for (i = 0; i < 128; i++) {
+      if (c->output_regs[i].used_in_src) {
+         c->output_regs[i].reg = brw_vec8_grf(reg, 0);
+         reg++;
+      }
+   }
+
+   /* The stack takes two registers. */
+   c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+   reg += 2;
+
+   /* Some opcodes need an internal temporary:
+    */
+   c->first_tmp = reg;
+   c->last_tmp = reg; /* for allocation purposes */
+
+   /* Each input reg holds data from two vertices. The
+    * urb_read_length is the number of registers read from *each*
+    * vertex urb, so is half the amount:
+    */
+   c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+   /* Setting this field to 0 leads to undefined behavior according to
+    * the VS_STATE docs. Our VUEs will always have at least one attribute
+    * sitting in them, even if it's padding.
+    */
+   if (c->prog_data.urb_read_length == 0)
+      c->prog_data.urb_read_length = 1;
+
+   /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
+    * them to fit the biggest thing they need to.
+    */
+   attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
+
+   if (BRW_IS_IGDNG(c->func.brw))
+      c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+   else
+      c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
+
+   c->prog_data.total_grf = reg;
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
+      _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
+      _mesa_printf("%s reg = %d\n", __FUNCTION__, reg);
+   }
+}
+
+
+/**
+ * If an instruction uses a temp reg both as a src and the dest, we
+ * sometimes need to allocate an intermediate temporary.
+ *
+ * When dst and arg0 name the same register, func is run into a fresh
+ * temporary (carrying dst's writemask) and the result is then moved to
+ * dst; otherwise func writes dst directly.
+ */
+static void unalias1( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      void (*func)( struct brw_vs_compile *,
+                                    struct brw_reg,
+                                    struct brw_reg ))
+{
+   if (dst.file == arg0.file && dst.nr == arg0.nr) {
+      struct brw_compile *p = &c->func;
+      struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+      func(c, tmp, arg0);
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
+   }
+   else {
+      func(c, dst, arg0);
+   }
+}
+
+/**
+ * \sa unalias1
+ * Checks if 2-operand instruction needs an intermediate temporary.
+ * The temporary is used when dst aliases either arg0 or arg1.
+ */
+static void unalias2( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1,
+                      void (*func)( struct brw_vs_compile *,
+                                    struct brw_reg,
+                                    struct brw_reg,
+                                    struct brw_reg ))
+{
+   if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
+       (dst.file == arg1.file && dst.nr == arg1.nr)) {
+      struct brw_compile *p = &c->func;
+      struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+      func(c, tmp, arg0, arg1);
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
+   }
+   else {
+      func(c, dst, arg0, arg1);
+   }
+}
+
+/**
+ * \sa unalias2
+ * Checks if 3-operand instruction needs an intermediate temporary.
+ */
+static void unalias3( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1,
+                      struct brw_reg arg2,
+                      void (*func)( struct brw_vs_compile *,
+                                    struct brw_reg,
+                                    struct brw_reg,
+                                    struct brw_reg,
+                                    struct brw_reg ))
+{
+   if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
+       (dst.file == arg1.file && dst.nr == arg1.nr) ||
+       (dst.file == arg2.file && dst.nr == arg2.nr)) { /* dst overlaps any source (file and number only) */
+      struct brw_compile *p = &c->func;
+      struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+      func(c, tmp, arg0, arg1, arg2);
+      brw_MOV(p, dst, tmp); /* copy the finished result into the real destination */
+      release_tmp(c, tmp);
+   }
+   else {
+      func(c, dst, arg0, arg1, arg2);
+   }
+}
+
+static void emit_sop( struct brw_compile *p, /* shared body of the "set on <cond>" opcodes below */
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1,
+                      GLuint cond)
+{
+   brw_MOV(p, dst, brw_imm_f(0.0f)); /* start with 0.0 */
+   brw_CMP(p, brw_null_reg(), cond, arg0, arg1); /* result register is discarded (null destination) */
+   brw_MOV(p, dst, brw_imm_f(1.0f)); /* NOTE(review): presumably predicated by the CMP above so only passing channels get 1.0 -- confirm */
+   brw_set_predicate_control_flag_value(p, 0xff); /* put the flag value back to 0xff */
+}
+
+static void emit_seq( struct brw_compile *p, /* cond = EQ */
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
+}
+
+static void emit_sne( struct brw_compile *p, /* cond = NEQ */
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
+}
+static void emit_slt( struct brw_compile *p, /* cond = L */
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
+}
+
+static void emit_sle( struct brw_compile *p, /* cond = LE */
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
+}
+
+static void emit_sgt( struct brw_compile *p, /* cond = G */
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
+}
+
+static void emit_sge( struct brw_compile *p, /* cond = GE */
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
+}
+
+static void emit_max( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); /* test arg0 < arg1 */
+   brw_SEL(p, dst, arg1, arg0); /* sources swapped relative to emit_min; NOTE(review): assumes SEL takes its first source where the test passed -- confirm */
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE); /* stop predicating later instructions */
+}
+
+static void emit_min( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); /* test arg0 < arg1 */
+   brw_SEL(p, dst, arg0, arg1);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
+
+static void emit_math1( struct brw_vs_compile *c,
+                        GLuint function,
+                        struct brw_reg dst,
+                        struct brw_reg arg0,
+                        GLuint precision)
+{
+   /* There are various odd behaviours with SEND on the simulator. In
+    * addition there are documented issues with the fact that the GEN4
+    * processor doesn't do dependency control properly on SEND
+    * results. So, on balance, this kludge to get around failures
+    * with writemasked math results looks like it might be necessary
+    * whether that turns out to be a simulator bug or not:
+    */
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+                         dst.file != BRW_GENERAL_REGISTER_FILE); /* write directly only to a fully-writemasked GRF */
+
+   if (need_tmp)
+      tmp = get_tmp(c);
+
+   brw_math(p,
+            tmp,
+            function,
+            BRW_MATH_SATURATE_NONE,
+            2, /* NOTE(review): presumably the message register used for the operand -- confirm against brw_math() */
+            arg0,
+            BRW_MATH_DATA_SCALAR,
+            precision);
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp); /* this MOV applies dst's own writemask */
+      release_tmp(c, tmp);
+   }
+}
+
+
+static void emit_math2( struct brw_vs_compile *c, /* two-operand variant of emit_math1 */
+                        GLuint function,
+                        struct brw_reg dst,
+                        struct brw_reg arg0,
+                        struct brw_reg arg1,
+                        GLuint precision)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+                         dst.file != BRW_GENERAL_REGISTER_FILE);
+
+   if (need_tmp)
+      tmp = get_tmp(c);
+
+   brw_MOV(p, brw_message_reg(3), arg1); /* second operand is placed in message register 3 by hand */
+
+   brw_math(p,
+            tmp,
+            function,
+            BRW_MATH_SATURATE_NONE,
+            2, /* NOTE(review): presumably the first message register, with arg1 following in m3 -- confirm */
+            arg0,
+            BRW_MATH_DATA_SCALAR,
+            precision);
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
+   }
+}
+
+
+static void emit_exp_noalias( struct brw_vs_compile *c, /* EXP opcode; callers go through unalias1() so dst does not overlap arg0 */
+                              struct brw_reg dst,
+                              struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+
+
+   if (dst.dw1.bits.writemask & WRITEMASK_X) {
+      struct brw_reg tmp = get_tmp(c);
+      struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
+
+      /* tmp_d = floor(arg0.x) */
+      brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
+
+      /* result[0] = 2.0 ^ tmp */
+
+      /* Adjust exponent for floating point:
+       * exp += 127
+       */
+      brw_ADD(p, brw_writemask(tmp_d, WRITEMASK_X), tmp_d, brw_imm_d(127));
+
+      /* Install exponent and sign.
+       * Excess drops off the edge:
+       */
+      brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), WRITEMASK_X),
+              tmp_d, brw_imm_d(23));
+
+      release_tmp(c, tmp);
+   }
+
+   if (dst.dw1.bits.writemask & WRITEMASK_Y) {
+      /* result[1] = arg0.x - floor(arg0.x) */
+      brw_FRC(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0, 0));
+   }
+
+   if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+      /* As with the LOG instruction, we might be better off just
+       * doing a taylor expansion here, seeing as we have to do all
+       * the prep work.
+       *
+       * If mathbox partial precision is too low, consider also:
+       * result[2] = result[0] * EXP(result[1])
+       */
+      emit_math1(c,
+                 BRW_MATH_FUNCTION_EXP,
+                 brw_writemask(dst, WRITEMASK_Z),
+                 brw_swizzle1(arg0, 0),
+                 BRW_MATH_PRECISION_FULL);
+   }
+
+   if (dst.dw1.bits.writemask & WRITEMASK_W) {
+      /* result[3] = 1.0; */
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1));
+   }
+}
+
+
+static void emit_log_noalias( struct brw_vs_compile *c, /* LOG opcode; callers go through unalias1() */
+                              struct brw_reg dst,
+                              struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+   struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+                         dst.file != BRW_GENERAL_REGISTER_FILE); /* later components read earlier ones back, so partial or non-GRF dst needs scratch */
+
+   if (need_tmp) {
+      tmp = get_tmp(c);
+      tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); /* keep the UD view pointing at the same register as tmp */
+   }
+
+   /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mant
+    * according to spec:
+    *
+    * These almost look likely they could be joined up, but not really
+    * practical:
+    *
+    * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
+    * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
+    */
+   if (dst.dw1.bits.writemask & WRITEMASK_XZ) { /* X is also needed as an input to Z */
+      brw_AND(p,
+              brw_writemask(tmp_ud, WRITEMASK_X),
+              brw_swizzle1(arg0_ud, 0),
+              brw_imm_ud((1U<<31)-1)); /* clear the top (sign) bit */
+
+      brw_SHR(p,
+              brw_writemask(tmp_ud, WRITEMASK_X),
+              tmp_ud,
+              brw_imm_ud(23));
+
+      brw_ADD(p,
+              brw_writemask(tmp, WRITEMASK_X),
+              retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */
+              brw_imm_d(-127));
+   }
+
+   if (dst.dw1.bits.writemask & WRITEMASK_YZ) { /* Y is also needed as an input to Z */
+      brw_AND(p,
+              brw_writemask(tmp_ud, WRITEMASK_Y),
+              brw_swizzle1(arg0_ud, 0),
+              brw_imm_ud((1<<23)-1)); /* keep the low 23 bits */
+
+      brw_OR(p,
+             brw_writemask(tmp_ud, WRITEMASK_Y),
+             tmp_ud,
+             brw_imm_ud(127<<23));
+   }
+
+   if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+      /* result[2] = result[0] + LOG2(result[1]); */
+
+      /* Why bother? The above is just a hint how to do this with a
+       * taylor series. Maybe we *should* use a taylor series as by
+       * the time all the above has been done it's almost certainly
+       * quicker than calling the mathbox, even with low precision.
+       *
+       * Options are:
+       * - result[0] + mathbox.LOG2(result[1])
+       * - mathbox.LOG2(arg0.x)
+       * - result[0] + inline_taylor_approx(result[1])
+       */
+      emit_math1(c,
+                 BRW_MATH_FUNCTION_LOG,
+                 brw_writemask(tmp, WRITEMASK_Z),
+                 brw_swizzle1(tmp, 1),
+                 BRW_MATH_PRECISION_FULL);
+
+      brw_ADD(p,
+              brw_writemask(tmp, WRITEMASK_Z),
+              brw_swizzle1(tmp, 2),
+              brw_swizzle1(tmp, 0)); /* the first option listed above: tmp.z += tmp.x */
+   }
+
+   if (dst.dw1.bits.writemask & WRITEMASK_W) {
+      /* result[3] = 1.0; */
+      brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1));
+   }
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
+   }
+}
+
+
+/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1
+ */
+static void emit_dst_noalias( struct brw_vs_compile *c,
+                              struct brw_reg dst,
+                              struct brw_reg arg0,
+                              struct brw_reg arg1)
+{
+   struct brw_compile *p = &c->func;
+
+   /* There must be a better way to do this:
+    */
+   if (dst.dw1.bits.writemask & WRITEMASK_X)
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_X), brw_imm_f(1.0)); /* x = 1.0 */
+   if (dst.dw1.bits.writemask & WRITEMASK_Y)
+      brw_MUL(p, brw_writemask(dst, WRITEMASK_Y), arg0, arg1); /* y = arg0 * arg1 */
+   if (dst.dw1.bits.writemask & WRITEMASK_Z)
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_Z), arg0); /* z = arg0 */
+   if (dst.dw1.bits.writemask & WRITEMASK_W)
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1); /* w = arg1 */
+}
+
+
+static void emit_xpd( struct brw_compile *p, /* cross product of t and u */
+                      struct brw_reg dst,
+                      struct brw_reg t,
+                      struct brw_reg u)
+{
+   brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3)); /* t.yzxw * u.zxyw; NOTE(review): null dest, product presumably kept in the accumulator -- confirm */
+   brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3)); /* NOTE(review): presumably dst = -(t.zxyw * u.yzxw) + the product above -- confirm */
+}
+
+
+static void emit_lit_noalias( struct brw_vs_compile *c, /* LIT opcode; callers go through unalias1() */
+                              struct brw_reg dst,
+                              struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+   if (need_tmp)
+      tmp = get_tmp(c);
+
+   brw_MOV(p, brw_writemask(dst, WRITEMASK_YZ), brw_imm_f(0)); /* defaults: y = z = 0 */
+   brw_MOV(p, brw_writemask(dst, WRITEMASK_XW), brw_imm_f(1)); /* x = w = 1 */
+
+   /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
+    * to get all channels active inside the IF. In the clipping code
+    * we run with NoMask, so it's not an option and we can use
+    * BRW_EXECUTE_1 for all comparisons.
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); /* arg0.x > 0 ? */
+   if_insn = brw_IF(p, BRW_EXECUTE_8);
+   {
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0)); /* y = arg0.x */
+
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); /* arg0.y > 0 ? */
+      brw_MOV(p, brw_writemask(tmp, WRITEMASK_Z), brw_swizzle1(arg0,1)); /* tmp.z = arg0.y; tmp is the readable scratch that feeds POW below */
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+      emit_math2(c,
+                 BRW_MATH_FUNCTION_POW,
+                 brw_writemask(dst, WRITEMASK_Z),
+                 brw_swizzle1(tmp, 2),
+                 brw_swizzle1(arg0, 3),
+                 BRW_MATH_PRECISION_PARTIAL); /* z = pow(tmp.z, arg0.w) */
+   }
+
+   brw_ENDIF(p, if_insn);
+
+   release_tmp(c, tmp); /* NOTE(review): called even when need_tmp is false, i.e. when tmp is just dst -- confirm release_tmp() tolerates that */
+}
+
+static void emit_lrp_noalias(struct brw_vs_compile *c, /* LRP opcode; callers go through unalias3() */
+                             struct brw_reg dst,
+                             struct brw_reg arg0,
+                             struct brw_reg arg1,
+                             struct brw_reg arg2)
+{
+   struct brw_compile *p = &c->func;
+
+   brw_ADD(p, dst, negate(arg0), brw_imm_f(1.0)); /* dst = 1.0 - arg0 */
+   brw_MUL(p, brw_null_reg(), dst, arg2); /* (1 - arg0) * arg2 into a null dest; NOTE(review): presumably kept in the accumulator -- confirm */
+   brw_MAC(p, dst, arg0, arg1); /* NOTE(review): presumably dst = arg0 * arg1 + the product above -- confirm */
+}
+
+/** 3 or 4-component vector normalization */
+static void emit_nrm( struct brw_vs_compile *c,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      int num_comps)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = get_tmp(c);
+
+   /* tmp = dot(arg0, arg0) */
+   if (num_comps == 3)
+      brw_DP3(p, tmp, arg0, arg0);
+   else
+      brw_DP4(p, tmp, arg0, arg0); /* any num_comps other than 3 takes the 4-component path */
+
+   /* tmp = 1 / sqrt(tmp) */
+   emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL);
+
+   /* dst = arg0 * tmp */
+   brw_MUL(p, dst, arg0, tmp);
+
+   release_tmp(c, tmp);
+}
+
+
+static struct brw_reg
+get_constant(struct brw_vs_compile *c, /* fetch a constant from the constant buffer, reusing the per-argument cached register when possible */
+             const struct prog_instruction *inst,
+             GLuint argIndex)
+{
+   const struct prog_src_register *src = &inst->SrcReg[argIndex];
+   struct brw_compile *p = &c->func;
+   struct brw_reg const_reg;
+   struct brw_reg const2_reg; /* assigned only on the relAddr path; that path always takes the fetch branch below */
+   const GLboolean relAddr = src->RelAddr;
+
+   assert(argIndex < 3);
+
+   if (c->current_const[argIndex].index != src->Index || relAddr) { /* cache miss, or relative addressing (always re-fetched) */
+      struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+
+      c->current_const[argIndex].index = src->Index;
+
+#if 0
+      printf(" fetch const[%d] for arg %d into reg %d\n",
+             src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+      /* need to fetch the constant now */
+      brw_dp_READ_4_vs(p,
+                       c->current_const[argIndex].reg,/* writeback dest */
+                       0, /* oword */
+                       relAddr, /* relative indexing? */
+                       addrReg, /* address register */
+                       16 * src->Index, /* byte offset */
+                       SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+                       );
+
+      if (relAddr) {
+         /* second read */
+         const2_reg = get_tmp(c);
+
+         /* use upper half of address reg for second read */
+         addrReg = stride(addrReg, 0, 4, 0);
+         addrReg.subnr = 16;
+
+         brw_dp_READ_4_vs(p,
+                          const2_reg, /* writeback dest */
+                          1, /* oword */
+                          relAddr, /* relative indexing? */
+                          addrReg, /* address register */
+                          16 * src->Index, /* byte offset */
+                          SURF_INDEX_VERT_CONST_BUFFER
+                          );
+      }
+   }
+
+   const_reg = c->current_const[argIndex].reg;
+
+   if (relAddr) {
+      /* merge the two Owords into the constant register */
+      /* const_reg[7..4] = const2_reg[7..4] */
+      brw_MOV(p,
+              suboffset(stride(const_reg, 0, 4, 1), 4),
+              suboffset(stride(const2_reg, 0, 4, 1), 4));
+      release_tmp(c, const2_reg);
+   }
+   else {
+      /* replicate lower four floats into upper half (to get XYZWXYZW) */
+      const_reg = stride(const_reg, 0, 4, 0);
+      const_reg.subnr = 0;
+   }
+
+   return const_reg;
+}
+
+
+
+/* TODO: relative addressing!
+ */
+static struct brw_reg get_reg( struct brw_vs_compile *c, /* look up the brw register recorded in c->regs[] for a (file, index) pair */
+                               gl_register_file file,
+                               GLuint index )
+{
+   switch (file) {
+   case PROGRAM_TEMPORARY:
+   case PROGRAM_INPUT:
+   case PROGRAM_OUTPUT:
+      assert(c->regs[file][index].nr != 0); /* register number 0 is treated as "never allocated" */
+      return c->regs[file][index];
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_UNIFORM:
+      assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+      return c->regs[PROGRAM_STATE_VAR][index]; /* all three files share the PROGRAM_STATE_VAR table */
+   case PROGRAM_ADDRESS:
+      assert(index == 0); /* only address register 0 is accepted */
+      return c->regs[file][index];
+
+   case PROGRAM_UNDEFINED: /* undef values */
+      return brw_null_reg();
+
+   case PROGRAM_LOCAL_PARAM:
+   case PROGRAM_ENV_PARAM:
+   case PROGRAM_WRITE_ONLY:
+   default:
+      assert(0); /* files not handled by this lookup */
+      return brw_null_reg(); /* fallback when asserts are compiled out */
+   }
+}
+
+
+/**
+ * Indirect addressing: get reg[[arg] + offset].
+ */
+static struct brw_reg deref( struct brw_vs_compile *c,
+                             struct brw_reg arg,
+                             GLint offset)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = vec4(get_tmp(c));
+   struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+   struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
+   GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; /* register number * 32 + sub-register offset + 16 per indexed element */
+   struct brw_reg indirect = brw_vec4_indirect(0,0);
+
+   {
+      brw_push_insn_state(p); /* instruction state is restored by the pop below */
+      brw_set_access_mode(p, BRW_ALIGN_1);
+
+      /* This is pretty clunky - load the address register twice and
+       * fetch each 4-dword value in turn. There must be a way to do
+       * this in a single pass, but I couldn't get it to work.
+       */
+      brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+      brw_MOV(p, tmp, indirect); /* first four dwords */
+
+      brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); /* same byte offset, address taken 8 UW elements further along */
+      brw_MOV(p, suboffset(tmp, 4), indirect); /* second four dwords */
+
+      brw_pop_insn_state(p);
+   }
+
+   /* NOTE: tmp not released */
+   return vec8(tmp);
+}
+
+
+/**
+ * Get brw reg corresponding to the instruction's [argIndex] src reg.
+ * TODO: relative addressing!
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+             const struct prog_instruction *inst,
+             GLuint argIndex )
+{
+   const GLuint file = inst->SrcReg[argIndex].File;
+   const GLint index = inst->SrcReg[argIndex].Index;
+   const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
+
+   switch (file) {
+   case PROGRAM_TEMPORARY:
+   case PROGRAM_INPUT:
+   case PROGRAM_OUTPUT:
+      if (relAddr) {
+         return deref(c, c->regs[file][0], index); /* indirect: base is element 0 of the file, index is the offset */
+      }
+      else {
+         assert(c->regs[file][index].nr != 0); /* register number 0 is treated as "never allocated" */
+         return c->regs[file][index];
+      }
+
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_UNIFORM:
+   case PROGRAM_ENV_PARAM: /* accepted here, whereas get_reg() asserts on this file */
+      if (c->vp->use_const_buffer) {
+         return get_constant(c, inst, argIndex); /* constants come from the constant buffer; get_constant handles relAddr itself */
+      }
+      else if (relAddr) {
+         return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+      }
+      else {
+         assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+         return c->regs[PROGRAM_STATE_VAR][index];
+      }
+   case PROGRAM_ADDRESS:
+      assert(index == 0); /* only address register 0 is accepted */
+      return c->regs[file][index];
+
+   case PROGRAM_UNDEFINED:
+      /* this is a normal case since we loop over all three src args */
+      return brw_null_reg();
+
+   case PROGRAM_LOCAL_PARAM:
+   case PROGRAM_WRITE_ONLY:
+   default:
+      assert(0);
+      return brw_null_reg(); /* fallback when asserts are compiled out */
+   }
+}
+
+
+static void emit_arl( struct brw_vs_compile *c, /* ARL opcode: load the address register with a scaled index */
+                      struct brw_reg dst,
+                      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+   if (need_tmp)
+      tmp = get_tmp(c);
+
+   brw_RNDD(p, tmp, arg0); /* tmp = floor(arg0) -- RNDD is what OPCODE_FLR emits */
+   brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */
+
+   if (need_tmp)
+      release_tmp(c, tmp);
+}
+
+
+/**
+ * Return the brw reg for the given instruction's src argument.
+ * Will return mangled results for SWZ op. The emit_swz() function
+ * ignores this result and recalculates taking extended swizzles into
+ * account.
+ */ +static struct brw_reg get_arg( struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex ) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_reg reg; + + if (src->File == PROGRAM_UNDEFINED) + return brw_null_reg(); + + reg = get_src_reg(c, inst, argIndex); + + /* Convert 3-bit swizzle to 2-bit. + */ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), + GET_SWZ(src->Swizzle, 1), + GET_SWZ(src->Swizzle, 2), + GET_SWZ(src->Swizzle, 3)); + + /* Note this is ok for non-swizzle instructions: + */ + reg.negate = src->Negate ? 1 : 0; + + return reg; +} + + +/** + * Get brw register for the given program dest register. + */ +static struct brw_reg get_dst( struct brw_vs_compile *c, + struct prog_dst_register dst ) +{ + struct brw_reg reg; + + switch (dst.File) { + case PROGRAM_TEMPORARY: + case PROGRAM_OUTPUT: + assert(c->regs[dst.File][dst.Index].nr != 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_ADDRESS: + assert(dst.Index == 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_UNDEFINED: + /* we may hit this for OPCODE_END, OPCODE_KIL, etc */ + reg = brw_null_reg(); + break; + default: + assert(0); + reg = brw_null_reg(); + } + + reg.dw1.bits.writemask = dst.WriteMask; + + return reg; +} + + +static void emit_swz( struct brw_vs_compile *c, + struct brw_reg dst, + const struct prog_instruction *inst) +{ + const GLuint argIndex = 0; + const struct prog_src_register src = inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + GLuint zeros_mask = 0; + GLuint ones_mask = 0; + GLuint src_mask = 0; + GLubyte src_swz[4]; + GLboolean need_tmp = (src.Negate && + dst.file != BRW_GENERAL_REGISTER_FILE); + struct brw_reg tmp = dst; + GLuint i; + + if (need_tmp) + tmp = get_tmp(c); + + for (i = 0; i < 4; i++) { + if (dst.dw1.bits.writemask & (1<func; + struct brw_reg m0 = brw_message_reg(0); + struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS]; + struct brw_reg 
ndc; + int eot; + GLuint len_vertext_header = 2; + + if (c->key.copy_edgeflag) { + brw_MOV(p, + get_reg(c, PROGRAM_OUTPUT, VERT_RESULT_EDGE), + get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG)); + } + + /* Build ndc coords */ + ndc = get_tmp(c); + /* ndc = 1.0 / pos.w */ + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + /* ndc.xyz = pos * ndc */ + brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + + /* Update the header for point size, user clipping flags, and -ve rhw + * workaround. + */ + if ((c->prog_data.outputs_written & (1<key.nr_userclip || BRW_IS_965(p->brw)) + { + struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + GLuint i; + + brw_MOV(p, header1, brw_imm_ud(0)); + + brw_set_access_mode(p, BRW_ALIGN_16); + + if (c->prog_data.outputs_written & (1<regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; + brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); + } + + for (i = 0; i < c->key.nr_userclip; i++) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<brw)) { + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + brw_set_access_mode(p, BRW_ALIGN_1); /* why? 
*/ + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); + brw_set_access_mode(p, BRW_ALIGN_16); + + release_tmp(c, header1); + } + else { + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + } + + /* Emit the (interleaved) headers for the two vertices - an 8-reg + * of zeros followed by two sets of NDC coordinates: + */ + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, offset(m0, 2), ndc); + + if (BRW_IS_IGDNG(p->brw)) { + /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ + brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ + /* m4, m5 contain the distances from vertex to the user clip planeXXX. + * Seems it is useless for us. + * m6 is used for aligning, so that the remainder of vertex element is + * reg-aligned. + */ + brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */ + len_vertext_header = 6; + } else { + brw_MOV(p, offset(m0, 3), pos); + len_vertext_header = 2; + } + + eot = (c->first_overflow_output == 0); + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 0, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */ + 0, /* response len */ + eot, /* eot */ + eot, /* writes complete */ + 0, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + + if (c->first_overflow_output > 0) { + /* Not all of the vertex outputs/results fit into the MRF. + * Move the overflowed attributes from the GRF to the MRF and + * issue another brw_urb_WRITE(). + */ + /* XXX I'm not 100% sure about which MRF regs to use here. Starting + * at mrf[4] atm... 
+ */ + GLuint i, mrf = 0; + for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) { + if (c->prog_data.outputs_written & (1 << i)) { + /* move from GRF to MRF */ + brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); + mrf++; + } + } + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 4, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + mrf+1, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + BRW_MAX_MRF-1, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + } +} + + +/** + * Called after code generation to resolve subroutine calls and the + * END instruction. + * \param end_inst points to brw code for END instruction + * \param last_inst points to last instruction emitted before vertex write + */ +static void +post_vs_emit( struct brw_vs_compile *c, + struct brw_instruction *end_inst, + struct brw_instruction *last_inst ) +{ + GLint offset; + + brw_resolve_cals(&c->func); + + /* patch up the END code to jump past subroutines, etc */ + offset = last_inst - end_inst; + if (offset > 1) { + brw_set_src1(end_inst, brw_imm_d(offset * 16)); + } else { + end_inst->header.opcode = BRW_OPCODE_NOP; + } +} + +static uint32_t +get_predicate(const struct prog_instruction *inst) +{ + if (inst->DstReg.CondMask == COND_TR) + return BRW_PREDICATE_NONE; + + /* All of GLSL only produces predicates for COND_NE and one channel per + * vector. Fail badly if someone starts doing something else, as it might + * mean infinite looping or something. + * + * We'd like to support all the condition codes, but our hardware doesn't + * quite match the Mesa IR, which is modeled after the NV extensions. For + * those, the instruction may update the condition codes or not, then any + * later instruction may use one of those condition codes. 
For gen4, the + * instruction may update the flags register based on one of the condition + * codes output by the instruction, and then further instructions may + * predicate on that. We can probably support this, but it won't + * necessarily be easy. + */ + assert(inst->DstReg.CondMask == COND_NE); + + switch (inst->DstReg.CondSwizzle) { + case SWIZZLE_XXXX: + return BRW_PREDICATE_ALIGN16_REPLICATE_X; + case SWIZZLE_YYYY: + return BRW_PREDICATE_ALIGN16_REPLICATE_Y; + case SWIZZLE_ZZZZ: + return BRW_PREDICATE_ALIGN16_REPLICATE_Z; + case SWIZZLE_WWWW: + return BRW_PREDICATE_ALIGN16_REPLICATE_W; + default: + _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", + inst->DstReg.CondMask); + return BRW_PREDICATE_NORMAL; + } +} + +/* Emit the vertex program instructions here. + */ +void brw_vs_emit(struct brw_vs_compile *c ) +{ +#define MAX_IF_DEPTH 32 +#define MAX_LOOP_DEPTH 32 + struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + const GLuint nr_insns = c->vp->program.Base.NumInstructions; + GLuint insn, if_depth = 0, loop_depth = 0; + GLuint end_offset = 0; + struct brw_instruction *end_inst, *last_inst; + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + const struct brw_indirect stack_index = brw_indirect(0, 0); + GLuint index; + GLuint file; + + if (INTEL_DEBUG & DEBUG_VS) { + _mesa_printf("vs-mesa:\n"); + _mesa_print_program(&c->vp->program.Base); + _mesa_printf("\n"); + } + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_access_mode(p, BRW_ALIGN_16); + + /* Message registers can't be read, so copy the output into GRF register + if they are used in source registers */ + for (insn = 0; insn < nr_insns; insn++) { + GLuint i; + struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + for (i = 0; i < 3; i++) { + struct prog_src_register *src = &inst->SrcReg[i]; + GLuint index = src->Index; + GLuint file = src->File; + if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS) + 
c->output_regs[index].used_in_src = GL_TRUE; + } + } + + /* Static register allocation + */ + brw_vs_alloc_regs(c); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + + for (insn = 0; insn < nr_insns; insn++) { + + const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; + struct brw_reg args[3], dst; + GLuint i; + +#if 0 + printf("%d: ", insn); + _mesa_print_instruction(inst); +#endif + + /* Get argument regs. SWZ is special and does this itself. + */ + if (inst->Opcode != OPCODE_SWZ) + for (i = 0; i < 3; i++) { + const struct prog_src_register *src = &inst->SrcReg[i]; + index = src->Index; + file = src->File; + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) + args[i] = c->output_regs[index].reg; + else + args[i] = get_arg(c, inst, i); + } + + /* Get dest regs. Note that it is possible for a reg to be both + * dst and arg, given the static allocation of registers. So + * care needs to be taken emitting multi-operation instructions. 
+ */ + index = inst->DstReg.Index; + file = inst->DstReg.File; + if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) + dst = c->output_regs[index].reg; + else + dst = get_dst(c, inst->DstReg); + + if (inst->SaturateMode != SATURATE_OFF) { + _mesa_problem(NULL, "Unsupported saturate %d in vertex shader", + inst->SaturateMode); + } + + switch (inst->Opcode) { + case OPCODE_ABS: + brw_MOV(p, dst, brw_abs(args[0])); + break; + case OPCODE_ADD: + brw_ADD(p, dst, args[0], args[1]); + break; + case OPCODE_COS: + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_DP3: + brw_DP3(p, dst, args[0], args[1]); + break; + case OPCODE_DP4: + brw_DP4(p, dst, args[0], args[1]); + break; + case OPCODE_DPH: + brw_DPH(p, dst, args[0], args[1]); + break; + case OPCODE_NRM3: + emit_nrm(c, dst, args[0], 3); + break; + case OPCODE_NRM4: + emit_nrm(c, dst, args[0], 4); + break; + case OPCODE_DST: + unalias2(c, dst, args[0], args[1], emit_dst_noalias); + break; + case OPCODE_EXP: + unalias1(c, dst, args[0], emit_exp_noalias); + break; + case OPCODE_EX2: + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_ARL: + emit_arl(c, dst, args[0]); + break; + case OPCODE_FLR: + brw_RNDD(p, dst, args[0]); + break; + case OPCODE_FRC: + brw_FRC(p, dst, args[0]); + break; + case OPCODE_LOG: + unalias1(c, dst, args[0], emit_log_noalias); + break; + case OPCODE_LG2: + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_LIT: + unalias1(c, dst, args[0], emit_lit_noalias); + break; + case OPCODE_LRP: + unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias); + break; + case OPCODE_MAD: + brw_MOV(p, brw_acc_reg(), args[2]); + brw_MAC(p, dst, args[0], args[1]); + break; + case OPCODE_MAX: + emit_max(p, dst, args[0], args[1]); + break; + case OPCODE_MIN: + emit_min(p, dst, args[0], args[1]); + break; + case OPCODE_MOV: + brw_MOV(p, dst, args[0]); + 
break; + case OPCODE_MUL: + brw_MUL(p, dst, args[0], args[1]); + break; + case OPCODE_POW: + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_RCP: + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_RSQ: + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + + case OPCODE_SEQ: + emit_seq(p, dst, args[0], args[1]); + break; + case OPCODE_SIN: + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case OPCODE_SNE: + emit_sne(p, dst, args[0], args[1]); + break; + case OPCODE_SGE: + emit_sge(p, dst, args[0], args[1]); + break; + case OPCODE_SGT: + emit_sgt(p, dst, args[0], args[1]); + break; + case OPCODE_SLT: + emit_slt(p, dst, args[0], args[1]); + break; + case OPCODE_SLE: + emit_sle(p, dst, args[0], args[1]); + break; + case OPCODE_SUB: + brw_ADD(p, dst, args[0], negate(args[1])); + break; + case OPCODE_SWZ: + /* The args[0] value can't be used here as it won't have + * correctly encoded the full swizzle: + */ + emit_swz(c, dst, inst); + break; + case OPCODE_TRUNC: + /* round toward zero */ + brw_RNDZ(p, dst, args[0]); + break; + case OPCODE_XPD: + emit_xpd(p, dst, args[0], args[1]); + break; + case OPCODE_IF: + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); + /* Note that brw_IF smashes the predicate_control field. 
*/ + if_inst[if_depth]->header.predicate_control = get_predicate(inst); + if_depth++; + break; + case OPCODE_ELSE: + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); + break; + case OPCODE_ENDIF: + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); + break; + case OPCODE_BGNLOOP: + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + break; + case OPCODE_BRK: + brw_set_predicate_control(p, get_predicate(inst)); + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CONT: + brw_set_predicate_control(p, get_predicate(inst)); + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_ENDLOOP: + { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + loop_depth--; + + if (BRW_IS_IGDNG(brw)) + br = 2; + + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + inst0->bits3.if_else.pop_count = 0; + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + inst0->bits3.if_else.pop_count = 0; + } + } + } + break; + case OPCODE_BRA: + brw_set_predicate_control(p, get_predicate(inst)); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CAL: + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + brw_save_call(p, inst->Comment, p->nr_insn); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case OPCODE_RET: + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, 
BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + break; + case OPCODE_END: + end_offset = p->nr_insn; + /* this instruction will get patched later to jump past subroutine + * code, etc. + */ + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case OPCODE_PRINT: + /* no-op */ + break; + case OPCODE_BGNSUB: + brw_save_label(p, inst->Comment, p->nr_insn); + break; + case OPCODE_ENDSUB: + /* no-op */ + break; + default: + _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader", + inst->Opcode, inst->Opcode < MAX_OPCODE ? + _mesa_opcode_string(inst->Opcode) : + "unknown"); + } + + /* Set the predication update on the last instruction of the native + * instruction sequence. + * + * This would be problematic if it was set on a math instruction, + * but that shouldn't be the case with the current GLSL compiler. + */ + if (inst->CondUpdate) { + struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; + + assert(hw_insn->header.destreg__conditionalmod == 0); + hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; + } + + if ((inst->DstReg.File == PROGRAM_OUTPUT) + && (inst->DstReg.Index != VERT_RESULT_HPOS) + && c->output_regs[inst->DstReg.Index].used_in_src) { + brw_MOV(p, get_dst(c, inst->DstReg), dst); + } + + /* Result color clamping. + * + * When destination register is an output register and + * it's primary/secondary front/back color, we have to clamp + * the result to [0,1]. This is done by enabling the + * saturation bit for the last instruction. + * + * We don't use brw_set_saturate() as it modifies + * p->current->header.saturate, which affects all the subsequent + * instructions. Instead, we directly modify the header + * of the last (already stored) instruction. 
+ */ + if (inst->DstReg.File == PROGRAM_OUTPUT) { + if ((inst->DstReg.Index == VERT_RESULT_COL0) + || (inst->DstReg.Index == VERT_RESULT_COL1) + || (inst->DstReg.Index == VERT_RESULT_BFC0) + || (inst->DstReg.Index == VERT_RESULT_BFC1)) { + p->store[p->nr_insn-1].header.saturate = 1; + } + } + + release_tmps(c); + } + + end_inst = &p->store[end_offset]; + last_inst = &p->store[p->nr_insn]; + + /* The END instruction will be patched to jump to this code */ + emit_vertex_write(c); + + post_vs_emit(c, end_inst, last_inst); + + if (INTEL_DEBUG & DEBUG_VS) { + int i; + + _mesa_printf("vs-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } +} diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c new file mode 100644 index 0000000000..d790ab6555 --- /dev/null +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -0,0 +1,185 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "main/macros.h" + +struct brw_vs_unit_key { + unsigned int total_grf; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + + unsigned int curbe_offset; + + unsigned int nr_urb_entries, urb_size; + + unsigned int nr_surfaces; +}; + +static void +vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_VS_PROG */ + key->total_grf = brw->vs.prog_data->total_grf; + key->urb_entry_read_length = brw->vs.prog_data->urb_read_length; + key->curb_entry_read_length = brw->vs.prog_data->curb_read_length; + + /* BRW_NEW_URB_FENCE */ + key->nr_urb_entries = brw->urb.nr_vs_entries; + key->urb_size = brw->urb.vsize; + + /* BRW_NEW_NR_VS_SURFACES */ + key->nr_surfaces = brw->vs.nr_surfaces; + + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ + if (ctx->Transform.ClipPlanesEnabled) { + /* Note that we read in the userclip planes as well, hence + * clip_start: + */ + key->curbe_offset = brw->curbe.clip_start; + } + else { + key->curbe_offset = brw->curbe.vs_start; + } +} + +static dri_bo * +vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) +{ + struct brw_vs_unit_state vs; + dri_bo *bo; + int chipset_max_threads; + + memset(&vs, 0, sizeof(vs)); + + vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ + vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + vs.thread1.floating_point_mode = 
BRW_FLOATING_POINT_NON_IEEE_754; + /* Choosing multiple program flow means that we may get 2-vertex threads, + * which will have the channel mask for dwords 4-7 enabled in the thread, + * and those dwords will be written to the second URB handle when we + * brw_urb_WRITE() results. + */ + vs.thread1.single_program_flow = 0; + + if (BRW_IS_IGDNG(brw)) + vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ + else + vs.thread1.binding_table_entry_count = key->nr_surfaces; + + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; + vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + vs.thread3.dispatch_grf_start_reg = 1; + vs.thread3.urb_entry_read_offset = 0; + vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + + if (BRW_IS_IGDNG(brw)) + vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2; + else + vs.thread4.nr_urb_entries = key->nr_urb_entries; + + vs.thread4.urb_entry_allocation_size = key->urb_size - 1; + + if (BRW_IS_IGDNG(brw)) + chipset_max_threads = 72; + else if (BRW_IS_G4X(brw)) + chipset_max_threads = 32; + else + chipset_max_threads = 16; + vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2, + 1, chipset_max_threads) - 1; + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + vs.thread4.max_threads = 0; + + /* No samplers for ARB_vp programs: + */ + /* It has to be set to 0 for IGDNG + */ + vs.vs5.sampler_count = 0; + + if (INTEL_DEBUG & DEBUG_STATS) + vs.thread4.stats_enable = 1; + + /* Vertex program always enabled: + */ + vs.vs6.vs_enable = 1; + + bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, + key, sizeof(*key), + &brw->vs.prog_bo, 1, + &vs, sizeof(vs), + NULL, NULL); + + /* Emit VS program relocation */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + vs.thread0.grf_reg_count << 1, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); + + return bo; +} + +static void prepare_vs_unit(struct brw_context *brw) +{ + struct brw_vs_unit_key key; + + vs_unit_populate_key(brw, 
&key); + + dri_bo_unreference(brw->vs.state_bo); + brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT, + &key, sizeof(key), + &brw->vs.prog_bo, 1, + NULL); + if (brw->vs.state_bo == NULL) { + brw->vs.state_bo = vs_unit_create_from_key(brw, &key); + } +} + +const struct brw_tracked_state brw_vs_unit = { + .dirty = { + .mesa = _NEW_TRANSFORM, + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_VS_PROG + }, + .prepare = prepare_vs_unit, +}; diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c new file mode 100644 index 0000000000..89f47522a1 --- /dev/null +++ b/src/gallium/drivers/i965/brw_vs_surface_state.c @@ -0,0 +1,226 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "main/mtypes.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "shader/prog_parameter.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +/* Creates a new VS constant buffer reflecting the current VS program's + * constants, if needed by the VS program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom, and NULL is returned (vp->use_const_buffer is not set). + */ +static drm_intel_bo * +brw_vs_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); /* 4 floats per parameter */ + drm_intel_bo *const_buffer; + + /* BRW_NEW_VERTEX_PROGRAM */ + if (!vp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS: copy the parameter values into the new buffer */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} + +/** + * Update the surface state for a VS constant buffer. + * + * Sets brw->vs.surf_bo[surf] and the current vertex program's const_buffer.
+ */ +static void +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + + assert(surf == 0); + + /* If we're in this state update atom, we need to update VS constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(vp->const_buffer); + vp->const_buffer = brw_vs_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. + */ + if (vp->const_buffer == NULL) { + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = NULL; + return; + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = vp->const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; /* 16 = 4 * sizeof(float), matching RGBA_FLOAT32 */ + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + /* Drop the old surface BO, then reuse a cached one or create a new one. */ + drm_intel_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } +} + + +/** + * Constructs the binding table for the VS surface state.
+ */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < BRW_VS_MAX_SURF; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + /* The presumed offsets were set in the data values for + * brw_upload_cache. + */ + drm_intel_bo_emit_reloc(bind_bo, i * 4, + brw->vs.surf_bo[i], 0, + I915_GEM_DOMAIN_INSTRUCTION, 0); + } + } + + free(data); + } + + return bind_bo; +} + +/** + * Vertex shader surfaces (constant buffer). + * + * This consumes the state updates for the constant buffer needing + * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and + * CACHE_NEW_SURF_BIND for the binding table upload. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + int i; + int nr_surfaces = 0; + + brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER); + + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + nr_surfaces = i + 1; + } + } + + if (brw->vs.nr_surfaces != nr_surfaces) { + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; + brw->vs.nr_surfaces = nr_surfaces; + } + + /* Note that we don't end up updating the bind_bo if we don't have a + * surface to be pointing at. This should be relatively harmless, as it + * just slightly increases our working set size. 
+ */ + if (brw->vs.nr_surfaces != 0) { + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + } +} + +const struct brw_tracked_state brw_vs_surfaces = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_VERTEX_PROGRAM), + .cache = 0 + }, + .prepare = prepare_vs_surfaces, +}; + + + diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c new file mode 100644 index 0000000000..2292de94c4 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm.c @@ -0,0 +1,375 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "main/texformat.h" +#include "brw_context.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "brw_state.h" + + +/** Return number of src args for given instruction */ +GLuint brw_wm_nr_args( GLuint opcode ) +{ + switch (opcode) { + case WM_FRONTFACING: + case WM_PIXELXY: + return 0; + case WM_CINTERP: + case WM_WPOSXY: + case WM_DELTAXY: + return 1; + case WM_LINTERP: + case WM_PIXELW: + return 2; + case WM_FB_WRITE: + case WM_PINTERP: + return 3; + default: + assert(opcode < MAX_OPCODE); + return _mesa_num_inst_src_regs(opcode); + } +} + + +GLuint brw_wm_is_scalar_result( GLuint opcode ) +{ + switch (opcode) { + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_POW: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_DPH: + case OPCODE_DST: + return 1; + + default: + return 0; + } +} + + +/** + * Do GPU code generation for non-GLSL shader. non-GLSL shaders have + * no flow control instructions so we can more readily do SSA-style + * optimizations. + */ +static void +brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + /* Augment fragment program. Add instructions for pre- and + * post-fragment-program tasks such as interpolation and fogging. + */ + brw_wm_pass_fp(c); + + /* Translate to intermediate representation. Build register usage + * chains. + */ + brw_wm_pass0(c); + + /* Dead code removal. + */ + brw_wm_pass1(c); + + /* Register allocation. + * Divide by two because we operate on 16 pixels at a time and require + * two GRF entries for each logical shader register. 
+ */ + c->grf_limit = BRW_WM_MAX_GRF / 2; + + brw_wm_pass2(c); + + /* how many general-purpose registers are used */ + c->prog_data.total_grf = c->max_wm_grf; + + /* Scratch space is used for register spilling */ + if (c->last_scratch) { + c->prog_data.total_scratch = c->last_scratch + 0x40; + } + else { + c->prog_data.total_scratch = 0; + } + + /* Emit GEN4 code. + */ + brw_wm_emit(c); +} + + +/** + * All Mesa program -> GPU code generation goes through this function. + * Depending on the instructions used (i.e. flow control instructions) + * we'll use one of two code generators. + */ +static void do_wm_prog( struct brw_context *brw, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) +{ + struct brw_wm_compile *c; + const GLuint *program; + GLuint program_size; + + c = brw->wm.compile_data; + if (c == NULL) { + brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); + c = brw->wm.compile_data; + if (c == NULL) { + /* Ouch - big out of memory problem. Can't continue + * without triggering a segfault, no way to signal, + * so just return. + */ + return; + } + } else { + memset(c, 0, sizeof(*brw->wm.compile_data)); + } + memcpy(&c->key, key, sizeof(*key)); + + c->fp = fp; + c->env_param = brw->intel.ctx.FragmentProgram.Parameters; + + brw_init_compile(brw, &c->func); + + /* temporary sanity check assertion */ + ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + + /* + * Shader which use GLSL features such as flow control are handled + * differently from "simple" shaders. 
+ */ + if (fp->isGLSL) { + c->dispatch_width = 8; + brw_wm_glsl_emit(brw, c); + } + else { + c->dispatch_width = 16; + brw_wm_non_glsl_emit(brw, c); + } + + if (INTEL_DEBUG & DEBUG_WM) + fprintf(stderr, "\n"); + + /* get the program + */ + program = brw_get_program(&c->func, &program_size); + + dri_bo_unreference(brw->wm.prog_bo); + brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + NULL, 0, + program, program_size, + &c->prog_data, + &brw->wm.prog_data ); +} + + +/* Fill *key from current GL and driver state; the comments below name the state flag each field depends on. */ +static void brw_wm_populate_key( struct brw_context *brw, + struct brw_wm_prog_key *key ) +{ + GLcontext *ctx = &brw->intel.ctx; + /* BRW_NEW_FRAGMENT_PROGRAM */ + const struct brw_fragment_program *fp = + (struct brw_fragment_program *)brw->fragment_program; + GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; + GLuint lookup = 0; + GLuint line_aa; + GLuint i; + + memset(key, 0, sizeof(*key)); + + /* Build the index for table lookup + */ + /* _NEW_COLOR */ + if (fp->program.UsesKill || + ctx->Color.AlphaEnabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH)) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* _NEW_DEPTH */ + if (ctx->Depth.Test) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (ctx->Depth.Test && + ctx->Depth.Mask) /* ??
*/ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + /* _NEW_STENCIL */ + if (ctx->Stencil._Enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (ctx->Stencil.WriteMask[0] || + ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (ctx->Line.SmoothFlag) { + if (brw->intel.reduced_primitive == GL_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->intel.reduced_primitive == GL_TRIANGLES) { + if (ctx->Polygon.FrontMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if (ctx->Polygon.BackMode == GL_LINE || + (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_BACK)) + line_aa = AA_ALWAYS; + } + else if (ctx->Polygon.BackMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if ((ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT)) + line_aa = AA_ALWAYS; + } + } + } + + brw_wm_lookup_iz(line_aa, + lookup, + uses_depth, + key); + + + /* BRW_NEW_WM_INPUT_DIMENSIONS */ + key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; + + /* _NEW_LIGHT */ + key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + + /* _NEW_HINT */ + key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + + /* _NEW_TEXTURE */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; + + if (unit->_ReallyEnabled) { + const struct gl_texture_object *t = unit->_Current; + const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + if (img->InternalFormat == GL_YCBCR_MESA) { + key->yuvtex_mask |= 1 << i; + if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) + key->yuvtex_swap_mask |= 1 << i; + } + + key->tex_swizzles[i] = t->_Swizzle; + } + else { + key->tex_swizzles[i] = SWIZZLE_NOOP; + } + } + + /* Shadow */ + key->shadowtex_mask = fp->program.Base.ShadowSamplers; + + /* _NEW_BUFFERS */ + /* + * Include the draw buffer origin and height so that we can calculate + * fragment position values relative
to the bottom left of the drawable, + * from the incoming screen origin relative position we get as part of our + * payload. + * + * We could avoid recompiling by including this as a constant referenced by + * our program, but if we were to do that it would also be nice to handle + * getting that constant updated at batchbuffer submit time (when we + * hold the lock and know where the buffer really is) rather than at emit + * time when we don't hold the lock and are just guessing. We could also + * just avoid using this as key data if the program doesn't use + * fragment.position. + * + * This pretty much becomes moot with DRI2 and redirected buffers anyway, + * as our origins will always be zero then. + */ + if (brw->intel.driDrawable != NULL) { + key->origin_x = brw->intel.driDrawable->x; + key->origin_y = brw->intel.driDrawable->y; + key->drawable_height = brw->intel.driDrawable->h; + } + + /* CACHE_NEW_VS_PROG */ + key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; + + /* The unique fragment program ID */ + key->program_string_id = fp->id; +} + + +static void brw_prepare_wm_prog(struct brw_context *brw) +{ + struct brw_wm_prog_key key; + struct brw_fragment_program *fp = (struct brw_fragment_program *) + brw->fragment_program; + + brw_wm_populate_key(brw, &key); + + /* Make an early check for the key.
+ */ + dri_bo_unreference(brw->wm.prog_bo); + brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, + &key, sizeof(key), + NULL, 0, + &brw->wm.prog_data); + if (brw->wm.prog_bo == NULL) + do_wm_prog(brw, fp, &key); +} + + +const struct brw_tracked_state brw_wm_prog = { + .dirty = { + .mesa = (_NEW_COLOR | + _NEW_DEPTH | + _NEW_HINT | + _NEW_STENCIL | + _NEW_POLYGON | + _NEW_LINE | + _NEW_LIGHT | + _NEW_BUFFERS | + _NEW_TEXTURE), + .brw = (BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_WM_INPUT_DIMENSIONS | + BRW_NEW_REDUCED_PRIMITIVE), + .cache = CACHE_NEW_VS_PROG, + }, + .prepare = brw_prepare_wm_prog +}; + diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h new file mode 100644 index 0000000000..872b1f3ecf --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm.h @@ -0,0 +1,309 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRW_WM_H +#define BRW_WM_H + + +#include "shader/prog_instruction.h" +#include "brw_context.h" +#include "brw_eu.h" + +#define SATURATE (1<<5) + +/* A big lookup table is used to figure out which and how many + * additional regs will be inserted before the main payload in the WM + * program execution. These mainly relate to depth and stencil + * processing and the early-depth-test optimization. + */ +#define IZ_PS_KILL_ALPHATEST_BIT 0x1 +#define IZ_PS_COMPUTES_DEPTH_BIT 0x2 +#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4 +#define IZ_DEPTH_TEST_ENABLE_BIT 0x8 +#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10 +#define IZ_STENCIL_TEST_ENABLE_BIT 0x20 +#define IZ_BIT_MAX 0x40 +/* Line antialiasing modes, passed as line_aa to brw_wm_lookup_iz(). */ +#define AA_NEVER 0 +#define AA_SOMETIMES 1 +#define AA_ALWAYS 2 +/* Key under which a compiled WM program is searched for and stored in the program cache. */ +struct brw_wm_prog_key { + GLuint source_depth_reg:3; + GLuint aa_dest_stencil_reg:3; + GLuint dest_depth_reg:3; + GLuint nr_depth_regs:3; + GLuint computes_depth:1; /* could be derived from program string */ + GLuint source_depth_to_render_target:1; + GLuint flat_shade:1; + GLuint linear_color:1; /**< linear interpolation vs perspective interp */ + GLuint runtime_check_aads_emit:1; + + GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ + GLuint shadowtex_mask:16; + GLuint yuvtex_mask:16; + GLuint yuvtex_swap_mask:16; /* UV swapped */ + + GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; + + GLuint program_string_id:32; + GLuint origin_x, origin_y; + GLuint drawable_height; + GLuint vp_outputs_written; +}; + + +/* A bit of a glossary: + * + * brw_wm_value: A computed value or program input.
Values are + * constant: they are created once and are never modified. When a + * fragment program register is written or overwritten, new values are + * created fresh, preserving the rule that values are constant. + * + * brw_wm_ref: A reference to a value. Wherever a value is used by an + * instruction or as a program output, that is tracked with an + * instance of this struct. All references to a value occur after it + * is created. After the last reference, a value is dead and can be + * discarded. + * + * brw_wm_grf: Represents a physical hardware register. May be either + * empty or hold a value. Register allocation is the process of + * assigning values to grf registers. This occurs in pass2 and the + * brw_wm_grf struct is not used before that. + * + * Fragment program registers: These are time-varying constructs that + * are hard to reason about and which we translate away in pass0. A + * single fragment program register element (e.g. temp[0].x) will be + * translated to one or more brw_wm_value structs, one for each time + * that temp[0].x is written to during the program. + */ + + + +/* Used in pass2 to track register allocation.
+ */ +struct brw_wm_grf { + struct brw_wm_value *value; /* value held by this register, if any */ + GLuint nextuse; +}; + +struct brw_wm_value { + struct brw_reg hw_reg; /* emitted to this reg, may not always be there */ + struct brw_wm_ref *lastuse; + struct brw_wm_grf *resident; + GLuint contributes_to_output:1; + GLuint spill_slot:16; /* if non-zero, spill immediately after calculation */ +}; + +struct brw_wm_ref { + struct brw_reg hw_reg; /* nr filled in in pass2, everything else, pass0 */ + struct brw_wm_value *value; + struct brw_wm_ref *prevuse; + GLuint unspill_reg:7; /* if non-zero, GRF that value->spill_slot is reloaded into before use */ + GLuint emitted:1; + GLuint insn:24; +}; + +struct brw_wm_constref { + const struct brw_wm_ref *ref; + GLfloat constval; +}; + +/* One translated instruction: per-channel destinations plus up to three per-channel source arguments. */ +struct brw_wm_instruction { + struct brw_wm_value *dst[4]; /* a NULL entry is emitted as the null register */ + struct brw_wm_ref *src[3][4]; /* indexed [argument][channel] */ + GLuint opcode:8; + GLuint saturate:1; + GLuint writemask:4; + GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ + GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ + GLuint tex_shadow:1; /* do shadow comparison? */ + GLuint eot:1; /* End of thread indicator for FB_WRITE*/ + GLuint target:10; /* target binding table index for FB_WRITE*/ +}; + + +#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) +#define BRW_WM_MAX_GRF 128 /* hardware limit */ +#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) /* presumably 4 dst values per instruction - confirm */ +#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) /* presumably 3 args x 4 channels per instruction - confirm */ +#define BRW_WM_MAX_PARAM 256 +#define BRW_WM_MAX_CONST 256 +#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS +#define BRW_WM_MAX_SUBROUTINE 16 + + + +/* New opcodes to track internal operations required for WM unit. + * These are added early so that the registers used can be tracked, + * freed and reused like those of other instructions. 
+ */ +#define WM_PIXELXY (MAX_OPCODE) +#define WM_DELTAXY (MAX_OPCODE + 1) +#define WM_PIXELW (MAX_OPCODE + 2) +#define WM_LINTERP (MAX_OPCODE + 3) +#define WM_PINTERP (MAX_OPCODE + 4) +#define WM_CINTERP (MAX_OPCODE + 5) +#define WM_WPOSXY (MAX_OPCODE + 6) +#define WM_FB_WRITE (MAX_OPCODE + 7) +#define WM_FRONTFACING (MAX_OPCODE + 8) +#define MAX_WM_OPCODE (MAX_OPCODE + 9) /* one past WM_FRONTFACING */ + +#define PROGRAM_PAYLOAD (PROGRAM_FILE_MAX) +#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) +/* Compile state handed to brw_wm_pass_fp(), brw_wm_pass0/1/2(), brw_wm_emit() and the brw_wm_print_*() helpers. */ +struct brw_wm_compile { + struct brw_compile func; + struct brw_wm_prog_key key; + struct brw_wm_prog_data prog_data; + + struct brw_fragment_program *fp; + + GLfloat (*env_param)[4]; + + enum { + START, + PASS2_DONE + } state; /* the brw_wm_print_*() helpers print hardware registers once this reaches PASS2_DONE */ + + /* Initial pass - translate fp instructions to fp instructions, + * simplifying and adding instructions for interpolation and + * framebuffer writes. + */ + struct prog_instruction prog_instructions[BRW_WM_MAX_INSN]; + GLuint nr_fp_insns; + GLuint fp_temp; + GLuint fp_interp_emitted; + GLuint fp_fragcolor_emitted; + + struct prog_src_register pixel_xy; + struct prog_src_register delta_xy; + struct prog_src_register pixel_w; + + + struct brw_wm_value vreg[BRW_WM_MAX_VREG]; + GLuint nr_vreg; + + struct brw_wm_value creg[BRW_WM_MAX_PARAM]; + GLuint nr_creg; + + struct { + struct brw_wm_value depth[4]; /* includes r0/r1 */ + struct brw_wm_value input_interp[FRAG_ATTRIB_MAX]; + } payload; + + + const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4]; + + struct brw_wm_ref undef_ref; + struct brw_wm_value undef_value; + + struct brw_wm_ref refs[BRW_WM_MAX_REF]; + GLuint nr_refs; + + struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; + GLuint nr_insns; + + struct brw_wm_constref constref[BRW_WM_MAX_CONST]; + GLuint nr_constrefs; + + struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2]; + + GLuint grf_limit; + GLuint max_wm_grf; + GLuint last_scratch; + + GLuint cur_inst; /**< index of current instruction */ + + GLboolean out_of_regs; /**< ran out of GRF registers? 
*/ + + /** Mapping from Mesa registers to hardware registers */ + struct { + GLboolean inited; + struct brw_reg reg; + } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + + GLboolean used_grf[BRW_WM_MAX_GRF]; + GLuint first_free_grf; + struct brw_reg stack; + struct brw_reg emit_mask_reg; + GLuint tmp_regs[BRW_WM_MAX_GRF]; + GLuint tmp_index; + GLuint tmp_max; + GLuint subroutines[BRW_WM_MAX_SUBROUTINE]; + GLuint dispatch_width; + + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; +}; + + +GLuint brw_wm_nr_args( GLuint opcode ); +GLuint brw_wm_is_scalar_result( GLuint opcode ); + +void brw_wm_pass_fp( struct brw_wm_compile *c ); +void brw_wm_pass0( struct brw_wm_compile *c ); +void brw_wm_pass1( struct brw_wm_compile *c ); +void brw_wm_pass2( struct brw_wm_compile *c ); +void brw_wm_emit( struct brw_wm_compile *c ); +/* Debug printers, defined in brw_wm_debug.c: */ +void brw_wm_print_value( struct brw_wm_compile *c, + struct brw_wm_value *value ); + +void brw_wm_print_ref( struct brw_wm_compile *c, + struct brw_wm_ref *ref ); + +void brw_wm_print_insn( struct brw_wm_compile *c, + struct brw_wm_instruction *inst ); + +void brw_wm_print_program( struct brw_wm_compile *c, + const char *stage ); + +void brw_wm_lookup_iz( GLuint line_aa, + GLuint lookup, + GLboolean ps_uses_depth, + struct brw_wm_prog_key *key ); + +GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); +void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); + +void emit_ddxy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + GLboolean is_ddx, + const struct brw_reg *arg0); + +#endif diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c new file mode 100644 index 0000000000..220821087c --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_debug.c @@ -0,0 +1,174 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. 
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_wm.h" + +/* Print a short tag for 'value': its hardware register once pass2 is done, otherwise "undef", rN (c->vreg), cN (c->creg), iN (payload.input_interp) or dN (payload.depth) according to which array of 'c' the pointer falls in, or "?" if none. The index is cast to int because a pointer difference is ptrdiff_t, which does not match %d. */ +void brw_wm_print_value( struct brw_wm_compile *c, + struct brw_wm_value *value ) +{ + assert(value); + if (c->state >= PASS2_DONE) + brw_print_reg(value->hw_reg); + else if( value == &c->undef_value ) + _mesa_printf("undef"); + else if( value - c->vreg >= 0 && + value - c->vreg < BRW_WM_MAX_VREG) + _mesa_printf("r%d", (int)(value - c->vreg)); + else if (value - c->creg >= 0 && + value - c->creg < BRW_WM_MAX_PARAM) + _mesa_printf("c%d", (int)(value - c->creg)); + else if (value - c->payload.input_interp >= 0 && + value - c->payload.input_interp < FRAG_ATTRIB_MAX) + _mesa_printf("i%d", (int)(value - c->payload.input_interp)); + else if (value - c->payload.depth >= 0 && + value - c->payload.depth < 4) /* payload.depth[] has 4 entries */ + _mesa_printf("d%d", (int)(value - c->payload.depth)); + else + _mesa_printf("?"); +} +/* Print a reference: an "UNSPILL(slot)/" prefix when unspill_reg is set, then either the hardware register (after pass2) or the negate/abs markers, the referenced value and, when non-zero, "->(nr&1).subnr". */ +void brw_wm_print_ref( struct brw_wm_compile *c, + struct brw_wm_ref *ref ) +{ + struct brw_reg hw_reg = ref->hw_reg; + + if (ref->unspill_reg) + _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot); + + if (c->state >= PASS2_DONE) + brw_print_reg(ref->hw_reg); + else { + _mesa_printf("%s", hw_reg.negate ? "-" : ""); + _mesa_printf("%s", hw_reg.abs ? 
"abs/" : ""); + brw_wm_print_value(c, ref->value); + if ((hw_reg.nr&1) || hw_reg.subnr) { + _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); + } + } +} +/* Print one instruction as "[dst,dst,dst,dst].mask = OPCODE[_SAT] [src,src,src,src]..."; '#' stands for a missing destination and '%' for a missing source. */ +void brw_wm_print_insn( struct brw_wm_compile *c, + struct brw_wm_instruction *inst ) +{ + GLuint i, arg; + GLuint nr_args = brw_wm_nr_args(inst->opcode); + + _mesa_printf("["); + for (i = 0; i < 4; i++) { + if (inst->dst[i]) { + brw_wm_print_value(c, inst->dst[i]); + if (inst->dst[i]->spill_slot) + _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot); + } + else + _mesa_printf("#"); + if (i < 3) + _mesa_printf(","); + } + _mesa_printf("]"); + + if (inst->writemask != WRITEMASK_XYZW) + _mesa_printf(".%s%s%s%s", + GET_BIT(inst->writemask, 0) ? "x" : "", + GET_BIT(inst->writemask, 1) ? "y" : "", + GET_BIT(inst->writemask, 2) ? "z" : "", + GET_BIT(inst->writemask, 3) ? "w" : ""); + + switch (inst->opcode) { + case WM_PIXELXY: + _mesa_printf(" = PIXELXY"); + break; + case WM_DELTAXY: + _mesa_printf(" = DELTAXY"); + break; + case WM_PIXELW: + _mesa_printf(" = PIXELW"); + break; + case WM_WPOSXY: + _mesa_printf(" = WPOSXY"); + break; + case WM_PINTERP: + _mesa_printf(" = PINTERP"); + break; + case WM_LINTERP: + _mesa_printf(" = LINTERP"); + break; + case WM_CINTERP: + _mesa_printf(" = CINTERP"); + break; + case WM_FB_WRITE: + _mesa_printf(" = FB_WRITE"); + break; + case WM_FRONTFACING: + _mesa_printf(" = FRONTFACING"); + break; + default: + _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode)); + break; + } + + if (inst->saturate) + _mesa_printf("_SAT"); + + for (arg = 0; arg < nr_args; arg++) { + + _mesa_printf(" ["); + + for (i = 0; i < 4; i++) { + if (inst->src[arg][i]) { + brw_wm_print_ref(c, inst->src[arg][i]); + } + else + _mesa_printf("%%"); + + if (i < 3) + _mesa_printf(","); + else + _mesa_printf("]"); + } + } + _mesa_printf("\n"); +} +/* Print "stage:" followed by every instruction in c->instruction[0 .. c->nr_insns). */ +void brw_wm_print_program( struct brw_wm_compile *c, + const char *stage ) +{ + GLuint insn; + + _mesa_printf("%s:\n", stage); + for (insn = 0; insn < c->nr_insns; 
insn++) + brw_wm_print_insn(c, &c->instruction[insn]); + _mesa_printf("\n"); +} + diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c new file mode 100644 index 0000000000..bf80a2942a --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -0,0 +1,1509 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "main/macros.h" +#include "brw_context.h" +#include "brw_wm.h" + +/* Not quite sure how correct this is - need to understand horiz + * vs. vertical strides a little better. 
+ */ +static INLINE struct brw_reg sechalf( struct brw_reg reg ) +{ + if (reg.vstride) /* only step to the next register number when vstride is non-zero */ + reg.nr++; + return reg; +} + +/* Payload R0: + * + * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, + * corresponding to each of the 16 execution channels. + * R0.1..8 -- ? + * R1.0 -- triangle vertex 0.X + * R1.1 -- triangle vertex 0.Y + * R1.2 -- tile 0 x,y coords (2 packed uwords) + * R1.3 -- tile 1 x,y coords (2 packed uwords) + * R1.4 -- tile 2 x,y coords (2 packed uwords) + * R1.5 -- tile 3 x,y coords (2 packed uwords) + * R1.6 -- ? + * R1.7 -- ? + * R1.8 -- ? + */ + +/* For the X/Y channels selected by mask, write dst[0]/dst[1] (retyped to UW, 16 wide) from the packed tile coordinates in r1; compression is switched off for the ADDs and back to COMPRESSED afterwards. */ +static void emit_pixel_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw,5), 2, 4, 0), + brw_imm_v(0x11001100)); + } + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); +} + + + +static void emit_delta_xy(struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. 
+ */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + dst[1], + retype(arg0[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + + } +} + +static void emit_wpos_xy(struct brw_wm_compile *c, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0) +{ + struct brw_compile *p = &c->func; + + /* Calculate the pixel offset from window bottom left into destination + * X and Y channels. + */ + if (mask & WRITEMASK_X) { + /* X' = X - origin */ + brw_ADD(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_W), + brw_imm_d(0 - c->key.origin_x)); + } + + if (mask & WRITEMASK_Y) { + /* Y' = (height - 1) - (Y - origin_y) = origin_y + height - 1 - Y */ + brw_ADD(p, + dst[1], + negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + } +} + + +static void emit_pixel_w( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas) +{ + /* Don't need this if all you are doing is interpolating color, for + * instance. + */ + if (mask & WRITEMASK_W) { + struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4); + + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. 
+ */ + brw_LINE(p, brw_null_reg(), interp3, deltas[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]); + + /* Calc w */ + brw_math_16( p, dst[3], + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} + + + +static void emit_linterp( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0, + const struct brw_reg *deltas ) +{ + struct brw_reg interp[4]; + GLuint nr = arg0[0].nr; + GLuint i; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for (i = 0; i < 4; i++) { + if (mask & (1<func; + GLuint msgLength, responseLength; + GLuint i, nr; + GLuint emit; + GLuint msg_type; + + /* How many input regs are there? + */ + switch (inst->tex_idx) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: + emit = WRITEMASK_XYZ; + nr = 3; + break; + default: + /* unexpected target */ + abort(); + } + + if (inst->tex_shadow) { + nr = 4; + emit |= WRITEMASK_W; + } + + msgLength = 1; + + for (i = 0; i < nr; i++) { + static const GLuint swz[4] = {0,1,2,2}; + if (emit & (1<brw)) { + if (inst->tex_shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; + } else { + if (inst->tex_shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; + } + + brw_SAMPLE(p, + retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(inst->tex_unit), + inst->tex_unit, /* sampler */ + inst->writemask, + msg_type, + responseLength, + msgLength, + 0, + 1, + BRW_SAMPLER_SIMD_MODE_SIMD16); +} + + +static void emit_txb( struct 
brw_wm_compile *c, + const struct brw_wm_instruction *inst, + struct brw_reg *dst, + GLuint dst_flags, + struct brw_reg *arg ) +{ + struct brw_compile *p = &c->func; + GLuint msgLength; + GLuint msg_type; + /* Shadow ignored for txb. + */ + switch (inst->tex_idx) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), arg[0]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), arg[0]); + brw_MOV(p, brw_message_reg(4), arg[1]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + break; + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: + brw_MOV(p, brw_message_reg(2), arg[0]); + brw_MOV(p, brw_message_reg(4), arg[1]); + brw_MOV(p, brw_message_reg(6), arg[2]); + break; + default: + /* unexpected target */ + abort(); + } + + brw_MOV(p, brw_message_reg(8), arg[3]); + msgLength = 9; + + if (BRW_IS_IGDNG(p->brw)) + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + + brw_SAMPLE(p, + retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), + SURF_INDEX_TEXTURE(inst->tex_unit), + inst->tex_unit, /* sampler */ + inst->writemask, + msg_type, + 8, /* responseLength */ + msgLength, + 0, + 1, + BRW_SAMPLER_SIMD_MODE_SIMD16); +} + + +static void emit_lit( struct brw_compile *p, + const struct brw_reg *dst, + GLuint mask, + const struct brw_reg *arg0 ) +{ + assert((mask & WRITEMASK_XW) == 0); + + if (mask & WRITEMASK_Y) { + brw_set_saturate(p, (mask & SATURATE) ? 
1 : 0); + brw_MOV(p, dst[1], arg0[0]); + brw_set_saturate(p, 0); + } + + if (mask & WRITEMASK_Z) { + emit_math2(p, BRW_MATH_FUNCTION_POW, + &dst[2], + WRITEMASK_X | (mask & SATURATE), + &arg0[1], + &arg0[3]); + } + + /* Ordinarily you'd use an iff statement to skip or shortcircuit + * some of the POW calculations above, but 16-wide iff statements + * seem to lock c1 hardware, so this is a nasty workaround: + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0)); + { + if (mask & WRITEMASK_Y) + brw_MOV(p, dst[1], brw_imm_f(0)); + + if (mask & WRITEMASK_Z) + brw_MOV(p, dst[2], brw_imm_f(0)); + } + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + + +/* Kill pixel - set execution mask to zero for those pixels which + * fail. + */ +static void emit_kil( struct brw_wm_compile *c, + struct brw_reg *arg0) +{ + struct brw_compile *p = &c->func; + struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + GLuint i; + + /* XXX - usually won't need 4 compares! + */ + for (i = 0; i < 4; i++) { + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0)); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_AND(p, r0uw, brw_flag_reg(), r0uw); + brw_pop_insn_state(p); + } +} + +/* KIL_NV kills the pixels that are currently executing, not based on a test + * of the arguments. 
+ */ +static void emit_kil_nv( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, r0uw, c->emit_mask_reg, r0uw); + brw_pop_insn_state(p); +} +/* Copy r1 into message register base_reg+1, then issue the framebuffer write with base_reg as the message start, passing target, nr and eot through to brw_fb_WRITE(). */ +static void fire_fb_write( struct brw_wm_compile *c, + GLuint base_reg, + GLuint nr, + GLuint target, + GLuint eot ) +{ + struct brw_compile *p = &c->func; + + /* Pass through control information: + */ +/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + + /* Send framebuffer write message: */ +/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */ + brw_fb_WRITE(p, + retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + target, + nr, + 0, + eot); +} + +/* Copy one register of arg1 into message register 'reg'; the source is picked by key.aa_dest_stencil_reg (array index = value / 2, register offset = value % 2). */ +static void emit_aa( struct brw_wm_compile *c, + struct brw_reg *arg1, + GLuint reg ) +{ + struct brw_compile *p = &c->func; + GLuint comp = c->key.aa_dest_stencil_reg / 2; + GLuint off = c->key.aa_dest_stencil_reg % 2; + struct brw_reg aa = offset(arg1[comp], off); + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */ + brw_MOV(p, brw_message_reg(reg), aa); + brw_pop_insn_state(p); +} + + +/* Post-fragment-program processing. Send the results to the + * framebuffer. 
+ * \param arg0 the fragment color + * \param arg1 the pass-through depth value + * \param arg2 the shader-computed depth value; target and eot are handed unchanged to fire_fb_write() + */ +static void emit_fb_write( struct brw_wm_compile *c, + struct brw_reg *arg0, + struct brw_reg *arg1, + struct brw_reg *arg2, + GLuint target, + GLuint eot) +{ + struct brw_compile *p = &c->func; + GLuint nr = 2; /* next message register to fill; its final value is the count passed to fire_fb_write() */ + GLuint channel; + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + + /* I don't really understand how this achieves the color interleave + * (ie RGBARGBA) in the result: [Do the saturation here] + */ + { + brw_push_insn_state(p); + + for (channel = 0; channel < 4; channel++) { + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(nr + channel), + arg0[channel]); + + brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); + brw_MOV(p, + brw_message_reg(nr + channel + 4), + sechalf(arg0[channel])); + } + + /* skip over the regs populated above: + */ + nr += 8; + + brw_pop_insn_state(p); + } + + if (c->key.source_depth_to_render_target) + { + if (c->key.computes_depth) + brw_MOV(p, brw_message_reg(nr), arg2[2]); + else + brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */ + + nr += 2; + } + + if (c->key.dest_depth_reg) + { + GLuint comp = c->key.dest_depth_reg / 2; + GLuint off = c->key.dest_depth_reg % 2; + + if (off != 0) { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1)); + /* 2nd half? 
*/ + brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]); + brw_pop_insn_state(p); + } + else { + brw_MOV(p, brw_message_reg(nr), arg1[comp]); + } + nr += 2; + } + + if (!c->key.runtime_check_aads_emit) { + if (c->key.aa_dest_stencil_reg) + emit_aa(c, arg1, 2); + + fire_fb_write(c, 0, nr, target, eot); + } + else { + struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); + struct brw_reg ip = brw_ip_reg(); + struct brw_instruction *jmp; + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_conditionalmod(p, BRW_CONDITIONAL_Z); + brw_AND(p, + v1_null_ud, + get_element_ud(brw_vec8_grf(1,0), 6), + brw_imm_ud(1<<26)); + + jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)); + { + emit_aa(c, arg1, 2); + fire_fb_write(c, 0, nr, target, eot); + /* note - thread killed in subroutine */ + } + brw_land_fwd_jump(p, jmp); + + /* ELSE: Shuffle up one register to fill in the hole left for AA: + */ + fire_fb_write(c, 1, nr-1, target, eot); + } +} + + +/** + * Move a GPR to scratch memory. + */ +static void emit_spill( struct brw_wm_compile *c, + struct brw_reg reg, + GLuint slot ) +{ + struct brw_compile *p = &c->func; + + /* + mov (16) m2.0<1>:ud r2.0<8;8,1>:ud { Align1 Compr } + */ + brw_MOV(p, brw_message_reg(2), reg); + + /* + mov (1) r0.2<1>:d 0x00000080:d { Align1 NoMask } + send (16) null.0<1>:uw m1 r0.0<8;8,1>:uw 0x053003ff:ud { Align1 } + */ + brw_dp_WRITE_16(p, + retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW), + slot); +} + + +/** + * Load a GPR from scratch memory. + */ +static void emit_unspill( struct brw_wm_compile *c, + struct brw_reg reg, + GLuint slot ) +{ + struct brw_compile *p = &c->func; + + /* Slot 0 is the undef value. 
+ */ + if (slot == 0) { + brw_MOV(p, reg, brw_imm_f(0)); /* no scratch read: the register is simply loaded with 0.0 */ + return; + } + + /* + mov (1) r0.2<1>:d 0x000000c0:d { Align1 NoMask } + send (16) r110.0<1>:uw m1 r0.0<8;8,1>:uw 0x041243ff:ud { Align1 } + */ + + brw_dp_READ_16(p, + retype(vec16(reg), BRW_REGISTER_TYPE_UW), + slot); +} + + +/** + * Retrieve up to 4 GEN4 register pairs for the given wm reg: + * Args with unspill_reg != 0 will be loaded from scratch memory. + */ +static void get_argument_regs( struct brw_wm_compile *c, + struct brw_wm_ref *arg[], + struct brw_reg *regs ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + if (arg[i]) { + if (arg[i]->unspill_reg) + emit_unspill(c, + brw_vec8_grf(arg[i]->unspill_reg, 0), + arg[i]->value->spill_slot); + + regs[i] = arg[i]->hw_reg; + } + else { + regs[i] = brw_null_reg(); /* absent channel reference */ + } + } +} + + +/** + * For values that have a spill_slot!=0, write those regs to scratch memory. + */ +static void spill_values( struct brw_wm_compile *c, + struct brw_wm_value *values, + GLuint nr ) +{ + GLuint i; + + for (i = 0; i < nr; i++) + if (values[i].spill_slot) + emit_spill(c, values[i].hw_reg, values[i].spill_slot); +} + + +/* Emit the fragment program instructions here. 
+ */ +void brw_wm_emit( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + GLuint insn; + + brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); + + /* Check if any of the payload regs need to be spilled: + */ + spill_values(c, c->payload.depth, 4); + spill_values(c, c->creg, c->nr_creg); + spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX); + + /* Walk c->instruction[] in order, resolving registers and dispatching on the opcode. */ + for (insn = 0; insn < c->nr_insns; insn++) { + + struct brw_wm_instruction *inst = &c->instruction[insn]; + struct brw_reg args[3][4], dst[4]; + GLuint i, dst_flags; + + /* Get argument regs: + */ + for (i = 0; i < 3; i++) + get_argument_regs(c, inst->src[i], args[i]); + + /* Get dest regs: + */ + for (i = 0; i < 4; i++) + if (inst->dst[i]) + dst[i] = inst->dst[i]->hw_reg; + else + dst[i] = brw_null_reg(); + + /* Flags + */ + dst_flags = inst->writemask; + if (inst->saturate) + dst_flags |= SATURATE; + + switch (inst->opcode) { + /* Generated instructions for calculating triangle interpolants: + */ + case WM_PIXELXY: + emit_pixel_xy(p, dst, dst_flags); + break; + + case WM_DELTAXY: + emit_delta_xy(p, dst, dst_flags, args[0]); + break; + + case WM_WPOSXY: + emit_wpos_xy(c, dst, dst_flags, args[0]); + break; + + case WM_PIXELW: + emit_pixel_w(p, dst, dst_flags, args[0], args[1]); + break; + + case WM_LINTERP: + emit_linterp(p, dst, dst_flags, args[0], args[1]); + break; + + case WM_PINTERP: + emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case WM_CINTERP: + emit_cinterp(p, dst, dst_flags, args[0]); + break; + + case WM_FB_WRITE: + emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot); + break; + + case WM_FRONTFACING: + emit_frontfacing(p, dst, dst_flags); + break; + + /* Straightforward arithmetic: + */ + case OPCODE_ADD: + emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_FRC: + emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); + break; + + case OPCODE_FLR: + emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); + break; + + 
case OPCODE_DDX: + emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]); + break; + + case OPCODE_DDY: + emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); + break; + + case OPCODE_DP3: + emit_dp3(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_DP4: + emit_dp4(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_DPH: + emit_dph(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_TRUNC: + emit_trunc(p, dst, dst_flags, args[0]); + break; + + case OPCODE_LRP: + emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case OPCODE_MAD: + emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case OPCODE_MOV: + case OPCODE_SWZ: + emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); + break; + + case OPCODE_MUL: + emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_XPD: + emit_xpd(p, dst, dst_flags, args[0], args[1]); + break; + + /* Higher math functions: + */ + case OPCODE_RCP: + emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); + break; + + case OPCODE_RSQ: + emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); + break; + + case OPCODE_SIN: + emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); + break; + + case OPCODE_COS: + emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); + break; + + case OPCODE_EX2: + emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); + break; + + case OPCODE_LG2: + emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); + break; + + case OPCODE_SCS: + /* There is an scs math function, but it would need some + * fixup for 16-element execution. 
+ */ + if (dst_flags & WRITEMASK_X) + emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + if (dst_flags & WRITEMASK_Y) + emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + break; + + case OPCODE_POW: + emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); + break; + + /* Comparisons: + */ + case OPCODE_CMP: + emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); + break; + + case OPCODE_MAX: + emit_max(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_MIN: + emit_min(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_SLT: + emit_slt(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_SLE: + emit_sle(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_SGT: + emit_sgt(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_SGE: + emit_sge(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_SEQ: + emit_seq(p, dst, dst_flags, args[0], args[1]); + break; + case OPCODE_SNE: + emit_sne(p, dst, dst_flags, args[0], args[1]); + break; + + case OPCODE_LIT: + emit_lit(p, dst, dst_flags, args[0]); + break; + + /* Texturing operations: + */ + case OPCODE_TEX: + emit_tex(c, inst, dst, dst_flags, args[0]); + break; + + case OPCODE_TXB: + emit_txb(c, inst, dst, dst_flags, args[0]); + break; + + case OPCODE_KIL: + emit_kil(c, args[0]); + break; + + case OPCODE_KIL_NV: + emit_kil_nv(c); + break; + + default: + _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", + inst->opcode, inst->opcode < MAX_OPCODE ? 
+ _mesa_opcode_string(inst->opcode) : + "unknown"); + } + /* Destinations that carry a spill slot are written to scratch right after the instruction. */ + for (i = 0; i < 4; i++) + if (inst->dst[i] && inst->dst[i]->spill_slot) + emit_spill(c, + inst->dst[i]->hw_reg, + inst->dst[i]->spill_slot); + } + /* With DEBUG_WM set, disassemble every generated native instruction to stderr. */ + if (INTEL_DEBUG & DEBUG_WM) { + int i; + + _mesa_printf("wm-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } +} diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c new file mode 100644 index 0000000000..4e3edfbbff --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -0,0 +1,1177 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "brw_context.h" +#include "brw_wm.h" +#include "brw_util.h" + +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_statevars.h" + + +/** An invalid texture target */ +#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS + +/** An invalid texture unit */ +#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT + +#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS + +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 + + +static const char *wm_opcode_strings[] = { + "PIXELXY", + "DELTAXY", + "PIXELW", + "LINTERP", + "PINTERP", + "CINTERP", + "WPOSXY", + "FB_WRITE", + "FRONTFACING", +}; + +#if 0 +static const char *wm_file_strings[] = { + "PAYLOAD" +}; +#endif + + +/*********************************************************************** + * Source regs + */ + +static struct prog_src_register src_reg(GLuint file, GLuint idx) +{ + struct prog_src_register reg; + reg.File = file; + reg.Index = idx; + reg.Swizzle = SWIZZLE_NOOP; + reg.RelAddr = 0; + reg.Negate = NEGATE_NONE; + reg.Abs = 0; + return reg; +} + +static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst) +{ + return src_reg(dst.File, dst.Index); +} + +static struct prog_src_register src_undef( void ) +{ + return src_reg(PROGRAM_UNDEFINED, 0); +} + +static GLboolean src_is_undef(struct prog_src_register src) +{ + return src.File == PROGRAM_UNDEFINED; +} + +static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w ) +{ + reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w); + return reg; +} + +static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x ) +{ + return src_swizzle(reg, x, x, x, x); +} + +static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle ) +{ + reg.Swizzle = 
swizzle; + return reg; +} + + +/*********************************************************************** + * Dest regs + */ + +static struct prog_dst_register dst_reg(GLuint file, GLuint idx) +{ + struct prog_dst_register reg; + reg.File = file; + reg.Index = idx; + reg.WriteMask = WRITEMASK_XYZW; + reg.RelAddr = 0; + reg.CondMask = COND_TR; + reg.CondSwizzle = 0; + reg.CondSrc = 0; + reg.pad = 0; + return reg; +} + +static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask ) +{ + reg.WriteMask &= mask; + return reg; +} + +static struct prog_dst_register dst_undef( void ) +{ + return dst_reg(PROGRAM_UNDEFINED, 0); +} + + + +static struct prog_dst_register get_temp( struct brw_wm_compile *c ) +{ + int bit = _mesa_ffs( ~c->fp_temp ); + + if (!bit) { + _mesa_printf("%s: out of temporaries\n", __FILE__); + exit(1); + } + + c->fp_temp |= 1<<(bit-1); + return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); +} + + +static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) +{ + c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); +} + + +/*********************************************************************** + * Instructions + */ + +static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) +{ + return &c->prog_instructions[c->nr_fp_insns++]; +} + +static struct prog_instruction *emit_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst0) +{ + struct prog_instruction *inst = get_fp_inst(c); + *inst = *inst0; + return inst; +} + +static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, + GLuint op, + struct prog_dst_register dest, + GLuint saturate, + GLuint tex_src_unit, + GLuint tex_src_target, + GLuint tex_shadow, + struct prog_src_register src0, + struct prog_src_register src1, + struct prog_src_register src2 ) +{ + struct prog_instruction *inst = get_fp_inst(c); + + assert(tex_src_unit < BRW_MAX_TEX_UNIT || + tex_src_unit == TEX_UNIT_NONE); + assert(tex_src_target < 
NUM_TEXTURE_TARGETS || + tex_src_target == TEX_TARGET_NONE); + + /* update mask of which texture units are referenced by this program */ + if (tex_src_unit != TEX_UNIT_NONE) + c->fp->tex_units_used |= (1 << tex_src_unit); + + memset(inst, 0, sizeof(*inst)); + + inst->Opcode = op; + inst->DstReg = dest; + inst->SaturateMode = saturate; + inst->TexSrcUnit = tex_src_unit; + inst->TexSrcTarget = tex_src_target; + inst->TexShadow = tex_shadow; + inst->SrcReg[0] = src0; + inst->SrcReg[1] = src1; + inst->SrcReg[2] = src2; + return inst; +} + + +static struct prog_instruction * emit_op(struct brw_wm_compile *c, + GLuint op, + struct prog_dst_register dest, + GLuint saturate, + struct prog_src_register src0, + struct prog_src_register src1, + struct prog_src_register src2 ) +{ + return emit_tex_op(c, op, dest, saturate, + TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */ + src0, src1, src2); +} + + +/* Many Mesa opcodes produce the same value across all the result channels. + * We'd rather not have to support that splatting in the opcode implementations, + * and brw_wm_pass*.c wants to optimize them out by shuffling references around + * anyway. We can easily get both by emitting the opcode to one channel, and + * then MOVing it to the others, which brw_wm_pass*.c already understands. 
+ */ +static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst0) +{ + struct prog_instruction *inst; + unsigned int dst_chan; + unsigned int other_channel_mask; + + if (inst0->DstReg.WriteMask == 0) + return NULL; + + dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1; + inst = get_fp_inst(c); + *inst = *inst0; + inst->DstReg.WriteMask = 1 << dst_chan; + + other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); + if (other_channel_mask != 0) { + inst = emit_op(c, + OPCODE_MOV, + dst_mask(inst0->DstReg, other_channel_mask), + 0, + src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), + src_undef(), + src_undef()); + } + return inst; +} + + +/*********************************************************************** + * Special instructions for interpolation and other tasks + */ + +static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->pixel_xy)) { + struct prog_dst_register pixel_xy = get_temp(c); + struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); + + + /* Emit the out calculations, and hold onto the results. Use + * two instructions as a temporary is required. 
+ */ + /* pixel_xy.xy = PIXELXY payload[0]; + */ + emit_op(c, + WM_PIXELXY, + dst_mask(pixel_xy, WRITEMASK_XY), + 0, + payload_r0_depth, + src_undef(), + src_undef()); + + c->pixel_xy = src_reg_from_dst(pixel_xy); + } + + return c->pixel_xy; +} + +static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->delta_xy)) { + struct prog_dst_register delta_xy = get_temp(c); + struct prog_src_register pixel_xy = get_pixel_xy(c); + struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); + + /* deltas.xy = DELTAXY pixel_xy, payload[0] + */ + emit_op(c, + WM_DELTAXY, + dst_mask(delta_xy, WRITEMASK_XY), + 0, + pixel_xy, + payload_r0_depth, + src_undef()); + + c->delta_xy = src_reg_from_dst(delta_xy); + } + + return c->delta_xy; +} + +static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) +{ + if (src_is_undef(c->pixel_w)) { + struct prog_dst_register pixel_w = get_temp(c); + struct prog_src_register deltas = get_delta_xy(c); + struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); + + /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x + */ + emit_op(c, + WM_PIXELW, + dst_mask(pixel_w, WRITEMASK_W), + 0, + interp_wpos, + deltas, + src_undef()); + + + c->pixel_w = src_reg_from_dst(pixel_w); + } + + return c->pixel_w; +} + +static void emit_interp( struct brw_wm_compile *c, + GLuint idx ) +{ + struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); + struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + struct prog_src_register deltas = get_delta_xy(c); + + /* Need to use PINTERP on attributes which have been + * multiplied by 1/W in the SF program, and LINTERP on those + * which have not: + */ + switch (idx) { + case FRAG_ATTRIB_WPOS: + /* Have to treat wpos.xy specially: + */ + emit_op(c, + WM_WPOSXY, + dst_mask(dst, WRITEMASK_XY), + 0, + get_pixel_xy(c), + src_undef(), + src_undef()); + + dst = dst_mask(dst, WRITEMASK_ZW); + + /* 
PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + */ + emit_op(c, + WM_LINTERP, + dst, + 0, + interp, + deltas, + src_undef()); + break; + case FRAG_ATTRIB_COL0: + case FRAG_ATTRIB_COL1: + if (c->key.flat_shade) { + emit_op(c, + WM_CINTERP, + dst, + 0, + interp, + src_undef(), + src_undef()); + } + else { + if (c->key.linear_color) { + emit_op(c, + WM_LINTERP, + dst, + 0, + interp, + deltas, + src_undef()); + } + else { + /* perspective-corrected color interpolation */ + emit_op(c, + WM_PINTERP, + dst, + 0, + interp, + deltas, + get_pixel_w(c)); + } + } + break; + case FRAG_ATTRIB_FOGC: + /* Interpolate the fog coordinate */ + emit_op(c, + WM_PINTERP, + dst_mask(dst, WRITEMASK_X), + 0, + interp, + deltas, + get_pixel_w(c)); + + emit_op(c, + OPCODE_MOV, + dst_mask(dst, WRITEMASK_YZW), + 0, + src_swizzle(interp, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ONE), + src_undef(), + src_undef()); + break; + + case FRAG_ATTRIB_FACE: + /* XXX review/test this case */ + emit_op(c, + WM_FRONTFACING, + dst_mask(dst, WRITEMASK_X), + 0, + src_undef(), + src_undef(), + src_undef()); + break; + + case FRAG_ATTRIB_PNTC: + /* XXX review/test this case */ + emit_op(c, + WM_PINTERP, + dst_mask(dst, WRITEMASK_XY), + 0, + interp, + deltas, + get_pixel_w(c)); + + emit_op(c, + OPCODE_MOV, + dst_mask(dst, WRITEMASK_ZW), + 0, + src_swizzle(interp, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ZERO, + SWIZZLE_ONE), + src_undef(), + src_undef()); + break; + + default: + emit_op(c, + WM_PINTERP, + dst, + 0, + interp, + deltas, + get_pixel_w(c)); + break; + } + + c->fp_interp_emitted |= 1<fp->program.Base.Parameters; + gl_state_index tokens[STATE_LENGTH]; + GLuint idx; + tokens[0] = s0; + tokens[1] = s1; + tokens[2] = s2; + tokens[3] = s3; + tokens[4] = s4; + + for (idx = 0; idx < paramList->NumParameters; idx++) { + if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR && + memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0) + return 
src_reg(PROGRAM_STATE_VAR, idx); + } + + idx = _mesa_add_state_reference( paramList, tokens ); + + return src_reg(PROGRAM_STATE_VAR, idx); +} + + +static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, + GLfloat s0, + GLfloat s1, + GLfloat s2, + GLfloat s3) +{ + struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; + GLfloat values[4]; + GLuint idx; + GLuint swizzle; + + values[0] = s0; + values[1] = s1; + values[2] = s2; + values[3] = s3; + + /* Have to search, otherwise multiple compilations will each grow + * the parameter list. + */ + for (idx = 0; idx < paramList->NumParameters; idx++) { + if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT && + memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0) + + /* XXX: this mimics the mesa bug which puts all constants and + * parameters into the "PROGRAM_STATE_VAR" category: + */ + return src_reg(PROGRAM_STATE_VAR, idx); + } + + idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); + assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */ + return src_reg(PROGRAM_STATE_VAR, idx); +} + + + +/*********************************************************************** + * Expand various instructions here to simpler forms. 
+ */ +static void precalc_dst( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + struct prog_src_register src1 = inst->SrcReg[1]; + struct prog_dst_register dst = inst->DstReg; + + if (dst.WriteMask & WRITEMASK_Y) { + /* dst.y = mul src0.y, src1.y + */ + emit_op(c, + OPCODE_MUL, + dst_mask(dst, WRITEMASK_Y), + inst->SaturateMode, + src0, + src1, + src_undef()); + } + + if (dst.WriteMask & WRITEMASK_XZ) { + struct prog_instruction *swz; + GLuint z = GET_SWZ(src0.Swizzle, Z); + + /* dst.xz = swz src0.1zzz + */ + swz = emit_op(c, + OPCODE_SWZ, + dst_mask(dst, WRITEMASK_XZ), + inst->SaturateMode, + src_swizzle(src0, SWIZZLE_ONE, z, z, z), + src_undef(), + src_undef()); + /* Avoid letting negation flag of src0 affect our 1 constant. */ + swz->SrcReg[0].Negate &= ~NEGATE_X; + } + if (dst.WriteMask & WRITEMASK_W) { + /* dst.w = mov src1.w + */ + emit_op(c, + OPCODE_MOV, + dst_mask(dst, WRITEMASK_W), + inst->SaturateMode, + src1, + src_undef(), + src_undef()); + } +} + + +static void precalc_lit( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + struct prog_dst_register dst = inst->DstReg; + + if (dst.WriteMask & WRITEMASK_XW) { + struct prog_instruction *swz; + + /* dst.xw = swz src0.1111 + */ + swz = emit_op(c, + OPCODE_SWZ, + dst_mask(dst, WRITEMASK_XW), + 0, + src_swizzle1(src0, SWIZZLE_ONE), + src_undef(), + src_undef()); + /* Avoid letting the negation flag of src0 affect our 1 constant. */ + swz->SrcReg[0].Negate = NEGATE_NONE; + } + + if (dst.WriteMask & WRITEMASK_YZ) { + emit_op(c, + OPCODE_LIT, + dst_mask(dst, WRITEMASK_YZ), + inst->SaturateMode, + src0, + src_undef(), + src_undef()); + } +} + + +/** + * Some TEX instructions require extra code, cube map coordinate + * normalization, or coordinate scaling for RECT textures, etc. + * This function emits those extra instructions and the TEX + * instruction itself. 
+ */ +static void precalc_tex( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct prog_src_register coord; + struct prog_dst_register tmpcoord; + const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + + assert(unit < BRW_MAX_TEX_UNIT); + + if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { + struct prog_instruction *out; + struct prog_dst_register tmp0 = get_temp(c); + struct prog_src_register tmp0src = src_reg_from_dst(tmp0); + struct prog_dst_register tmp1 = get_temp(c); + struct prog_src_register tmp1src = src_reg_from_dst(tmp1); + struct prog_src_register src0 = inst->SrcReg[0]; + + /* find longest component of coord vector and normalize it */ + tmpcoord = get_temp(c); + coord = src_reg_from_dst(tmpcoord); + + /* tmpcoord = src0 (i.e.: coord = src0) */ + out = emit_op(c, OPCODE_MOV, + tmpcoord, + 0, + src0, + src_undef(), + src_undef()); + out->SrcReg[0].Negate = NEGATE_NONE; + out->SrcReg[0].Abs = 1; + + /* tmp0 = MAX(coord.X, coord.Y) */ + emit_op(c, OPCODE_MAX, + tmp0, + 0, + src_swizzle1(coord, X), + src_swizzle1(coord, Y), + src_undef()); + + /* tmp1 = MAX(tmp0, coord.Z) */ + emit_op(c, OPCODE_MAX, + tmp1, + 0, + tmp0src, + src_swizzle1(coord, Z), + src_undef()); + + /* tmp0 = 1 / tmp1 */ + emit_op(c, OPCODE_RCP, + dst_mask(tmp0, WRITEMASK_X), + 0, + tmp1src, + src_undef(), + src_undef()); + + /* tmpCoord = src0 * tmp0 */ + emit_op(c, OPCODE_MUL, + tmpcoord, + 0, + src0, + src_swizzle1(tmp0src, SWIZZLE_X), + src_undef()); + + release_temp(c, tmp0); + release_temp(c, tmp1); + } + else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { + struct prog_src_register scale = + search_or_add_param5( c, + STATE_INTERNAL, + STATE_TEXRECT_SCALE, + unit, + 0,0 ); + + tmpcoord = get_temp(c); + + /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } + */ + emit_op(c, + OPCODE_MUL, + tmpcoord, + 0, + inst->SrcReg[0], + src_swizzle(scale, + SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_ONE, + SWIZZLE_ONE), + src_undef()); + + coord = 
src_reg_from_dst(tmpcoord); + } + else { + coord = inst->SrcReg[0]; + } + + /* Need to emit YUV texture conversions by hand. Probably need to + * do this here - the alternative is in brw_wm_emit.c, but the + * conversion requires allocating a temporary variable which we + * don't have the facility to do that late in the compilation. + */ + if (c->key.yuvtex_mask & (1 << unit)) { + /* convert ycbcr to RGBA */ + GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<DstReg; + struct prog_dst_register tmp = get_temp(c); + struct prog_src_register tmpsrc = src_reg_from_dst(tmp); + struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); + struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); + + /* tmp = TEX ... + */ + emit_tex_op(c, + OPCODE_TEX, + tmp, + inst->SaturateMode, + unit, + inst->TexSrcTarget, + inst->TexShadow, + coord, + src_undef(), + src_undef()); + + /* tmp.xyz = ADD TMP, C0 + */ + emit_op(c, + OPCODE_ADD, + dst_mask(tmp, WRITEMASK_XYZ), + 0, + tmpsrc, + C0, + src_undef()); + + /* YUV.y = MUL YUV.y, C0.w + */ + + emit_op(c, + OPCODE_MUL, + dst_mask(tmp, WRITEMASK_Y), + 0, + tmpsrc, + src_swizzle1(C0, W), + src_undef()); + + /* + * if (UV swaped) + * RGB.xyz = MAD YUV.zzx, C1, YUV.y + * else + * RGB.xyz = MAD YUV.xxz, C1, YUV.y + */ + + emit_op(c, + OPCODE_MAD, + dst_mask(dst, WRITEMASK_XYZ), + 0, + swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), + C1, + src_swizzle1(tmpsrc, Y)); + + /* RGB.y = MAD YUV.z, C1.w, RGB.y + */ + emit_op(c, + OPCODE_MAD, + dst_mask(dst, WRITEMASK_Y), + 0, + src_swizzle1(tmpsrc, Z), + src_swizzle1(C1, W), + src_swizzle1(src_reg_from_dst(dst), Y)); + + release_temp(c, tmp); + } + else { + /* ordinary RGBA tex instruction */ + emit_tex_op(c, + OPCODE_TEX, + inst->DstReg, + inst->SaturateMode, + unit, + inst->TexSrcTarget, + inst->TexShadow, + coord, + src_undef(), + src_undef()); + } + + /* For GL_EXT_texture_swizzle: */ + if (c->key.tex_swizzles[unit] != 
SWIZZLE_NOOP) { + /* swizzle the result of the TEX instruction */ + struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); + emit_op(c, OPCODE_SWZ, + inst->DstReg, + SATURATE_OFF, /* saturate already done above */ + src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), + src_undef(), + src_undef()); + } + + if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || + (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) + release_temp(c, tmpcoord); +} + + +/** + * Check if the given TXP instruction really needs the divide-by-W step. + */ +static GLboolean projtex( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + const struct prog_src_register src = inst->SrcReg[0]; + GLboolean retVal; + + assert(inst->Opcode == OPCODE_TXP); + + /* Only try to detect the simplest cases. Could detect (later) + * cases where we are trying to emit code like RCP {1.0}, MUL x, + * {1.0}, and so on. + * + * More complex cases than this typically only arise from + * user-provided fragment programs anyway: + */ + if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) + retVal = GL_FALSE; /* ut2004 gun rendering !?! */ + else if (src.File == PROGRAM_INPUT && + GET_SWZ(src.Swizzle, W) == W && + (c->key.proj_attrib_mask & (1 << src.Index)) == 0) + retVal = GL_FALSE; + else + retVal = GL_TRUE; + + return retVal; +} + + +/** + * Emit code for TXP. 
+ */ +static void precalc_txp( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + + if (projtex(c, inst)) { + struct prog_dst_register tmp = get_temp(c); + struct prog_instruction tmp_inst; + + /* tmp0.w = RCP inst.arg[0][3] + */ + emit_op(c, + OPCODE_RCP, + dst_mask(tmp, WRITEMASK_W), + 0, + src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), + src_undef(), + src_undef()); + + /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww + */ + emit_op(c, + OPCODE_MUL, + dst_mask(tmp, WRITEMASK_XYZ), + 0, + src0, + src_swizzle1(src_reg_from_dst(tmp), W), + src_undef()); + + /* dst = precalc(TEX tmp0) + */ + tmp_inst = *inst; + tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); + precalc_tex(c, &tmp_inst); + + release_temp(c, tmp); + } + else + { + /* dst = precalc(TEX src0) + */ + precalc_tex(c, inst); + } +} + + + +static void emit_fb_write( struct brw_wm_compile *c ) +{ + struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); + struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); + struct prog_src_register outcolor; + GLuint i; + + struct prog_instruction *inst, *last_inst; + struct brw_context *brw = c->func.brw; + + /* The inst->Aux field is used for FB write target and the EOT marker */ + + if (brw->state.nr_color_regions > 1) { + for (i = 0 ; i < brw->state.nr_color_regions; i++) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); + last_inst = inst = emit_op(c, + WM_FB_WRITE, dst_mask(dst_undef(),0), 0, + outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + if (c->fp_fragcolor_emitted) { + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); + last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = (i<<1); + } + } + last_inst->Aux |= 1; //eot + } + else { + /* if gl_FragData[0] is written, use it, else use gl_FragColor */ + if (c->fp->program.Base.OutputsWritten & (1 << 
FRAG_RESULT_DATA0)) + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); + else + outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); + + inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), + 0, outcolor, payload_r0_depth, outdepth); + inst->Aux = 1|(0<<1); + } +} + + + + +/*********************************************************************** + * Emit INTERP instructions ahead of first use of each attrib. + */ + +static void validate_src_regs( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + GLuint nr_args = brw_wm_nr_args( inst->Opcode ); + GLuint i; + + for (i = 0; i < nr_args; i++) { + if (inst->SrcReg[i].File == PROGRAM_INPUT) { + GLuint idx = inst->SrcReg[i].Index; + if (!(c->fp_interp_emitted & (1<DstReg.File == PROGRAM_OUTPUT) { + GLuint idx = inst->DstReg.Index; + if (idx == FRAG_RESULT_COLOR) + c->fp_fragcolor_emitted = 1; + } +} + +static void print_insns( const struct prog_instruction *insn, + GLuint nr ) +{ + GLuint i; + for (i = 0; i < nr; i++, insn++) { + _mesa_printf("%3d: ", i); + if (insn->Opcode < MAX_OPCODE) + _mesa_print_instruction(insn); + else if (insn->Opcode < MAX_WM_OPCODE) { + GLuint idx = insn->Opcode - MAX_OPCODE; + + _mesa_print_alu_instruction(insn, + wm_opcode_strings[idx], + 3); + } + else + _mesa_printf("965 Opcode %d\n", insn->Opcode); + } +} + + +/** + * Initial pass for fragment program code generation. + * This function is used by both the GLSL and non-GLSL paths. + */ +void brw_wm_pass_fp( struct brw_wm_compile *c ) +{ + struct brw_fragment_program *fp = c->fp; + GLuint insn; + + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("pre-fp:\n"); + _mesa_print_program(&fp->program.Base); + _mesa_printf("\n"); + } + + c->pixel_xy = src_undef(); + c->delta_xy = src_undef(); + c->pixel_w = src_undef(); + c->nr_fp_insns = 0; + c->fp->tex_units_used = 0x0; + + /* Emit preamble instructions. 
This is where special instructions such as + * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to + * compute shader inputs from varying vars. + */ + for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { + const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; + validate_src_regs(c, inst); + validate_dst_regs(c, inst); + } + + /* Loop over all instructions doing assorted simplifications and + * transformations. + */ + for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { + const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; + struct prog_instruction *out; + + /* Check for INPUT values, emit INTERP instructions where + * necessary: + */ + + switch (inst->Opcode) { + case OPCODE_SWZ: + out = emit_insn(c, inst); + out->Opcode = OPCODE_MOV; + break; + + case OPCODE_ABS: + out = emit_insn(c, inst); + out->Opcode = OPCODE_MOV; + out->SrcReg[0].Negate = NEGATE_NONE; + out->SrcReg[0].Abs = 1; + break; + + case OPCODE_SUB: + out = emit_insn(c, inst); + out->Opcode = OPCODE_ADD; + out->SrcReg[1].Negate ^= NEGATE_XYZW; + break; + + case OPCODE_SCS: + out = emit_insn(c, inst); + /* This should probably be done in the parser. + */ + out->DstReg.WriteMask &= WRITEMASK_XY; + break; + + case OPCODE_DST: + precalc_dst(c, inst); + break; + + case OPCODE_LIT: + precalc_lit(c, inst); + break; + + case OPCODE_TEX: + precalc_tex(c, inst); + break; + + case OPCODE_TXP: + precalc_txp(c, inst); + break; + + case OPCODE_TXB: + out = emit_insn(c, inst); + out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); + break; + + case OPCODE_XPD: + out = emit_insn(c, inst); + /* This should probably be done in the parser. + */ + out->DstReg.WriteMask &= WRITEMASK_XYZ; + break; + + case OPCODE_KIL: + out = emit_insn(c, inst); + /* This should probably be done in the parser. 
+ */ + out->DstReg.WriteMask = 0; + break; + case OPCODE_END: + emit_fb_write(c); + break; + case OPCODE_PRINT: + break; + default: + if (brw_wm_is_scalar_result(inst->Opcode)) + emit_scalar_insn(c, inst); + else + emit_insn(c, inst); + break; + } + } + + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("pass_fp:\n"); + print_insns( c->prog_instructions, c->nr_fp_insns ); + _mesa_printf("\n"); + } +} + diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c new file mode 100644 index 0000000000..c9fe1dd8ad --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -0,0 +1,3046 @@ +#include "main/macros.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" +#include "shader/prog_optimize.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" + +enum _subroutine { + SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 +}; + +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint component); + +/** + * Determine if the given fragment program uses GLSL features such + * as flow conditionals, loops, subroutines. + * Some GLSL shaders may use these features, others might not. + */ +GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) +{ + int i; + + for (i = 0; i < fp->Base.NumInstructions; i++) { + const struct prog_instruction *inst = &fp->Base.Instructions[i]; + switch (inst->Opcode) { + case OPCODE_ARL: + case OPCODE_IF: + case OPCODE_ENDIF: + case OPCODE_CAL: + case OPCODE_BRK: + case OPCODE_RET: + case OPCODE_NOISE1: + case OPCODE_NOISE2: + case OPCODE_NOISE3: + case OPCODE_NOISE4: + case OPCODE_BGNLOOP: + return GL_TRUE; + default: + break; + } + } + return GL_FALSE; +} + + + +static void +reclaim_temps(struct brw_wm_compile *c); + + +/** Mark GRF register as used. */ +static void +prealloc_grf(struct brw_wm_compile *c, int r) +{ + c->used_grf[r] = GL_TRUE; +} + + +/** Mark given GRF register as not in use. 
*/ +static void +release_grf(struct brw_wm_compile *c, int r) +{ + /*assert(c->used_grf[r]);*/ + c->used_grf[r] = GL_FALSE; + c->first_free_grf = MIN2(c->first_free_grf, r); +} + + +/** Return index of a free GRF, mark it as used. */ +static int +alloc_grf(struct brw_wm_compile *c) +{ + GLuint r; + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + /* no free temps, try to reclaim some */ + reclaim_temps(c); + c->first_free_grf = 0; + + /* try alloc again */ + for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) { + if (!c->used_grf[r]) { + c->used_grf[r] = GL_TRUE; + c->first_free_grf = r + 1; /* a guess */ + return r; + } + } + + for (r = 0; r < BRW_WM_MAX_GRF; r++) { + assert(c->used_grf[r]); + } + + /* really, no free GRF regs found */ + if (!c->out_of_regs) { + /* print warning once per compilation */ + _mesa_warning(NULL, "i965: ran out of registers for fragment program"); + c->out_of_regs = GL_TRUE; + } + + return -1; +} + + +/** Return number of GRF registers used */ +static int +num_grf_used(const struct brw_wm_compile *c) +{ + int r; + for (r = BRW_WM_MAX_GRF - 1; r >= 0; r--) + if (c->used_grf[r]) + return r + 1; + return 0; +} + + + +/** + * Record the mapping of a Mesa register to a hardware register. + */ +static void set_reg(struct brw_wm_compile *c, int file, int index, + int component, struct brw_reg reg) +{ + c->wm_regs[file][index][component].reg = reg; + c->wm_regs[file][index][component].inited = GL_TRUE; +} + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + struct brw_reg reg; + + /* if we need to allocate another temp, grow the tmp_regs[] array */ + if (c->tmp_index == c->tmp_max) { + int r = alloc_grf(c); + if (r < 0) { + /*printf("Out of temps in %s\n", __FUNCTION__);*/ + r = 50; /* XXX random register! 
*/ + } + c->tmp_regs[ c->tmp_max++ ] = r; + } + + /* form the GRF register */ + reg = brw_vec8_grf(c->tmp_regs[ c->tmp_index++ ], 0); + /*printf("alloc_temp %d\n", reg.nr);*/ + assert(reg.nr < BRW_WM_MAX_GRF); + return reg; + +} + +/** + * Save current temp register info. + * There must be a matching call to release_tmps(). + */ +static int mark_tmps(struct brw_wm_compile *c) +{ + return c->tmp_index; +} + +static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index ) +{ + return brw_vec8_grf( c->tmp_regs[ index ], 0 ); +} + +static void release_tmps(struct brw_wm_compile *c, int mark) +{ + c->tmp_index = mark; +} + +/** + * Convert Mesa src register to brw register. + * + * Since we're running in SOA mode each Mesa register corresponds to four + * hardware registers. We allocate the hardware registers as needed here. + * + * \param file register file, one of PROGRAM_x + * \param index register number + * \param component src component (X=0, Y=1, Z=2, W=3) + * \param nr not used?!? + * \param neg negate value? + * \param abs take absolute value? 
+ */ +static struct brw_reg +get_reg(struct brw_wm_compile *c, int file, int index, int component, + int nr, GLuint neg, GLuint abs) +{ + struct brw_reg reg; + switch (file) { + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + file = PROGRAM_STATE_VAR; + break; + case PROGRAM_UNDEFINED: + return brw_null_reg(); + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + case PROGRAM_PAYLOAD: + break; + default: + _mesa_problem(NULL, "Unexpected file in get_reg()"); + return brw_null_reg(); + } + + assert(index < 256); + assert(component < 4); + + /* see if we've already allocated a HW register for this Mesa register */ + if (c->wm_regs[file][index][component].inited) { + /* yes, re-use */ + reg = c->wm_regs[file][index][component].reg; + } + else { + /* no, allocate new register */ + int grf = alloc_grf(c); + /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/ + if (grf < 0) { + /* totally out of temps */ + grf = 51; /* XXX random register! */ + } + + reg = brw_vec8_grf(grf, 0); + /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/ + + set_reg(c, file, index, component, reg); + } + + if (neg & (1 << component)) { + reg = negate(reg); + } + if (abs) + reg = brw_abs(reg); + return reg; +} + + + +/** + * This is called if we run out of GRF registers. Examine the live intervals + * of temp regs in the program and free those which won't be used again. 
+ * + * Each channel of a reclaimed temporary that has a hardware register gets + * that register released and its mapping flag cleared. + */ +static void +reclaim_temps(struct brw_wm_compile *c) +{ + GLint intBegin[MAX_PROGRAM_TEMPS]; + GLint intEnd[MAX_PROGRAM_TEMPS]; /* both arrays are filled in by _mesa_find_temp_intervals(); only intEnd is consulted below */ + int index; + + /*printf("Reclaim temps:\n");*/ + + _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, + intBegin, intEnd); + + for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { + if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { + /* program temp[index] can be freed: its intEnd entry is set (not -1) and is below c->cur_inst */ + int component; + /*printf(" temp[%d] is dead\n", index);*/ + for (component = 0; component < 4; component++) { + if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { + int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; + release_grf(c, r); /* release the GRF that backed this channel */ + /* + printf(" Reclaim temp %d, reg %d at inst %d\n", + index, r, c->cur_inst); + */ + c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; /* the channel no longer has a hardware register */ + } + } + } + } +} + + + + +/** + * Preallocate registers. This sets up the Mesa to hardware register + * mapping for certain registers, such as constants (uniforms/state vars) + * and shader inputs. + * + * In order, this function: + * - clears c->used_grf and resets c->first_free_grf; + * - maps PAYLOAD_DEPTH component i onto GRF i * 2 (GRF 0 for components at + * or beyond key.nr_depth_regs); + * - chooses between a constant buffer and GRF-resident constants and, in + * the latter case, maps every constant channel and records a pointer to + * its value in prog_data.param[]; + * - maps the fragment shader inputs; + * - reserves emit_mask_reg and stack, then marks GRFs [0, reg_index) and + * GRFs 126 and 127 as in use; + * - obtains destination registers for every TEX/TXB instruction up front; + * - when the constant buffer is used, allocates the three current_const + * registers. + */ +static void prealloc_reg(struct brw_wm_compile *c) +{ + int i, j; + struct brw_reg reg; + int urb_read_length = 0; + GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted; + GLuint reg_index = 0; + + memset(c->used_grf, GL_FALSE, sizeof(c->used_grf)); + c->first_free_grf = 0; + + for (i = 0; i < 4; i++) { + if (i < c->key.nr_depth_regs) + reg = brw_vec8_grf(i * 2, 0); + else + reg = brw_vec8_grf(0, 0); + set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); + } + reg_index += 2 * c->key.nr_depth_regs; /* two GRFs per depth register */ + + /* constants */ + { + const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters; + const GLuint nr_temps = c->fp->program.Base.NumTemporaries; + + /* use a real constant buffer, or just use a section of the GRF? */ + /* XXX this heuristic may need adjustment...
*/ + if ((nr_params + nr_temps) * 4 + reg_index > 80) + c->fp->use_const_buffer = GL_TRUE; + else + c->fp->use_const_buffer = GL_FALSE; + /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/ + + if (c->fp->use_const_buffer) { + /* We'll use a real constant buffer and fetch constants from + * it with a dataport read message. + */ + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 0; + } + else { + const struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ + for (i = 0; i < nr_params; i++) { + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(reg_index + index / 8, index % 8); /* eight scalar constants are packed into each GRF */ + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); /* rounded up to an even number of registers */ + reg_index += c->nr_creg; + } + } + + /* fragment shader inputs: walk the vertex results and translate each index to a fragment attribute (VAR0 and above are rebased onto FRAG_ATTRIB_VAR0, indices up to TEX7 map directly, anything else has no fragment input) */ + for (i = 0; i < VERT_RESULT_MAX; i++) { + int fp_input; + + if (i >= VERT_RESULT_VAR0) + fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0; + else if (i <= VERT_RESULT_TEX7) + fp_input = i; + else + fp_input = -1; + + if (fp_input >= 0 && inputs & (1 << fp_input)) { + urb_read_length = reg_index; + reg = brw_vec8_grf(reg_index, 0); + for (j = 0; j < 4; j++) + set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); + } + if (c->key.vp_outputs_written & (1 << i)) { + reg_index += 2; /* every written vertex output advances by two GRFs, whether or not the fragment program reads it */ + } + } + + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = urb_read_length; + c->prog_data.curb_read_length = c->nr_creg; /* NOTE(review): nr_creg is only assigned above on the non-const-buffer path; confirm it is zero when use_const_buffer is set */ + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index++; + c->stack =
brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0); + reg_index += 2; + + /* mark GRF regs [0..reg_index-1] as in-use */ + for (i = 0; i < reg_index; i++) + prealloc_grf(c, i); + + /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */ + prealloc_grf(c, 126); + prealloc_grf(c, 127); + + for (i = 0; i < c->nr_fp_insns; i++) { + const struct prog_instruction *inst = &c->prog_instructions[i]; + struct brw_reg dst[4]; + + switch (inst->Opcode) { + case OPCODE_TEX: + case OPCODE_TXB: + /* Allocate the channels of texture results contiguously, + * since they are written out that way by the sampler unit. + */ + for (j = 0; j < 4; j++) { + dst[j] = get_dst_reg(c, inst, j); + if (j != 0) + assert(dst[j].nr == dst[j - 1].nr + 1); /* contiguity is only checked, not enforced */ + } + break; + default: + break; + } + } + + /* An instruction may reference up to three constants. + * They'll be found in these registers. + * XXX alloc these on demand! + */ + if (c->fp->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; /* -1: no constant fetched into this slot yet */ + c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0); /* NOTE(review): get_reg() treats a negative alloc_grf() result as out-of-registers; that case is not handled here */ + } + } +#if 0 + printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer); + printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index); +#endif +} + + +/** + * Check if any of the instruction's src registers are constants, uniforms, + * or statevars. If so, fetch any constants that we don't already have in + * the three GRF slots.
+ */ +static void fetch_constants(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint i; + + /* loop over instruction src regs */ + for (i = 0; i < 3; i++) { + const struct prog_src_register *src = &inst->SrcReg[i]; + if (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM) { + c->current_const[i].index = src->Index; + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, i, c->current_const[i].reg.nr); +#endif + + /* need to fetch the constant now */ + brw_dp_READ_4(p, + c->current_const[i].reg, /* writeback dest */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_FRAG_CONST_BUFFER/* binding table index */ + ); + } + } +} + + +/** + * Convert Mesa dst register to brw register. + */ +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint component) +{ + const int nr = 1; + return get_reg(c, inst->DstReg.File, inst->DstReg.Index, component, nr, + 0, 0); +} + + +static struct brw_reg +get_src_reg_const(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint component) +{ + /* We should have already fetched the constant from the constant + * buffer in fetch_constants(). Now we just have to return a + * register description that extracts the needed component and + * smears it across all eight vector components. 
+ */ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + struct brw_reg const_reg; + + assert(component < 4); + assert(srcRegIndex < 3); + assert(c->current_const[srcRegIndex].index != -1); + const_reg = c->current_const[srcRegIndex].reg; + + /* extract desired float from the const_reg, and smear */ + const_reg = stride(const_reg, 0, 1, 0); + const_reg.subnr = component * 4; + + if (src->Negate & (1 << component)) + const_reg = negate(const_reg); + if (src->Abs) + const_reg = brw_abs(const_reg); + +#if 0 + printf(" form const[%d].%d for arg %d, reg %d\n", + c->current_const[srcRegIndex].index, + component, + srcRegIndex, + const_reg.nr); +#endif + + return const_reg; +} + + +/** + * Convert Mesa src register to brw register. + */ +static struct brw_reg get_src_reg(struct brw_wm_compile *c, + const struct prog_instruction *inst, + GLuint srcRegIndex, GLuint channel) +{ + const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; + const GLuint nr = 1; + const GLuint component = GET_SWZ(src->Swizzle, channel); + + /* Extended swizzle terms */ + if (component == SWIZZLE_ZERO) { + return brw_imm_f(0.0F); + } + else if (component == SWIZZLE_ONE) { + return brw_imm_f(1.0F); + } + + if (c->fp->use_const_buffer && + (src->File == PROGRAM_STATE_VAR || + src->File == PROGRAM_CONSTANT || + src->File == PROGRAM_UNIFORM)) { + return get_src_reg_const(c, inst, srcRegIndex, component); + } + else { + /* other type of source register */ + return get_reg(c, src->File, src->Index, component, nr, + src->Negate, src->Abs); + } +} + + +/** + * Same as \sa get_src_reg() but if the register is a literal, emit + * a brw_reg encoding the literal. + * Note that a brw instruction only allows one src operand to be a literal. + * For instructions with more than one operand, only the second can be a + * literal. This means that we treat some literals as constants/uniforms + * (which why PROGRAM_CONSTANT is checked in fetch_constants()). 
+ *
+ */
+static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c,
+                                      const struct prog_instruction *inst,
+                                      GLuint srcRegIndex, GLuint channel)
+{
+   /* c           - compile state (supplies the program's parameter list)
+    * inst        - instruction whose source operand is being encoded
+    * srcRegIndex - which source operand of inst
+    * channel     - destination channel; mapped through the operand's swizzle
+    *
+    * Returns a float immediate for PROGRAM_CONSTANT operands, otherwise
+    * whatever get_src_reg() returns for the same arguments.
+    */
+   const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+   const int component = GET_SWZ(src->Swizzle, channel);
+
+   /* Only fold the value into an immediate when the swizzle selects one of
+    * the four stored floats.  The extended swizzle terms (SWIZZLE_ZERO /
+    * SWIZZLE_ONE) are not indices into the parameter's float[4]; indexing
+    * with them would read past the value.  get_src_reg() already encodes
+    * those terms, so leave them to it.
+    */
+   if (src->File == PROGRAM_CONSTANT && component < 4) {
+      /* a literal */
+      const GLfloat *param =
+         c->fp->program.Base.Parameters->ParameterValues[src->Index];
+      GLfloat value = param[component];
+
+      /* Take |x| before flipping the sign: Negate is a post-Abs modifier,
+       * so an operand with both set means -|x|.  (Negating first and then
+       * calling FABSF always produced +|x| and silently dropped the sign.)
+       * NOTE(review): get_reg() and get_src_reg_const() set the two flags
+       * on a brw_reg instead; confirm they end up with the same result.
+       */
+      if (src->Abs)
+         value = FABSF(value);
+      if (src->Negate & (1 << channel))
+         value = -value;
+#if 0
+      printf(" form immed value %f for chan %d\n", value, channel);
+#endif
+      return brw_imm_f(value);
+   }
+
+   return get_src_reg(c, inst, srcRegIndex, channel);
+}
+
+
+/**
+ * Subroutines are minimal support for reusable instruction sequences.
+ * They are implemented as simply as possible to minimise overhead: there
+ * is no explicit support for communication between the caller and callee
+ * other than saving the return address in a temporary register, nor is
+ * there any automatic local storage. This implies that great care is
+ * required before attempting reentrancy or any kind of nested
+ * subroutine invocations.
+ * + * \param subroutine index into c->subroutines[]; a non-zero entry is the + * value of p->nr_insn recorded when the body was first emitted + * \param emit callback that emits the subroutine body on first use + */ +static void invoke_subroutine( struct brw_wm_compile *c, + enum _subroutine subroutine, + void (*emit)( struct brw_wm_compile * ) ) +{ + struct brw_compile *p = &c->func; + + assert( subroutine < BRW_WM_MAX_SUBROUTINE ); + + if( c->subroutines[ subroutine ] ) { + /* subroutine previously emitted: reuse existing instructions */ + + int mark = mark_tmps( c ); + struct brw_reg return_address = retype( alloc_tmp( c ), + BRW_REGISTER_TYPE_UD ); + int here = p->nr_insn; /* index of the first ADD emitted below */ + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) ); /* return address = ip plus two instruction slots; counts are scaled by 16 via the shift, presumably the instruction size in bytes */ + + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), + brw_imm_d( ( c->subroutines[ subroutine ] - + here - 1 ) << 4 ) ); /* jump: offset from this instruction (index here + 1) to the recorded start of the body */ + brw_pop_insn_state(p); + + release_tmps( c, mark ); + } else { + /* previously unused subroutine: emit, and mark for later reuse */ + + int mark = mark_tmps( c ); + struct brw_reg return_address = retype( alloc_tmp( c ), + BRW_REGISTER_TYPE_UD ); + struct brw_instruction *calc; + int base = p->nr_insn; + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) ); /* placeholder offset, patched through calc once the body length is known */ + brw_pop_insn_state(p); + + c->subroutines[ subroutine ] = p->nr_insn; /* the body starts at the next instruction emitted */ + + emit( c ); + + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV( p, brw_ip_reg(), return_address ); /* return: load ip from the saved address */ + brw_pop_insn_state(p); + + brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) ); /* number of instructions emitted since base, scaled like the offsets above */ + + release_tmps( c, mark ); + } +} +/* NOTE(review): the body of emit_trunc() below breaks off right after "mask & (1" and the text resumes at "func;" inside a later function, twice in a row. That is not valid C; the spans in between appear to have been lost when this patch text was extracted, so emit_trunc(), the function after it and the head of the one after that cannot be reviewed from here. */ +static void emit_trunc( struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (mask & (1<func; + GLuint mask = inst->DstReg.WriteMask; + brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF); + for (i = 0; i < 4; i++) { + if (mask & (1<func; + GLuint mask = inst->DstReg.WriteMask; + + dst0
= get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + vec8(retype(dst0, BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + vec8(retype(dst1, BRW_REGISTER_TYPE_UW)), + stride(suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + } +} +/** + * Emit dst.x = src0.x - (element 0 of GRF 1) and dst.y = src0.y - (element 1 + * of GRF 1), each only if enabled in the write mask. The source channels + * are read as unsigned words. + */ +static void emit_delta_xy(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_reg r1 = brw_vec1_grf(1, 0); + struct brw_reg dst0, dst1, src0, src1; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + + dst0 = get_dst_reg(c, inst, 0); + dst1 = get_dst_reg(c, inst, 1); + src0 = get_src_reg(c, inst, 0, 0); + src1 = get_src_reg(c, inst, 0, 1); + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + if (mask & WRITEMASK_X) { + brw_ADD(p, + dst0, + retype(src0, BRW_REGISTER_TYPE_UW), + negate(r1)); + } + + if (mask & WRITEMASK_Y) { + brw_ADD(p, + dst1, + retype(src1, BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + + } +} +/** + * Copy GRF 1 into message register base_reg + 1 with mask control disabled, + * then issue brw_fb_WRITE(). base_reg, nr, target and eot are handed + * through to brw_fb_WRITE() unchanged. + */ +static void fire_fb_write( struct brw_wm_compile *c, + GLuint base_reg, + GLuint nr, + GLuint target, + GLuint eot) +{ + struct brw_compile *p = &c->func; + /* Pass through control information: + */ + /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ?
*/ + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + /* Send framebuffer write message: */ + brw_fb_WRITE(p, + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + target, + nr, + 0, + eot); +} +/** + * Emit a framebuffer write. The four channels of source 0 are moved into + * consecutive message registers, depth values are appended when the key + * asks for them, and the message is sent through fire_fb_write() with base + * register 0. inst->Aux supplies the target (Aux >> 1) and the eot flag + * (Aux & 1). + */ +static void emit_fb_write(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + int nr = 2; /* running message register number */ + int channel; + GLuint target, eot; + struct brw_reg src0; + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + src0 = get_src_reg(c, inst, 0, channel); + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); + } + /* skip over the regs populated above: */ + nr += 8; /* NOTE(review): the loop above writes only nr .. nr+3, yet nr advances by 8 - confirm against the intended message layout */ + brw_pop_insn_state(p); + + if (c->key.source_depth_to_render_target) { + if (c->key.computes_depth) { + src0 = get_src_reg(c, inst, 2, 2); + brw_MOV(p, brw_message_reg(nr), src0); + } + else { + src0 = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src0); + } + + nr += 2; + } + + if (c->key.dest_depth_reg) { + const GLuint comp = c->key.dest_depth_reg / 2; + const GLuint off = c->key.dest_depth_reg % 2; + + if (off != 0) { + /* XXX this code needs review/testing */ + struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp); + struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1); + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1)); + /* 2nd half?
*/ + brw_MOV(p, brw_message_reg(nr+1), arg1_1); + brw_pop_insn_state(p); + } + else + { + struct brw_reg src = get_src_reg(c, inst, 1, 1); + brw_MOV(p, brw_message_reg(nr), src); + } + nr += 2; + } + + target = inst->Aux >> 1; /* everything above bit 0 of Aux */ + eot = inst->Aux & 1; /* bit 0 of Aux */ + fire_fb_write(c, 0, nr, target, eot); +} +/** + * Acts only when W is enabled in the write mask: evaluates a LINE/MAC pair + * into message register 2 and writes its reciprocal (BRW_MATH_FUNCTION_INV) + * to the destination's W channel. + */ +static void emit_pixel_w( struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + if (mask & WRITEMASK_W) { + struct brw_reg dst, src0, delta0, delta1; + struct brw_reg interp3; + + dst = get_dst_reg(c, inst, 3); + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + + interp3 = brw_vec1_grf(src0.nr+1, 4); /* element 4 of the GRF following src0's */ + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. + */ + brw_LINE(p, brw_null_reg(), interp3, delta0); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), delta1); + + /* Calc w */ + brw_math_16( p, dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} +/* NOTE(review): from emit_linterp() below the per-channel loops break off right after "mask & (1" and the text resumes at "func;" in the following function, several times in a row. The missing spans appear to be extraction damage, so these interpolation helpers cannot be reviewed from here. */ +static void emit_linterp(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst, delta0, delta1; + struct brw_reg src0; + GLuint nr, i; + + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + nr = src0.nr; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<func; + GLuint mask = inst->DstReg.WriteMask; + + struct brw_reg interp[4]; + struct brw_reg dst, src0; + GLuint nr, i; + + src0 = get_src_reg(c, inst, 0, 0); + nr = src0.nr; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] =
brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<func; + GLuint mask = inst->DstReg.WriteMask; + + struct brw_reg interp[4]; + struct brw_reg dst, delta0, delta1; + struct brw_reg src0, w; + GLuint nr, i; + + src0 = get_src_reg(c, inst, 0, 0); + delta0 = get_src_reg(c, inst, 1, 0); + delta1 = get_src_reg(c, inst, 1, 1); + w = get_src_reg(c, inst, 2, 3); + nr = src0.nr; + + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<func; + struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); + struct brw_reg dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<func; + GLuint mask = inst->DstReg.WriteMask; + for (i = 0; i < 4; i++) { + GLuint i2 = (i+2)%3; + GLuint i1 = (i+1)%3; + if (mask & (1<SaturateMode != SATURATE_OFF); + brw_MAC(p, dst, src0, src1); + brw_set_saturate(p, 0); + } + } + brw_set_saturate(p, 0); +} +/** + * Emit a three-component dot product of source 0 and source 1. + * The write mask must enable exactly one of X/Y/Z/W (asserted) and that + * channel receives the result; an empty write mask emits nothing. + * Source 1 is fetched with get_src_reg_imm(), so it may be an immediate. + * Saturation, if requested, is enabled only for the final MAC. + */ +static void emit_dp3(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_reg src0[3], src1[3], dst; + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; /* lowest enabled channel, -1 if none */ + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + for (i = 0; i < 3; i++) { + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); + } + + dst = get_dst_reg(c, inst, dst_chan); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ?
1 : 0); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, 0); +} +/** + * Four-component dot product; same write-mask contract as emit_dp3(): + * exactly one enabled channel (asserted), nothing emitted for an empty mask. + */ +static void emit_dp4(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); + } + dst = get_dst_reg(c, inst, dst_chan); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, brw_null_reg(), src0[2], src1[2]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} +/** + * Three-component dot product of source 0 and source 1 with the fourth + * component of source 1 added on top. The three products are accumulated + * into dst unsaturated; saturation, if requested, applies only to the + * final ADD. Same write-mask contract as emit_dp3(). + */ +static void emit_dph(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, inst, 0, i); + src1[i] = get_src_reg_imm(c, inst, 1, i); + } + dst = get_dst_reg(c, inst, dst_chan); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_ADD(p, dst, dst, src1[3]); /* src0[3] is fetched above but not used */ + brw_set_saturate(p, 0); +} + +/** + * Emit a scalar instruction, like RCP, RSQ, LOG, EXP. + * Note that the result of the function is smeared across the dest + * register's X, Y, Z and W channels (subject to writemasking of course).
+ * + * NOTE(review): as written, exactly one of X/Y/Z/W must be enabled in the + * write mask (asserted) and only that channel of the destination is + * written; an empty write mask emits nothing. + * + * \param func BRW_MATH_FUNCTION_xxx value handed to brw_math() + */ +static void emit_math1(struct brw_wm_compile *c, + const struct prog_instruction *inst, GLuint func) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; /* lowest enabled channel, -1 if none */ + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + /* Get first component of source register */ + dst = get_dst_reg(c, inst, dst_chan); + src0 = get_src_reg(c, inst, 0, 0); + + brw_MOV(p, brw_message_reg(2), src0); /* operand goes into message register 2, presumably the register named by the literal 2 passed to brw_math() below */ + brw_math(p, + dst, + func, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} +/** RCP: emit_math1() with BRW_MATH_FUNCTION_INV. */ +static void emit_rcp(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_INV); +} +/** RSQ: emit_math1() with BRW_MATH_FUNCTION_RSQ. */ +static void emit_rsq(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); +} +/** SIN: emit_math1() with BRW_MATH_FUNCTION_SIN. */ +static void emit_sin(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); +} +/** COS: emit_math1() with BRW_MATH_FUNCTION_COS. */ +static void emit_cos(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_COS); +} +/** EX2: emit_math1() with BRW_MATH_FUNCTION_EXP. */ +static void emit_ex2(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); +} +/** LG2: emit_math1() with BRW_MATH_FUNCTION_LOG. */ +static void emit_lg2(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); +} +/* NOTE(review): the per-channel loop of emit_add() below breaks off right after "mask & (1" and the text resumes at "func;" inside a later function. The missing span appears to be extraction damage, so emit_add() and the head of the function after it cannot be reviewed from here. */ +static void emit_add(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ?
1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<func; + struct brw_reg src0, addr_reg; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, 0); + src0 = get_src_reg(c, inst, 0, 0); /* channel 0 */ + brw_MOV(p, addr_reg, src0); /* load the address register from the first component of source 0 */ + brw_set_saturate(p, 0); +} + +/* NOTE(review): emit_mul() below and the two functions after it are cut the same way (text missing between "mask & (1" and a later fragment); the surviving pieces are kept verbatim. */ +static void emit_mul(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<SaturateMode != SATURATE_OFF) + brw_set_saturate(p, 0); +} + +static void emit_flr(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<func; + const GLuint mask = inst->DstReg.WriteMask; + const int mark = mark_tmps(c); + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + if (inst->Opcode == OPCODE_MIN) + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + else + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0); + + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ?
1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); /* predicated on the CMP above: src1 replaces src0 where the comparison held */ + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + if (use_temp) + brw_MOV(p, real_dst, dst); + } + } + brw_pop_insn_state(p); + release_tmps(c, mark); +} +/** + * Emit POW. The first component of source 0 and the first component of + * source 1 are moved into message registers 2 and 3, and + * BRW_MATH_FUNCTION_POW writes the single enabled write-mask channel + * (asserted; an empty mask emits nothing). Both operands are fetched with + * get_src_reg_imm(), so either may be an immediate. + */ +static void emit_pow(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst, src0, src1; + GLuint mask = inst->DstReg.WriteMask; + int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + + if (!(mask & WRITEMASK_XYZW)) + return; + + assert(is_power_of_two(mask & WRITEMASK_XYZW)); + + dst = get_dst_reg(c, inst, dst_chan); + src0 = get_src_reg_imm(c, inst, 0, 0); + src1 = get_src_reg_imm(c, inst, 1, 0); + + brw_MOV(p, brw_message_reg(2), src0); + brw_MOV(p, brw_message_reg(3), src1); + + brw_math(p, + dst, + BRW_MATH_FUNCTION_POW, + (inst->SaturateMode != SATURATE_OFF) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} +/* NOTE(review): the loop body of emit_lrp() below is cut between "mask & (1" and "SaturateMode"; only the tail of the per-channel sequence survives. */ +static void emit_lrp(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, tmp1, tmp2, src0, src1, src2; + int i; + int mark = mark_tmps(c); + for (i = 0; i < 4; i++) { + if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_MAC(p, dst, src0, tmp1); + brw_set_saturate(p, 0); + } + release_tmps(c, mark); + } +} + +/** + * For GLSL shaders, this KIL will be unconditional. + * It may be contained inside an IF/ENDIF structure of course.
+ */ +static void emit_kil(struct brw_wm_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); +} + +static void emit_mad(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, src0, src1, src2; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<SaturateMode != SATURATE_OFF) ? 1 : 0); + brw_ADD(p, dst, dst, src2); + brw_set_saturate(p, 0); + } + } +} + +static void emit_sop(struct brw_wm_compile *c, + const struct prog_instruction *inst, GLuint cond) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg dst, src0, src1; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<func; + struct brw_reg param, + x0, x1, /* gradients at each end */ + t, tmp[ 2 ], /* float temporaries */ + itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */ + int i; + int mark = mark_tmps( c ); + + x0 = alloc_tmp( c ); + x1 = alloc_tmp( c ); + t = alloc_tmp( c ); + tmp[ 0 ] = alloc_tmp( c ); + tmp[ 1 ] = alloc_tmp( c ); + itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD ); + itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD ); + itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD ); + itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD ); + itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD ); + + param = lookup_tmp( c, mark - 2 ); + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ + + /* Arrange the two end coordinates into scalars (itmp0/itmp1) to + be hashed. Also compute the remainder (offset within the unit + length), interleaved to reduce register dependency penalties. 
*/ + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param ); + brw_FRC( p, param, param ); + brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) ); + brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ + brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ + + /* We're now ready to perform the hashing. The two hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 32x16 + bit multiplication, and 16-bit swizzles (which we get for + free). We can't use immediate operands in the multiplies, + because immediates are permitted only in src1 and the 16-bit + factor is permitted only in src0. */ + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 2; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); + for( i = 0; i < 2; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + + /* Now we want to initialise the two gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 31 ), but + we correct for that right at the end. */ + brw_ADD( p, t, param, brw_imm_f( -1.0 ) ); + brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) ); + brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) ); + + brw_MUL( p, x0, x0, param ); + brw_MUL( p, x1, x1, t ); + + /* We interpolate between the gradients using the polynomial + 6t^5 - 15t^4 + 10t^3 (Perlin). 
*/ + brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the + pipeline */ + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); + brw_MUL( p, param, tmp[ 0 ], param ); + brw_MUL( p, x1, x1, param ); + brw_ADD( p, x0, x0, x1 ); + /* scale by pow( 2, -30 ), to compensate for the format conversion + above and an extra factor of 2 so that a single gradient covers + the [-1,1] range */ + brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) ); + + release_tmps( c, mark ); +} + +static void emit_noise1( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src, param, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src = get_src_reg( c, inst, 0, 0 ); + + param = alloc_tmp( c ); + + brw_MOV( p, param, src ); + + invoke_subroutine( c, SUB_NOISE1, noise1_sub ); + + /* Fill in the result: */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + +static void noise2_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param0, param1, + x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */ + t, tmp[ 4 ], /* float temporaries */ + itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */ + int i; + int mark = mark_tmps( c ); + + x0y0 = alloc_tmp( c ); + x0y1 = alloc_tmp( c ); + x1y0 = alloc_tmp( c ); + x1y1 = alloc_tmp( c ); + t = alloc_tmp( c ); + for( i = 0; i < 4; i++ ) { + tmp[ i ] = alloc_tmp( c ); + itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); + } + itmp[ 4 ] = retype( x0y0, 
BRW_REGISTER_TYPE_UD ); + itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD ); + itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD ); + + param0 = lookup_tmp( c, mark - 3 ); + param1 = lookup_tmp( c, mark - 2 ); + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to + be hashed. Also compute the remainders (offsets within the unit + square), interleaved to reduce register dependency penalties. */ + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); + brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); + brw_FRC( p, param0, param0 ); + brw_FRC( p, param1, param1 ); + brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ + brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ), + low_words( itmp[ 1 ] ) ); + brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ + brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ + brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) ); + brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) ); + brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) ); + + /* We're now ready to perform the hashing. The four hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 32x16 + bit multiplication, and 16-bit swizzles (which we get for + free). We can't use immediate operands in the multiplies, + because immediates are permitted only in src1 and the 16-bit + factor is permitted only in src0. 
*/ + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), + high_words( itmp[ i ] ) ); + + /* Now we want to initialise the four gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 15 ), but + we correct for that right at the end. */ + brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); + brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); + brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) ); + brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) ); + + brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param0 ); + brw_MUL( p, x0y1, x0y1, param0 ); + + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 ); + brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t ); + brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t ); + + brw_ADD( p, x0y0, x0y0, tmp[ 0 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 2 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 1 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 3 ] ); + + /* We interpolate between the gradients using the polynomial + 6t^5 - 15t^4 + 10t^3 (Perlin). 
*/ + brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) ); + brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the + pipeline */ + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the + pipeline */ + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); + brw_MUL( p, param0, tmp[ 0 ], param0 ); + brw_MUL( p, param1, tmp[ 1 ], param1 ); + + /* Here we interpolate in the y dimension... */ + brw_MUL( p, x0y1, x0y1, param1 ); + brw_MUL( p, x1y1, x1y1, param1 ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. There are horrible register dependencies here, + but we have nothing else to do. 
*/ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, param0 ); + brw_ADD( p, x0y0, x0y0, x1y0 ); + + /* scale by pow( 2, -15 ), as described above */ + brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) ); + + release_tmps( c, mark ); +} + +/** + * Emit code for a two-dimensional noise instruction. + * + * Source channels 0 and 1 are copied into two freshly allocated + * temporaries and the noise2 subroutine is invoked. Whatever the first + * temporary (param0) holds after the call is then moved to every + * destination channel selected by the write mask, with saturation enabled + * for those moves when the instruction requests it. + */ +static void emit_noise2( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, param0, param1, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + + param0 = alloc_tmp( c ); + param1 = alloc_tmp( c ); + + brw_MOV( p, param0, src0 ); + brw_MOV( p, param1, src1 ); + + invoke_subroutine( c, SUB_NOISE2, noise2_sub ); + + /* Fill in the result: */ + /* NOTE(review): the loop body and the start of the following `if` were + reconstructed after this span was lost in extraction; confirm + against a pristine copy of the file. */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV( p, dst, param0 ); + } + } + /* Saturation was only wanted for the result moves above. */ + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + +/** + * The three-dimensional case is much like the one- and two- versions above, + * but since the number of corners is rapidly growing we now pack 16 16-bit + * hashes into each register to extract more parallelism from the EUs.
+ */ +static void noise3_sub( struct brw_wm_compile *c ) { + + struct brw_compile *p = &c->func; + struct brw_reg param0, param1, param2, + x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ + xi, yi, zi, /* interpolation coefficients */ + t, tmp[ 8 ], /* float temporaries */ + itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ + wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ + int i; + int mark = mark_tmps( c ); + + x0y0 = alloc_tmp( c ); + x0y1 = alloc_tmp( c ); + x1y0 = alloc_tmp( c ); + x1y1 = alloc_tmp( c ); + xi = alloc_tmp( c ); + yi = alloc_tmp( c ); + zi = alloc_tmp( c ); + t = alloc_tmp( c ); + for( i = 0; i < 8; i++ ) { + tmp[ i ] = alloc_tmp( c ); + itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); + wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); + } + /* The three parameters are looked up at slots mark-4 .. mark-2, i.e. + temporaries allocated before this routine took its mark -- + presumably the coordinates copied in by emit_noise3() (confirm + lookup_tmp() semantics; slot mark-1 is presumably taken by + invoke_subroutine()). They are replaced by their fractional parts + below, and param0 receives the final result at the end of this + routine. */ + param0 = lookup_tmp( c, mark - 4 ); + param1 = lookup_tmp( c, mark - 3 ); + param2 = lookup_tmp( c, mark - 2 ); + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to + be hashed. Also compute the remainders (offsets within the unit + cube), interleaved to reduce register dependency penalties. */ + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); + brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); + brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 ); + brw_FRC( p, param0, param0 ); + brw_FRC( p, param1, param1 ); + brw_FRC( p, param2, param2 ); + /* Since we now have only 16 bits of precision in the hash, we must + be more careful about thorough mixing to maintain entropy as we + squash the input vector into a small scalar.
*/ + brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) ); + brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) ); + brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ), + brw_imm_uw( 0x9B93 ) ); + brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), + brw_imm_uw( 0xBC8F ) ); + + /* Temporarily disable the execution mask while we work with ExecSize=16 + channels (the mask is set for ExecSize=8 and is probably incorrect). + Although this might cause execution of unwanted channels, the code + writes only to temporary registers and has no side effects, so + disabling the mask is harmless. */ + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); + brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); + brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); + + /* We're now ready to perform the hashing. The eight hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 16x16 + bit multiplication, and 8-bit swizzles (which we get for + free). */ + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + brw_pop_insn_state( p ); + + /* Now we want to initialise the four rear gradients based on the + hashes. Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 15 ), but + we correct for that right at the end. 
*/ + /* x component */ + brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); + brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param0 ); + brw_MUL( p, x0y1, x0y1, param0 ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] 
); + + /* We interpolate between the gradients using the polynomial + 6t^5 - 15t^4 + 10t^3 (Perlin). */ + brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) ); + brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) ); + brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) ); + brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) ); + brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) ); + brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) ); + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) ); + brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) ); + brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) ); + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */ + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */ + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + brw_MUL( p, xi, xi, param0 ); + brw_MUL( p, yi, yi, param1 ); + brw_MUL( p, zi, zi, param2 ); + + /* Here we interpolate in the y dimension... */ + brw_MUL( p, x0y1, x0y1, yi ); + brw_MUL( p, x1y1, x1y1, yi ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, xi ); + brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); + + /* Now do the same thing for the front four gradients... 
*/ + /* x component */ + brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param0 ); + brw_MUL( p, x0y1, x0y1, param0 ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param2, brw_imm_f( -1.0 ) ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* The interpolation coefficients are still around from last 
time, so + again interpolate in the y dimension... */ + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); + brw_MUL( p, x0y1, x0y1, yi ); + brw_MUL( p, x1y1, x1y1, yi ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. The rear face is in tmp[ 0 ] (see above), so this + time put the front face in tmp[ 1 ] and we're nearly there... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, xi ); + brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); + + /* The final interpolation, in the z dimension: */ + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); + + /* scale by pow( 2, -15 ), as described above */ + brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); + + release_tmps( c, mark ); +} + +/** + * Emit code for a three-dimensional noise instruction. + * + * Source channels 0, 1 and 2 are copied into three freshly allocated + * temporaries and the noise3 subroutine is invoked. Whatever the first + * temporary (param0) holds after the call is then moved to every + * destination channel selected by the write mask, with saturation enabled + * for those moves when the instruction requests it. + */ +static void emit_noise3( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, src2, param0, param1, param2, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); + + param0 = alloc_tmp( c ); + param1 = alloc_tmp( c ); + param2 = alloc_tmp( c ); + + brw_MOV( p, param0, src0 ); + brw_MOV( p, param1, src1 ); + brw_MOV( p, param2, src2 ); + + invoke_subroutine( c, SUB_NOISE3, noise3_sub ); + + /* Fill in the result: */ + /* NOTE(review): the loop body and the start of the following `if` were + reconstructed after this span was lost in extraction; confirm + against a pristine copy of the file. */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV( p, dst, param0 ); + } + } + /* Saturation was only wanted for the result moves above. */ + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + +/** + * For the four-dimensional case, the little micro-optimisation benefits + * we obtain by unrolling all the loops aren't worth the massive bloat it + * now causes.
Instead, we loop twice around performing a similar operation + * to noise3, once for the w=0 cube and once for the w=1, with a bit more + * code to glue it all together. + */ +static void noise4_sub( struct brw_wm_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg param[ 4 ], + x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ + w0, /* noise for the w=0 cube */ + floors[ 2 ], /* integer coordinates of base corner of hypercube */ + interp[ 4 ], /* interpolation coefficients */ + t, tmp[ 8 ], /* float temporaries */ + itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ + wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ + int i, j; + int mark = mark_tmps( c ); + GLuint loop, origin; + + x0y0 = alloc_tmp( c ); + x0y1 = alloc_tmp( c ); + x1y0 = alloc_tmp( c ); + x1y1 = alloc_tmp( c ); + t = alloc_tmp( c ); + w0 = alloc_tmp( c ); + floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); + floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); + /* param[ 0..3 ] are looked up at slots mark-5 .. mark-2, i.e. + temporaries allocated before this routine took its mark -- + presumably the four coordinates copied in by emit_noise4() (confirm + lookup_tmp() semantics; slot mark-1 is presumably taken by + invoke_subroutine()). Each parameter also gets its own + interpolation coefficient register. */ + for( i = 0; i < 4; i++ ) { + param[ i ] = lookup_tmp( c, mark - 5 + i ); + interp[ i ] = alloc_tmp( c ); + } + + for( i = 0; i < 8; i++ ) { + tmp[ i ] = alloc_tmp( c ); + itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); + wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); + } + + brw_set_access_mode( p, BRW_ALIGN_1 ); + + /* We only want 16 bits of precision from the integral part of each + co-ordinate, but unfortunately the RNDD semantics would saturate + at 16 bits if we performed the operation directly to a 16-bit + destination. Therefore, we round to 32-bit temporaries where + appropriate, and then store only the lower 16 bits.
*/ + brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] ); + brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] ); + brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] ); + brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] ); + brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) ); + brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) ); + + /* Modify the flag register here, because the side effect is useful + later (see below). We know for certain that all flags will be + cleared, since the FRC instruction cannot possibly generate + negative results. Even for exceptional inputs (infinities, denormals, + NaNs), the architecture guarantees that the L conditional is false. */ + brw_set_conditionalmod( p, BRW_CONDITIONAL_L ); + brw_FRC( p, param[ 0 ], param[ 0 ] ); + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + for( i = 1; i < 4; i++ ) + brw_FRC( p, param[ i ], param[ i ] ); + + /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first + of all. */ + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) ); + for( i = 0; i < 4; i++ ) + brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); + for( i = 0; i < 4; i++ ) + brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) ); + for( j = 0; j < 3; j++ ) + for( i = 0; i < 4; i++ ) + brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); + + /* Mark the current address, as it will be a jump destination. The + following code will be executed twice: first, with the flag + register clear indicating the w=0 case, and second with flags + set for w=1. */ + loop = p->nr_insn; + + /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to + be hashed. 
Since we have only 16 bits of precision in the hash, we + must be careful about thorough mixing to maintain entropy as we + squash the input vector into a small scalar. */ + brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ), + brw_imm_uw( 0xBC8F ) ); + brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ), + brw_imm_uw( 0xD0BD ) ); + brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ), + brw_imm_uw( 0x9B93 ) ); + brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ), + brw_imm_uw( 0xA359 ) ); + brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), + brw_imm_uw( 0xBC8F ) ); + + /* Temporarily disable the execution mask while we work with ExecSize=16 + channels (the mask is set for ExecSize=8 and is probably incorrect). + Although this might cause execution of unwanted channels, the code + writes only to temporary registers and has no side effects, so + disabling the mask is harmless. */ + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); + brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); + brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); + + /* We're now ready to perform the hashing. The eight hashes are + interleaved for performance. The hash function used is + designed to rapidly achieve avalanche and require only 16x16 + bit multiplication, and 8-bit swizzles (which we get for + free). */ + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + for( i = 0; i < 4; i++ ) + brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); + for( i = 0; i < 4; i++ ) + brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), + odd_bytes( wtmp[ i ] ) ); + brw_pop_insn_state( p ); + + /* Now we want to initialise the four rear gradients based on the + hashes. 
Format conversion from signed integer to float leaves + everything scaled too high by a factor of pow( 2, 15 ), but + we correct for that right at the end. */ + /* x component */ + brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); + brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param[ 0 ] ); + brw_MUL( p, x0y1, x0y1, param[ 0 ] ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + /* prepare t for the w component (used below): w the first time through + the loop; w - 1 the second time) */ + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); + p->current->header.predicate_inverse = 1; + brw_MOV( p, t, param[ 3 ] ); + p->current->header.predicate_inverse = 0; + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + 
brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* w component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* Here we interpolate in the y dimension... */ + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); + brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); + brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); + brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); + + /* Now do the same thing for the front four gradients... 
*/ + /* x component */ + brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); + brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); + brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); + brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, x1y0, x1y0, t ); + brw_MUL( p, x1y1, x1y1, t ); + brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, x0y0, x0y0, param[ 0 ] ); + brw_MUL( p, x0y1, x0y1, param[ 0 ] ); + + /* y component */ + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) ); + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); + + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + + /* z component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); + brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); + brw_pop_insn_state( p ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 
7 ], tmp[ 7 ], t ); + /* prepare t for the w component (used below): w the first time through + the loop; w - 1 the second time) */ + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); + p->current->header.predicate_inverse = 1; + brw_MOV( p, t, param[ 3 ] ); + p->current->header.predicate_inverse = 0; + brw_set_predicate_control( p, BRW_PREDICATE_NONE ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* w component */ + brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); + brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); + brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); + + brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); + brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); + brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); + brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); + + brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); + brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); + brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); + brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); + + /* Interpolate in the y dimension: */ + brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); + brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); + brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); + brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); + brw_ADD( p, x0y0, x0y0, x0y1 ); + brw_ADD( p, x1y0, x1y0, x1y1 ); + + /* And now in x. The rear face is in tmp[ 0 ] (see above), so this + time put the front face in tmp[ 1 ] and we're nearly there... */ + brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); + brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); + brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); + + /* Another interpolation, in the z dimension: */ + brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); + brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); + + /* Exit the loop if we've computed both cubes... 
*/ + origin = p->nr_insn; + brw_push_insn_state( p ); + brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) ); + brw_pop_insn_state( p ); + + /* Save the result for the w=0 case, and increment the w coordinate: */ + brw_MOV( p, w0, tmp[ 0 ] ); + brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ), + brw_imm_uw( 1 ) ); + + /* Loop around for the other cube. Explicitly set the flag register + (unfortunately we must spend an extra instruction to do this: we + can't rely on a side effect of the previous MOV or ADD because + conditional modifiers which are normally true might be false in + exceptional circumstances, e.g. given a NaN input; the add to + brw_ip_reg() is not suitable because the IP is not an 8-vector). */ + brw_push_insn_state( p ); + brw_set_mask_control( p, BRW_MASK_DISABLE ); + brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) ); + brw_ADD( p, brw_ip_reg(), brw_ip_reg(), + brw_imm_d( ( loop - p->nr_insn ) << 4 ) ); + brw_pop_insn_state( p ); + + /* Patch the previous conditional branch now that we know the + destination address. */ + brw_set_src1( p->store + origin, + brw_imm_d( ( p->nr_insn - origin ) << 4 ) ); + + /* The very last interpolation. 
*/ + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) ); + brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] ); + brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 ); + + /* scale by pow( 2, -15 ), as described above */ + brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); + + release_tmps( c, mark ); +} + +/** + * Emit code for a four-dimensional noise instruction. + * + * Source channels 0 to 3 are copied into four freshly allocated + * temporaries and the noise4 subroutine is invoked. Whatever the first + * temporary (param0) holds after the call is then moved to every + * destination channel selected by the write mask, with saturation enabled + * for those moves when the instruction requests it. + */ +static void emit_noise4( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; + GLuint mask = inst->DstReg.WriteMask; + int i; + int mark = mark_tmps( c ); + + assert( mark == 0 ); + + src0 = get_src_reg( c, inst, 0, 0 ); + src1 = get_src_reg( c, inst, 0, 1 ); + src2 = get_src_reg( c, inst, 0, 2 ); + src3 = get_src_reg( c, inst, 0, 3 ); + + param0 = alloc_tmp( c ); + param1 = alloc_tmp( c ); + param2 = alloc_tmp( c ); + param3 = alloc_tmp( c ); + + brw_MOV( p, param0, src0 ); + brw_MOV( p, param1, src1 ); + brw_MOV( p, param2, src2 ); + brw_MOV( p, param3, src3 ); + + invoke_subroutine( c, SUB_NOISE4, noise4_sub ); + + /* Fill in the result: */ + /* NOTE(review): the loop body and the start of the following `if` were + reconstructed after this span was lost in extraction; confirm + against a pristine copy of the file. */ + brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV( p, dst, param0 ); + } + } + /* Saturation was only wanted for the result moves above. */ + if( inst->SaturateMode == SATURATE_ZERO_ONE ) + brw_set_saturate( p, 0 ); + + release_tmps( c, mark ); +} + +/** + * Write the window-relative pixel position: source channels 0 and 1 are + * rebased on the drawable origin (with Y flipped) into destination + * channels 0 and 1, each only if enabled in the write mask. + */ +static void emit_wpos_xy(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + GLuint mask = inst->DstReg.WriteMask; + struct brw_reg src0[2], dst[2]; + + dst[0] = get_dst_reg(c, inst, 0); + dst[1] = get_dst_reg(c, inst, 1); + + src0[0] = get_src_reg(c, inst, 0, 0); + src0[1] = get_src_reg(c, inst, 0, 1); + + /* Calculate the pixel offset from window bottom left into destination + * X and Y channels.
+ */ + if (mask & WRITEMASK_X) { + /* X' = X - origin_x */ + brw_ADD(p, + dst[0], + retype(src0[0], BRW_REGISTER_TYPE_W), + brw_imm_d(0 - c->key.origin_x)); + } + + if (mask & WRITEMASK_Y) { + /* Y' = (height - 1) - (Y - origin_y) = origin_y + height - 1 - Y */ + brw_ADD(p, + dst[1], + negate(retype(src0[1], BRW_REGISTER_TYPE_W)), + brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + } +} + +/** + * Emit a biased texture sample (SAMPLE_BIAS message). + * + * The s/t/r coordinates are written to message registers 2-4 (coordinates + * the texture target does not use are zeroed), the bias taken from source + * channel 3 goes to message register 5, and message register 6 ("ref") + * is zeroed. + * + * TODO + * BIAS on SIMD8 not working yet... + */ +static void emit_txb(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4], src[4], payload_reg; + /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->TexSrcUnit; + GLuint i; + GLuint msg_type; + + assert(unit < BRW_MAX_TEX_UNIT); + + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, inst, 0, i); + + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); /* r coord */ + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), src[2]); + break; + default: + /* invalid target */ + abort(); + } + brw_MOV(p, brw_message_reg(5), src[3]); /* bias */ + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */ + + if (BRW_IS_IGDNG(p->brw)) { + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG; + } else { + /* Does it work well on SIMD8?
*/ + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; + } + + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + SURF_INDEX_TEXTURE(unit), + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + msg_type, /* msg_type */ + 4, /* response_length */ + 4, /* msg_length; NOTE(review): the message starts at register 1 but registers 2..6 are filled in before this call -- confirm that 4 is intended */ + 0, /* eot */ + 1, + BRW_SAMPLER_SIMD_MODE_SIMD8); +} + + /** + * Emit a texture sample without bias, using the compare message type on + * IGDNG when the unit is flagged in the shadow-texture mask. + * + * NOTE(review): this copy of the function is damaged. Text is missing + * between `(1<` and `TexSrcTarget) {`, and again between `(1<` and + * `brw)) {`: msg_type is assigned without a visible declaration, + * payload_reg and dst[] are read without any visible initialisation, and + * the coordinate loop has lost its body and closing brace. Restore both + * spans from a pristine copy of the file before relying on this code. + */ +static void emit_tex(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4], src[4], payload_reg; + /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->TexSrcUnit; + GLuint msg_len; + GLuint i, nr; + GLuint emit; + GLboolean shadow = (c->key.shadowtex_mask & (1<TexSrcTarget) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + case TEXTURE_3D_INDEX: + case TEXTURE_CUBE_INDEX: + emit = WRITEMASK_XYZ; + nr = 3; + break; + default: + /* invalid target */ + abort(); + } + msg_len = 1; + + /* move/load S, T, R coords */ + for (i = 0; i < nr; i++) { + static const GLuint swz[4] = {0,1,2,2}; + if (emit & (1<brw)) { + if (shadow) + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG; + else + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG; + } else { + /* Does it work for shadow on SIMD8 ? */ + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; + } + + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */ + 1, /* msg_reg_nr */ + retype(payload_reg, BRW_REGISTER_TYPE_UW), /* src0 */ + SURF_INDEX_TEXTURE(unit), + unit, /* sampler */ + inst->DstReg.WriteMask, /* writemask */ + msg_type, /* msg_type */ + 4, /* response_length */ + shadow ?
6 : 4, /* msg_length */ + 0, /* eot */ + 1, + BRW_SAMPLER_SIMD_MODE_SIMD8); + + if (shadow) + brw_MOV(p, dst[3], brw_imm_f(1.0)); +} + + +/** + * Resolve subroutine calls after code emit is done. + */ +static void post_wm_emit( struct brw_wm_compile *c ) +{ + brw_resolve_cals(&c->func); +} + +static void +get_argument_regs(struct brw_wm_compile *c, + const struct prog_instruction *inst, + int index, + struct brw_reg *regs, + int mask) +{ + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1 << i)) + regs[i] = get_src_reg(c, inst, index, i); + } +} + +static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) +{ +#define MAX_IF_DEPTH 32 +#define MAX_LOOP_DEPTH 32 + struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; + GLuint i, if_depth = 0, loop_depth = 0; + struct brw_compile *p = &c->func; + struct brw_indirect stack_index = brw_indirect(0, 0); + + c->out_of_regs = GL_FALSE; + + prealloc_reg(c); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + + for (i = 0; i < c->nr_fp_insns; i++) { + const struct prog_instruction *inst = &c->prog_instructions[i]; + int dst_flags; + struct brw_reg args[3][4], dst[4]; + int j; + + c->cur_inst = i; + +#if 0 + _mesa_printf("Inst %d: ", i); + _mesa_print_instruction(inst); +#endif + + /* fetch any constants that this instruction needs */ + if (c->fp->use_const_buffer) + fetch_constants(c, inst); + + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); + + dst_flags = inst->DstReg.WriteMask; + if (inst->SaturateMode == SATURATE_ZERO_ONE) + dst_flags |= SATURATE; + + switch (inst->Opcode) { + case WM_PIXELXY: + emit_pixel_xy(c, inst); + break; + case WM_DELTAXY: + emit_delta_xy(c, inst); + break; + case WM_PIXELW: + emit_pixel_w(c, inst); + break; + case WM_LINTERP: + emit_linterp(c, inst); + break; + case WM_PINTERP: + emit_pinterp(c, inst); + 
break; + case WM_CINTERP: + emit_cinterp(c, inst); + break; + case WM_WPOSXY: + emit_wpos_xy(c, inst); + break; + case WM_FB_WRITE: + emit_fb_write(c, inst); + break; + case WM_FRONTFACING: + emit_frontfacing(c, inst); + break; + case OPCODE_ADD: + emit_add(c, inst); + break; + case OPCODE_ARL: + emit_arl(c, inst); + break; + case OPCODE_FRC: + emit_frc(c, inst); + break; + case OPCODE_FLR: + emit_flr(c, inst); + break; + case OPCODE_LRP: + emit_lrp(c, inst); + break; + case OPCODE_TRUNC: + emit_trunc(c, inst); + break; + case OPCODE_MOV: + case OPCODE_SWZ: + emit_mov(c, inst); + break; + case OPCODE_DP3: + emit_dp3(c, inst); + break; + case OPCODE_DP4: + emit_dp4(c, inst); + break; + case OPCODE_XPD: + emit_xpd(c, inst); + break; + case OPCODE_DPH: + emit_dph(c, inst); + break; + case OPCODE_RCP: + emit_rcp(c, inst); + break; + case OPCODE_RSQ: + emit_rsq(c, inst); + break; + case OPCODE_SIN: + emit_sin(c, inst); + break; + case OPCODE_COS: + emit_cos(c, inst); + break; + case OPCODE_EX2: + emit_ex2(c, inst); + break; + case OPCODE_LG2: + emit_lg2(c, inst); + break; + case OPCODE_MIN: + case OPCODE_MAX: + emit_min_max(c, inst); + break; + case OPCODE_DDX: + case OPCODE_DDY: + for (j = 0; j < 4; j++) { + if (inst->DstReg.WriteMask & (1 << j)) + dst[j] = get_dst_reg(c, inst, j); + else + dst[j] = brw_null_reg(); + } + get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW); + emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX), + args[0]); + break; + case OPCODE_SLT: + emit_slt(c, inst); + break; + case OPCODE_SLE: + emit_sle(c, inst); + break; + case OPCODE_SGT: + emit_sgt(c, inst); + break; + case OPCODE_SGE: + emit_sge(c, inst); + break; + case OPCODE_SEQ: + emit_seq(c, inst); + break; + case OPCODE_SNE: + emit_sne(c, inst); + break; + case OPCODE_MUL: + emit_mul(c, inst); + break; + case OPCODE_POW: + emit_pow(c, inst); + break; + case OPCODE_MAD: + emit_mad(c, inst); + break; + case OPCODE_NOISE1: + emit_noise1(c, inst); + break; + case OPCODE_NOISE2: + 
emit_noise2(c, inst); + break; + case OPCODE_NOISE3: + emit_noise3(c, inst); + break; + case OPCODE_NOISE4: + emit_noise4(c, inst); + break; + case OPCODE_TEX: + emit_tex(c, inst); + break; + case OPCODE_TXB: + emit_txb(c, inst); + break; + case OPCODE_KIL_NV: + emit_kil(c); + break; + case OPCODE_IF: + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8); + break; + case OPCODE_ELSE: + assert(if_depth > 0); /* ELSE with no open IF would index if_inst[] out of range */ + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); + break; + case OPCODE_ENDIF: + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); + break; + case OPCODE_BGNSUB: + brw_save_label(p, inst->Comment, p->nr_insn); + break; + case OPCODE_ENDSUB: + /* no-op */ + break; + case OPCODE_CAL: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + brw_save_call(&c->func, inst->Comment, p->nr_insn); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_pop_insn_state(p); + break; + + case OPCODE_RET: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); + + break; + case OPCODE_BGNLOOP: + /* XXX may need to invalidate the current_constant regs */ + assert(loop_depth < MAX_LOOP_DEPTH); /* loop_inst[] holds MAX_LOOP_DEPTH entries */ + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + break; + case OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case OPCODE_ENDLOOP: + { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + if (BRW_IS_IGDNG(brw)) + br = 2; + + assert(loop_depth > 0); /* ENDLOOP with no open BGNLOOP would wrap the unsigned depth */ + loop_depth--; +
inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BGNLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == BRW_OPCODE_BREAK) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + inst0->bits3.if_else.pop_count = 0; + } + else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + inst0->bits3.if_else.pop_count = 0; + } + } + } + break; + default: + _mesa_printf("unsupported IR in fragment shader %d\n", + inst->Opcode); + } + + if (inst->CondUpdate) + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + else + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + post_wm_emit(c); + + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("wm-native:\n"); + for (i = 0; i < p->nr_insn; i++) + brw_disasm(stderr, &p->store[i]); + _mesa_printf("\n"); + } +} + +/** + * Do GPU code generation for shaders that use GLSL features such as + * flow control. Other shaders will be compiled with the + */ +void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + if (INTEL_DEBUG & DEBUG_WM) { + _mesa_printf("brw_wm_glsl_emit:\n"); + } + + /* initial instruction translation/simplification */ + brw_wm_pass_fp(c); + + /* actual code generation */ + brw_wm_emit_glsl(brw, c); + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "brw_wm_glsl_emit done"); + } + + c->prog_data.total_grf = num_grf_used(c); + c->prog_data.total_scratch = 0; +} diff --git a/src/gallium/drivers/i965/brw_wm_iz.c b/src/gallium/drivers/i965/brw_wm_iz.c new file mode 100644 index 0000000000..5e399ac62a --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_iz.c @@ -0,0 +1,157 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "main/mtypes.h" +#include "brw_wm.h" + + +#undef P /* promoted depth */ +#undef C /* computed */ +#undef N /* non-promoted? 
*/ + +#define P 0 +#define C 1 +#define N 2 + +const struct { + GLuint mode:2; + GLuint sd_present:1; + GLuint sd_to_rt:1; + GLuint dd_present:1; + GLuint ds_present:1; +} wm_iz_table[IZ_BIT_MAX] = +{ + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { N, 0, 1, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 0 }, + { C, 0, 1, 1, 0 }, + { C, 0, 1, 1, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { N, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { N, 1, 1, 0, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 0, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { C, 0, 1, 0, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { P, 0, 0, 0, 0 }, + { C, 1, 1, 1, 1 }, + { C, 0, 1, 1, 1 }, + { C, 0, 1, 1, 1 } +}; + +/** + * \param line_aa AA_NEVER, AA_ALWAYS or AA_SOMETIMES + * \param lookup bitmask of IZ_* flags + */ +void brw_wm_lookup_iz( GLuint line_aa, + GLuint lookup, + GLboolean ps_uses_depth, + struct brw_wm_prog_key *key ) +{ + GLuint reg = 2; + + assert (lookup < IZ_BIT_MAX); + + if (lookup & IZ_PS_COMPUTES_DEPTH_BIT) + key->computes_depth = 1; + + if (wm_iz_table[lookup].sd_present || ps_uses_depth) { + key->source_depth_reg = reg; + reg += 2; + 
} + + if (wm_iz_table[lookup].sd_to_rt) + key->source_depth_to_render_target = 1; + + if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) { + key->aa_dest_stencil_reg = reg; + key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present && + line_aa == AA_SOMETIMES); + reg++; + } + + if (wm_iz_table[lookup].dd_present) { + key->dest_depth_reg = reg; + reg+=2; + } + + key->nr_depth_regs = (reg+1)/2; +} + diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c new file mode 100644 index 0000000000..6279258339 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -0,0 +1,442 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_wm.h" +#include "shader/prog_parameter.h" + + + +/*********************************************************************** + */ + +static struct brw_wm_ref *get_ref( struct brw_wm_compile *c ) +{ + assert(c->nr_refs < BRW_WM_MAX_REF); + return &c->refs[c->nr_refs++]; +} + +static struct brw_wm_value *get_value( struct brw_wm_compile *c) +{ + assert(c->nr_refs < BRW_WM_MAX_VREG); + return &c->vreg[c->nr_vreg++]; +} + +/** return pointer to a newly allocated instruction */ +static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c ) +{ + assert(c->nr_insns < BRW_WM_MAX_INSN); + return &c->instruction[c->nr_insns++]; +} + +/*********************************************************************** + */ + +/** Init the "undef" register */ +static void pass0_init_undef( struct brw_wm_compile *c) +{ + struct brw_wm_ref *ref = &c->undef_ref; + ref->value = &c->undef_value; + ref->hw_reg = brw_vec8_grf(0, 0); + ref->insn = 0; + ref->prevuse = NULL; +} + +/** Set a FP register to a value */ +static void pass0_set_fpreg_value( struct brw_wm_compile *c, + GLuint file, + GLuint idx, + GLuint component, + struct brw_wm_value *value ) +{ + struct brw_wm_ref *ref = get_ref(c); + ref->value = value; + ref->hw_reg = brw_vec8_grf(0, 0); + ref->insn = 0; + ref->prevuse = NULL; + c->pass0_fp_reg[file][idx][component] = ref; +} + +/** Set a FP register to a ref */ +static void pass0_set_fpreg_ref( struct brw_wm_compile *c, + GLuint file, + GLuint idx, + GLuint component, + const struct brw_wm_ref *src_ref ) +{ + c->pass0_fp_reg[file][idx][component] = src_ref; +} + +static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, + const GLfloat *param_ptr ) +{ + GLuint i = c->prog_data.nr_params++; + + if (i >= BRW_WM_MAX_PARAM) { + _mesa_printf("%s: out of params\n", __FUNCTION__); + c->prog_data.error 
= 1; + return NULL; + } + else { + struct brw_wm_ref *ref = get_ref(c); + + c->prog_data.param[i] = param_ptr; + c->nr_creg = (i+16)/16; + + /* Push the offsets into hw_reg. These will be added to the + * real register numbers once one is allocated in pass2. + */ + ref->hw_reg = brw_vec1_grf((i&8)?1:0, i%8); + ref->value = &c->creg[i/16]; + ref->insn = 0; + ref->prevuse = NULL; + + return ref; + } +} + + +/** Return a ref to a constant/literal value */ +static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, + const GLfloat *constval ) +{ + GLuint i; + + /* Search for an existing const value matching the request: + */ + for (i = 0; i < c->nr_constrefs; i++) { + if (c->constref[i].constval == *constval) + return c->constref[i].ref; + } + + /* Else try to add a new one: + */ + if (c->nr_constrefs < BRW_WM_MAX_CONST) { + GLuint i = c->nr_constrefs++; + + /* A constant is a special type of parameter: + */ + c->constref[i].constval = *constval; + c->constref[i].ref = get_param_ref(c, constval); + + return c->constref[i].ref; + } + else { + _mesa_printf("%s: out of constrefs\n", __FUNCTION__); + c->prog_data.error = 1; + return NULL; + } +} + + +/* Lookup our internal registers + */ +static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, + GLuint file, + GLuint idx, + GLuint component ) +{ + const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component]; + + if (!ref) { + switch (file) { + case PROGRAM_INPUT: + case PROGRAM_PAYLOAD: + case PROGRAM_TEMPORARY: + case PROGRAM_OUTPUT: + case PROGRAM_VARYING: + break; + + case PROGRAM_LOCAL_PARAM: + ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]); + break; + + case PROGRAM_ENV_PARAM: + ref = get_param_ref(c, &c->env_param[idx][component]); + break; + + case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + case PROGRAM_CONSTANT: + case PROGRAM_NAMED_PARAM: { + struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; + + /* There's something really 
hokey about parameters parsed in + * arb programs - they all end up in here, whether they be + * state values, parameters or constants. This duplicates the + * structure above & also seems to subvert the limits set for + * each type of constant/param. + */ + switch (plist->Parameters[idx].Type) { + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + /* These are invarient: + */ + ref = get_const_ref(c, &plist->ParameterValues[idx][component]); + break; + + case PROGRAM_STATE_VAR: + case PROGRAM_UNIFORM: + /* These may change from run to run: + */ + ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); + break; + + default: + assert(0); + break; + } + break; + } + + default: + assert(0); + break; + } + + c->pass0_fp_reg[file][idx][component] = ref; + } + + if (!ref) + ref = &c->undef_ref; + + return ref; +} + + + +/*********************************************************************** + * Straight translation to internal instruction format + */ + +static void pass0_set_dst( struct brw_wm_compile *c, + struct brw_wm_instruction *out, + const struct prog_instruction *inst, + GLuint writemask ) +{ + const struct prog_dst_register *dst = &inst->DstReg; + GLuint i; + + for (i = 0; i < 4; i++) { + if (writemask & (1<dst[i] = get_value(c); + pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]); + } + } + + out->writemask = writemask; +} + + +static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, + struct prog_src_register src, + GLuint i ) +{ + GLuint component = GET_SWZ(src.Swizzle,i); + const struct brw_wm_ref *src_ref; + static const GLfloat const_zero = 0.0; + static const GLfloat const_one = 1.0; + + if (component == SWIZZLE_ZERO) + src_ref = get_const_ref(c, &const_zero); + else if (component == SWIZZLE_ONE) + src_ref = get_const_ref(c, &const_one); + else + src_ref = pass0_get_reg(c, src.File, src.Index, component); + + return src_ref; +} + + +static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, + struct 
prog_src_register src, + GLuint i, + struct brw_wm_instruction *insn) +{ + const struct brw_wm_ref *ref = get_fp_src_reg_ref(c, src, i); + struct brw_wm_ref *newref = get_ref(c); + + newref->value = ref->value; + newref->hw_reg = ref->hw_reg; + + if (insn) { + newref->insn = insn - c->instruction; + newref->prevuse = newref->value->lastuse; + newref->value->lastuse = newref; + } + + if (src.Negate & (1 << i)) + newref->hw_reg.negate ^= 1; + + if (src.Abs) { + newref->hw_reg.negate = 0; + newref->hw_reg.abs = 1; + } + + return newref; +} + + +static void +translate_insn(struct brw_wm_compile *c, + const struct prog_instruction *inst) +{ + struct brw_wm_instruction *out = get_instruction(c); + GLuint writemask = inst->DstReg.WriteMask; + GLuint nr_args = brw_wm_nr_args(inst->Opcode); + GLuint i, j; + + /* Copy some data out of the instruction + */ + out->opcode = inst->Opcode; + out->saturate = (inst->SaturateMode != SATURATE_OFF); + out->tex_unit = inst->TexSrcUnit; + out->tex_idx = inst->TexSrcTarget; + out->tex_shadow = inst->TexShadow; + out->eot = inst->Aux & 1; + out->target = inst->Aux >> 1; + + /* Args: + */ + for (i = 0; i < nr_args; i++) { + for (j = 0; j < 4; j++) { + out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out); + } + } + + /* Dst: + */ + pass0_set_dst(c, out, inst, writemask); +} + + + +/*********************************************************************** + * Optimize moves and swizzles away: + */ +static void pass0_precalc_mov( struct brw_wm_compile *c, + const struct prog_instruction *inst ) +{ + const struct prog_dst_register *dst = &inst->DstReg; + GLuint writemask = inst->DstReg.WriteMask; + struct brw_wm_ref *refs[4]; + GLuint i; + + /* Get the effect of a MOV by manipulating our register table: + * First get all refs, then assign refs. This ensures that "in-place" + * swizzles such as: + * MOV t, t.xxyx + * are handled correctly. Previously, these two steps were done in + * one loop and the above case was incorrectly handled. 
+ */ + for (i = 0; i < 4; i++) { + refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL); + } + for (i = 0; i < 4; i++) { + if (writemask & (1 << i)) { + pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]); + } + } +} + + +/* Initialize payload "registers". + */ +static void pass0_init_payload( struct brw_wm_compile *c ) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + GLuint j = i >= c->key.nr_depth_regs ? 0 : i; + pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, + &c->payload.depth[j] ); + } + +#if 0 + /* This seems to be an alternative to the INTERP_WPOS stuff I do + * elsewhere: + */ + if (c->key.source_depth_reg) + pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2, + &c->payload.depth[c->key.source_depth_reg/2]); +#endif + + for (i = 0; i < FRAG_ATTRIB_MAX; i++) + pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, + &c->payload.input_interp[i] ); +} + + +/*********************************************************************** + * PASS 0 + * + * Work forwards to give each calculated value a unique number. Where + * an instruction produces duplicate values (eg DP3), all are given + * the same number. + * + * Translate away swizzling and eliminate non-saturating moves. 
+ */ +void brw_wm_pass0( struct brw_wm_compile *c ) +{ + GLuint insn; + + c->nr_vreg = 0; + c->nr_insns = 0; + + pass0_init_undef(c); + pass0_init_payload(c); + + for (insn = 0; insn < c->nr_fp_insns; insn++) { + const struct prog_instruction *inst = &c->prog_instructions[insn]; + + /* Optimize away moves, otherwise emit translated instruction: + */ + switch (inst->Opcode) { + case OPCODE_MOV: + case OPCODE_SWZ: + if (!inst->SaturateMode) { + pass0_precalc_mov(c, inst); + } + else { + translate_insn(c, inst); + } + break; + default: + translate_insn(c, inst); + break; + } + } + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass0"); + } +} diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c new file mode 100644 index 0000000000..b449394029 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -0,0 +1,291 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_wm.h" + + +static GLuint get_tracked_mask(struct brw_wm_compile *c, + struct brw_wm_instruction *inst) +{ + GLuint i; + for (i = 0; i < 4; i++) { + if (inst->writemask & (1<dst[i]->contributes_to_output) { + inst->writemask &= ~(1<dst[i] = 0; + } + } + } + + return inst->writemask; +} + +/* Remove a reference from a value's usage chain. + */ +static void unlink_ref(struct brw_wm_ref *ref) +{ + struct brw_wm_value *value = ref->value; + + if (ref == value->lastuse) { + value->lastuse = ref->prevuse; + } + else { + struct brw_wm_ref *i = value->lastuse; + while (i->prevuse != ref) i = i->prevuse; + i->prevuse = ref->prevuse; + } +} + +static void track_arg(struct brw_wm_compile *c, + struct brw_wm_instruction *inst, + GLuint arg, + GLuint readmask) +{ + GLuint i; + + for (i = 0; i < 4; i++) { + struct brw_wm_ref *ref = inst->src[arg][i]; + if (ref) { + if (readmask & (1<value->contributes_to_output = 1; + } + else { + unlink_ref(ref); + inst->src[arg][i] = NULL; + } + } + } +} + +static GLuint get_texcoord_mask( GLuint tex_idx ) +{ + switch (tex_idx) { + case TEXTURE_1D_INDEX: + return WRITEMASK_X; + case TEXTURE_2D_INDEX: + return WRITEMASK_XY; + case TEXTURE_3D_INDEX: + return WRITEMASK_XYZ; + case TEXTURE_CUBE_INDEX: + return WRITEMASK_XYZ; + case TEXTURE_RECT_INDEX: + return WRITEMASK_XY; + default: return 0; + } +} + + +/* Step two: Basically this is dead code elimination. + * + * Iterate backwards over instructions, noting which values + * contribute to the final result. Adjust writemasks to only + * calculate these values. 
+ */ +void brw_wm_pass1( struct brw_wm_compile *c ) +{ + GLint insn; + + for (insn = c->nr_insns-1; insn >= 0; insn--) { + struct brw_wm_instruction *inst = &c->instruction[insn]; + GLuint writemask; + GLuint read0, read1, read2; + + if (inst->opcode == OPCODE_KIL) { + track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */ + continue; + } + + if (inst->opcode == WM_FB_WRITE) { + track_arg(c, inst, 0, WRITEMASK_XYZW); + track_arg(c, inst, 1, WRITEMASK_XYZW); + if (c->key.source_depth_to_render_target && + c->key.computes_depth) + track_arg(c, inst, 2, WRITEMASK_Z); + else + track_arg(c, inst, 2, 0); + continue; + } + + /* Lookup all the registers which were written by this + * instruction and get a mask of those that contribute to the output: + */ + writemask = get_tracked_mask(c, inst); + if (!writemask) { + GLuint arg; + for (arg = 0; arg < 3; arg++) + track_arg(c, inst, arg, 0); + continue; + } + + read0 = 0; + read1 = 0; + read2 = 0; + + /* Mark all inputs which contribute to the marked outputs: + */ + switch (inst->opcode) { + case OPCODE_ABS: + case OPCODE_FLR: + case OPCODE_FRC: + case OPCODE_MOV: + case OPCODE_SWZ: + case OPCODE_TRUNC: + read0 = writemask; + break; + + case OPCODE_SUB: + case OPCODE_SLT: + case OPCODE_SLE: + case OPCODE_SGE: + case OPCODE_SGT: + case OPCODE_SEQ: + case OPCODE_SNE: + case OPCODE_ADD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MUL: + read0 = writemask; + read1 = writemask; + break; + + case OPCODE_DDX: + case OPCODE_DDY: + read0 = writemask; + break; + + case OPCODE_MAD: + case OPCODE_CMP: + case OPCODE_LRP: + read0 = writemask; + read1 = writemask; + read2 = writemask; + break; + + case OPCODE_XPD: + if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ; + if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ; + if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY; + read1 = read0; + break; + + case OPCODE_COS: + case OPCODE_EX2: + case OPCODE_LG2: + case OPCODE_RCP: + case OPCODE_RSQ: + case OPCODE_SIN: + 
case OPCODE_SCS: + case WM_CINTERP: + case WM_PIXELXY: + read0 = WRITEMASK_X; + break; + + case OPCODE_POW: + read0 = WRITEMASK_X; + read1 = WRITEMASK_X; + break; + + case OPCODE_TEX: + case OPCODE_TXP: + read0 = get_texcoord_mask(inst->tex_idx); + + if (inst->tex_shadow) + read0 |= WRITEMASK_Z; + break; + + case OPCODE_TXB: + /* Shadow ignored for txb. + */ + read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W; + break; + + case WM_WPOSXY: + read0 = writemask & WRITEMASK_XY; + break; + + case WM_DELTAXY: + read0 = writemask & WRITEMASK_XY; + read1 = WRITEMASK_X; + break; + + case WM_PIXELW: + read0 = WRITEMASK_X; + read1 = WRITEMASK_XY; + break; + + case WM_LINTERP: + read0 = WRITEMASK_X; + read1 = WRITEMASK_XY; + break; + + case WM_PINTERP: + read0 = WRITEMASK_X; /* interpolant */ + read1 = WRITEMASK_XY; /* deltas */ + read2 = WRITEMASK_W; /* pixel w */ + break; + + case OPCODE_DP3: + read0 = WRITEMASK_XYZ; + read1 = WRITEMASK_XYZ; + break; + + case OPCODE_DPH: + read0 = WRITEMASK_XYZ; + read1 = WRITEMASK_XYZW; + break; + + case OPCODE_DP4: + read0 = WRITEMASK_XYZW; + read1 = WRITEMASK_XYZW; + break; + + case OPCODE_LIT: + read0 = WRITEMASK_XYW; + break; + + case OPCODE_DST: + case WM_FRONTFACING: + case OPCODE_KIL_NV: + default: + break; + } + + track_arg(c, inst, 0, read0); + track_arg(c, inst, 1, read1); + track_arg(c, inst, 2, read2); + } + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass1"); + } +} diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c new file mode 100644 index 0000000000..6faea018fb --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -0,0 +1,343 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_wm.h" + + +/* Use these to force spilling so that that functionality can be + * tested with known-good examples rather than having to construct new + * tests. + */ +#define TEST_PAYLOAD_SPILLS 0 +#define TEST_DST_SPILLS 0 + +static void spill_value(struct brw_wm_compile *c, + struct brw_wm_value *value); + +static void prealloc_reg(struct brw_wm_compile *c, + struct brw_wm_value *value, + GLuint reg) +{ + if (value->lastuse) { + /* Set nextuse to zero, it will be corrected by + * update_register_usage(). 
+ */ + c->pass2_grf[reg].value = value; + c->pass2_grf[reg].nextuse = 0; + + value->resident = &c->pass2_grf[reg]; + value->hw_reg = brw_vec8_grf(reg*2, 0); + + if (TEST_PAYLOAD_SPILLS) + spill_value(c, value); + } +} + + +/* Initialize all the register values. Do the initial setup + * calculations for interpolants. + */ +static void init_registers( struct brw_wm_compile *c ) +{ + GLuint nr_interp_regs = 0; + GLuint i = 0; + GLuint j; + + for (j = 0; j < c->grf_limit; j++) + c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; + + for (j = 0; j < c->key.nr_depth_regs; j++) + prealloc_reg(c, &c->payload.depth[j], i++); + + for (j = 0; j < c->nr_creg; j++) + prealloc_reg(c, &c->creg[j], i++); + + for (j = 0; j < FRAG_ATTRIB_MAX; j++) { + if (c->key.vp_outputs_written & (1<= VERT_RESULT_VAR0) + fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); + else if (j <= VERT_RESULT_TEX7) + fp_index = j; + else + fp_index = -1; + + nr_interp_regs++; + if (fp_index >= 0) + prealloc_reg(c, &c->payload.input_interp[fp_index], i++); + } + } + + assert(nr_interp_regs >= 1); + + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.curb_read_length = c->nr_creg * 2; + + c->max_wm_grf = i * 2; +} + + +/* Update the nextuse value for each register in our file. + */ +static void update_register_usage(struct brw_wm_compile *c, + GLuint thisinsn) +{ + GLuint i; + + for (i = 1; i < c->grf_limit; i++) { + struct brw_wm_grf *grf = &c->pass2_grf[i]; + + /* Only search those which can change: + */ + if (grf->nextuse < thisinsn) { + const struct brw_wm_ref *ref = grf->value->lastuse; + + /* Has last use of value been passed? 
+ */ + if (ref->insn < thisinsn) { + grf->value->resident = 0; + grf->value = 0; + grf->nextuse = BRW_WM_MAX_INSN; + } + else { + /* Else loop through chain to update: + */ + while (ref->prevuse && ref->prevuse->insn >= thisinsn) + ref = ref->prevuse; + + grf->nextuse = ref->insn; + } + } + } +} + + +static void spill_value(struct brw_wm_compile *c, + struct brw_wm_value *value) +{ + /* Allocate a spill slot. Note that allocations start from 0x40 - + * the first slot is reserved to mean "undef" in brw_wm_emit.c + */ + if (!value->spill_slot) { + c->last_scratch += 0x40; + value->spill_slot = c->last_scratch; + } + + /* The spill will be done in brw_wm_emit.c immediately after the + * value is calculated, so we can just take this reg without any + * further work. + */ + value->resident->value = NULL; + value->resident->nextuse = BRW_WM_MAX_INSN; + value->resident = NULL; +} + + + +/* Search for contiguous region with the most distant nearest + * member. Free regs count as very distant. + * + * TODO: implement spill-to-reg so that we can rearrange discontigous + * free regs and then spill the oldest non-free regs in sequence. + * This would mean inserting instructions in this pass. + */ +static GLuint search_contiguous_regs(struct brw_wm_compile *c, + GLuint nr, + GLuint thisinsn) +{ + struct brw_wm_grf *grf = c->pass2_grf; + GLuint furthest = 0; + GLuint reg = 0; + GLuint i, j; + + /* Start search at 1: r0 is special and can't be used or spilled. 
+ */ + for (i = 1; i < c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) { + GLuint group_nextuse = BRW_WM_MAX_INSN; + + for (j = 0; j < nr; j++) { + if (grf[i+j].nextuse < group_nextuse) + group_nextuse = grf[i+j].nextuse; + } + + if (group_nextuse > furthest) { + furthest = group_nextuse; + reg = i; + } + } + + assert(furthest != thisinsn); + + /* Any non-empty regs will need to be spilled: + */ + for (j = 0; j < nr; j++) + if (grf[reg+j].value) + spill_value(c, grf[reg+j].value); + + return reg; +} + + +static void alloc_contiguous_dest(struct brw_wm_compile *c, + struct brw_wm_value *dst[], + GLuint nr, + GLuint thisinsn) +{ + GLuint reg = search_contiguous_regs(c, nr, thisinsn); + GLuint i; + + for (i = 0; i < nr; i++) { + if (!dst[i]) { + /* Need to grab a dummy value in TEX case. Don't introduce + * it into the tracking scheme. + */ + dst[i] = &c->vreg[c->nr_vreg++]; + } + else { + assert(!dst[i]->resident); + assert(c->pass2_grf[reg+i].nextuse != thisinsn); + + c->pass2_grf[reg+i].value = dst[i]; + c->pass2_grf[reg+i].nextuse = thisinsn; + + dst[i]->resident = &c->pass2_grf[reg+i]; + } + + dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0); + } + + if ((reg+nr)*2 > c->max_wm_grf) + c->max_wm_grf = (reg+nr) * 2; +} + + +static void load_args(struct brw_wm_compile *c, + struct brw_wm_instruction *inst) +{ + GLuint thisinsn = inst - c->instruction; + GLuint i,j; + + for (i = 0; i < 3; i++) { + for (j = 0; j < 4; j++) { + struct brw_wm_ref *ref = inst->src[i][j]; + + if (ref) { + if (!ref->value->resident) { + /* Need to bring the value in from scratch space. The code for + * this will be done in brw_wm_emit.c, here we just do the + * register allocation and mark the ref as requiring a fill. 
+ */ + GLuint reg = search_contiguous_regs(c, 1, thisinsn); + + c->pass2_grf[reg].value = ref->value; + c->pass2_grf[reg].nextuse = thisinsn; + + ref->value->resident = &c->pass2_grf[reg]; + + /* Note that a fill is required: + */ + ref->unspill_reg = reg*2; + } + + /* Adjust the hw_reg to point at the value's current location: + */ + assert(ref->value == ref->value->resident->value); + ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2; + } + } + } +} + + + +/* Step 3: Work forwards once again. Perform register allocations, + * taking into account instructions like TEX which require contiguous + * result registers. Where necessary spill registers to scratch space + * and reload later. + */ +void brw_wm_pass2( struct brw_wm_compile *c ) +{ + GLuint insn; + GLuint i; + + init_registers(c); + + for (insn = 0; insn < c->nr_insns; insn++) { + struct brw_wm_instruction *inst = &c->instruction[insn]; + + /* Update registers' nextuse values: + */ + update_register_usage(c, insn); + + /* May need to unspill some args. 
+ */ + load_args(c, inst); + + /* Allocate registers to hold results: + */ + switch (inst->opcode) { + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + alloc_contiguous_dest(c, inst->dst, 4, insn); + break; + + default: + for (i = 0; i < 4; i++) { + if (inst->writemask & (1<dst[i]); + alloc_contiguous_dest(c, &inst->dst[i], 1, insn); + } + } + break; + } + + if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) { + for (i = 0; i < 4; i++) + if (inst->dst[i]) + spill_value(c, inst->dst[i]); + } + } + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass2"); + } + + c->state = PASS2_DONE; + + if (INTEL_DEBUG & DEBUG_WM) { + brw_wm_print_program(c, "pass2/done"); + } +} diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c new file mode 100644 index 0000000000..dff466587a --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -0,0 +1,369 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "main/macros.h" + + + +/* Samplers aren't strictly wm state from the hardware's perspective, + * but that is the only situation in which we use them in this driver. + */ + + + +/* The brw (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static GLuint translate_wrap_mode( GLenum wrap ) +{ + switch( wrap ) { + case GL_REPEAT: + return BRW_TEXCOORDMODE_WRAP; + case GL_CLAMP: + return BRW_TEXCOORDMODE_CLAMP; + case GL_CLAMP_TO_EDGE: + return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ + case GL_CLAMP_TO_BORDER: + return BRW_TEXCOORDMODE_CLAMP_BORDER; + case GL_MIRRORED_REPEAT: + return BRW_TEXCOORDMODE_MIRROR; + default: + return BRW_TEXCOORDMODE_WRAP; + } +} + + +static GLuint U_FIXED(GLfloat value, GLuint frac_bits) +{ + value *= (1<cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc, + NULL, 0 ); +} + + +struct wm_sampler_key { + int sampler_count; + + struct wm_sampler_entry { + GLenum tex_target; + GLenum wrap_r, wrap_s, wrap_t; + float maxlod, minlod; + float lod_bias; + float max_aniso; + GLenum minfilter, magfilter; + GLenum comparemode, comparefunc; + dri_bo *sdc_bo; + + /** If target is cubemap, take context setting. + */ + GLboolean seamless_cube_map; + } sampler[BRW_MAX_TEX_UNIT]; +}; + +/** + * Sets the sampler state for a single unit based off of the sampler key + * entry. 
+ */ +static void brw_update_sampler_state(struct wm_sampler_entry *key, + dri_bo *sdc_bo, + struct brw_sampler_state *sampler) +{ + _mesa_memset(sampler, 0, sizeof(*sampler)); + + switch (key->minfilter) { + case GL_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + case GL_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + case GL_NEAREST_MIPMAP_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case GL_LINEAR_MIPMAP_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + case GL_LINEAR_MIPMAP_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + default: + break; + } + + /* Set Anisotropy: + */ + if (key->max_aniso > 1.0) { + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; + + if (key->max_aniso > 2.0) { + sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, + BRW_ANISORATIO_16); + } + } + else { + switch (key->magfilter) { + case GL_NEAREST: + sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case GL_LINEAR: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; + } + } + + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); + + /* Cube-maps on 965 and later must use the same wrap mode for all 3 + * coordinate dimensions. Futher, only CUBE and CLAMP are valid. 
+ */ + if (key->tex_target == GL_TEXTURE_CUBE_MAP) { + if (key->seamless_cube_map && + (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + } else { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + } + } else if (key->tex_target == GL_TEXTURE_1D) { + /* There's a bug in 1D texture sampling - it actually pays + * attention to the wrap_t value, though it should not. + * Override the wrap_t value here to GL_REPEAT to keep + * any nonexistent border pixels from floating in. + */ + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + } + + + /* Set shadow function: + */ + if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) { + /* Shadowing is "enabled" by emitting a particular sampler + * message (sample_c). So need to recompile WM program when + * shadow comparison is enabled on each/any texture unit. + */ + sampler->ss0.shadow_function = + intel_translate_shadow_compare_func(key->comparefunc); + } + + /* Set LOD bias: + */ + sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6); + + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ + sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + + /* Set BaseMipLevel, MaxLOD, MinLOD: + * + * XXX: I don't think that using firstLevel, lastLevel works, + * because we always setup the surface state as if firstLevel == + * level zero. 
Probably have to subtract firstLevel from each of + * these: + */ + sampler->ss0.base_level = U_FIXED(0, 1); + + sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6); + sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6); + + sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ +} + + +/** Sets up the cache key for sampler state for all texture units */ +static void +brw_wm_sampler_populate_key(struct brw_context *brw, + struct wm_sampler_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + int unit; + + memset(key, 0, sizeof(*key)); + + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + if (ctx->Texture.Unit[unit]._ReallyEnabled) { + struct wm_sampler_entry *entry = &key->sampler[unit]; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *texObj = texUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(texObj); + struct gl_texture_image *firstImage = + texObj->Image[0][intelObj->firstLevel]; + + entry->tex_target = texObj->Target; + + entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) + ? ctx->Texture.CubeMapSeamless : GL_FALSE; + + entry->wrap_r = texObj->WrapR; + entry->wrap_s = texObj->WrapS; + entry->wrap_t = texObj->WrapT; + + entry->maxlod = texObj->MaxLod; + entry->minlod = texObj->MinLod; + entry->lod_bias = texUnit->LodBias + texObj->LodBias; + entry->max_aniso = texObj->MaxAnisotropy; + entry->minfilter = texObj->MinFilter; + entry->magfilter = texObj->MagFilter; + entry->comparemode = texObj->CompareMode; + entry->comparefunc = texObj->CompareFunc; + + dri_bo_unreference(brw->wm.sdc_bo[unit]); + if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { + float bordercolor[4] = { + texObj->BorderColor[0], + texObj->BorderColor[0], + texObj->BorderColor[0], + texObj->BorderColor[0] + }; + /* GL specs that border color for depth textures is taken from the + * R channel, while the hardware uses A. Spam R into all the + * channels for safety. 
+ */ + brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor); + } else { + brw->wm.sdc_bo[unit] = upload_default_color(brw, + texObj->BorderColor); + } + key->sampler_count = unit + 1; + } + } +} + +/* All samplers must be uploaded in a single contiguous array, which + * complicates various things. However, this is still too confusing - + * FIXME: simplify all the different new texture state flags. + */ +static void upload_wm_samplers( struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct wm_sampler_key key; + int i; + + brw_wm_sampler_populate_key(brw, &key); + + if (brw->wm.sampler_count != key.sampler_count) { + brw->wm.sampler_count = key.sampler_count; + brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + } + + dri_bo_unreference(brw->wm.sampler_bo); + brw->wm.sampler_bo = NULL; + if (brw->wm.sampler_count == 0) + return; + + brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, + &key, sizeof(key), + brw->wm.sdc_bo, key.sampler_count, + NULL); + + /* If we didnt find it in the cache, compute the state and put it in the + * cache. 
+ */ + if (brw->wm.sampler_bo == NULL) { + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + memset(sampler, 0, sizeof(sampler)); + for (i = 0; i < key.sampler_count; i++) { + if (brw->wm.sdc_bo[i] == NULL) + continue; + + brw_update_sampler_state(&key.sampler[i], brw->wm.sdc_bo[i], + &sampler[i]); + } + + brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, + &key, sizeof(key), + brw->wm.sdc_bo, key.sampler_count, + &sampler, sizeof(sampler), + NULL, NULL); + + /* Emit SDC relocations */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + if (!ctx->Texture.Unit[i]._ReallyEnabled) + continue; + + dri_bo_emit_reloc(brw->wm.sampler_bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + i * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + brw->wm.sdc_bo[i]); + } + } +} + +const struct brw_tracked_state brw_wm_samplers = { + .dirty = { + .mesa = _NEW_TEXTURE, + .brw = 0, + .cache = 0 + }, + .prepare = upload_wm_samplers, +}; + + diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c new file mode 100644 index 0000000000..361f91292b --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -0,0 +1,317 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_wm.h" + +/*********************************************************************** + * WM unit - fragment programs and rasterization + */ + +struct brw_wm_unit_key { + unsigned int total_grf, total_scratch; + unsigned int urb_entry_read_length; + unsigned int curb_entry_read_length; + unsigned int dispatch_grf_start_reg; + + unsigned int curbe_offset; + unsigned int urb_size; + + unsigned int max_threads; + + unsigned int nr_surfaces, sampler_count; + GLboolean uses_depth, computes_depth, uses_kill, is_glsl; + GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; + GLfloat offset_units, offset_factor; +}; + +static void +wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) +{ + GLcontext *ctx = &brw->intel.ctx; + const struct gl_fragment_program *fp = brw->fragment_program; + const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; + struct intel_context *intel = &brw->intel; + + memset(key, 0, sizeof(*key)); + + if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + key->max_threads = 1; + else { + /* WM maximum threads is number of EUs times number of threads per EU. 
*/ + if (BRW_IS_IGDNG(brw)) + key->max_threads = 12 * 6; + else if (BRW_IS_G4X(brw)) + key->max_threads = 10 * 5; + else + key->max_threads = 8 * 4; + } + + /* CACHE_NEW_WM_PROG */ + key->total_grf = brw->wm.prog_data->total_grf; + key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; + key->curb_entry_read_length = brw->wm.prog_data->curb_read_length; + key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024); + + /* BRW_NEW_URB_FENCE */ + key->urb_size = brw->urb.vsize; + + /* BRW_NEW_CURBE_OFFSETS */ + key->curbe_offset = brw->curbe.wm_start; + + /* BRW_NEW_NR_SURFACEs */ + key->nr_surfaces = brw->wm.nr_surfaces; + + /* CACHE_NEW_SAMPLER */ + key->sampler_count = brw->wm.sampler_count; + + /* _NEW_POLYGONSTIPPLE */ + key->polygon_stipple = ctx->Polygon.StippleFlag; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; + + /* as far as we can tell */ + key->computes_depth = + (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; + /* BRW_NEW_DEPTH_BUFFER + * Override for NULL depthbuffer case, required by the Pixel Shader Computed + * Depth field. + */ + if (brw->state.depth_region == NULL) + key->computes_depth = 0; + + /* _NEW_COLOR */ + key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; + key->is_glsl = bfp->isGLSL; + + /* temporary sanity check assertion */ + ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); + + /* _NEW_DEPTH */ + key->stats_wm = intel->stats_wm; + + /* _NEW_LINE */ + key->line_stipple = ctx->Line.StippleFlag; + + /* _NEW_POLYGON */ + key->offset_enable = ctx->Polygon.OffsetFill; + key->offset_units = ctx->Polygon.OffsetUnits; + key->offset_factor = ctx->Polygon.OffsetFactor; +} + +/** + * Setup wm hardware state. 
See page 225 of Volume 2 + */ +static dri_bo * +wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, + dri_bo **reloc_bufs) +{ + struct brw_wm_unit_state wm; + dri_bo *bo; + + memset(&wm, 0, sizeof(wm)); + + wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ + wm.thread1.depth_coef_urb_read_offset = 1; + wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + + if (BRW_IS_IGDNG(brw)) + wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ + else + wm.thread1.binding_table_entry_count = key->nr_surfaces; + + if (key->total_scratch != 0) { + wm.thread2.scratch_space_base_pointer = + brw->wm.scratch_bo->offset >> 10; /* reloc */ + wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; + } else { + wm.thread2.scratch_space_base_pointer = 0; + wm.thread2.per_thread_scratch_space = 0; + } + + wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; + wm.thread3.urb_entry_read_length = key->urb_entry_read_length; + wm.thread3.urb_entry_read_offset = 0; + wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; + wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + + if (BRW_IS_IGDNG(brw)) + wm.wm4.sampler_count = 0; /* hardware requirement */ + else + wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + + if (brw->wm.sampler_bo != NULL) { + /* reloc */ + wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; + } else { + wm.wm4.sampler_state_pointer = 0; + } + + wm.wm5.program_uses_depth = key->uses_depth; + wm.wm5.program_computes_depth = key->computes_depth; + wm.wm5.program_uses_killpixel = key->uses_kill; + + if (key->is_glsl) + wm.wm5.enable_8_pix = 1; + else + wm.wm5.enable_16_pix = 1; + + wm.wm5.max_threads = key->max_threads - 1; + wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */ + wm.wm5.legacy_line_rast = 0; + wm.wm5.legacy_global_depth_bias = 0; + 
wm.wm5.early_depth_test = 1; /* never need to disable */ + wm.wm5.line_aa_region_width = 0; + wm.wm5.line_endcap_aa_region_width = 1; + + wm.wm5.polygon_stipple = key->polygon_stipple; + + if (key->offset_enable) { + wm.wm5.depth_offset = 1; + /* Something wierd going on with legacy_global_depth_bias, + * offset_constant, scaling and MRD. This value passes glean + * but gives some odd results elsewere (eg. the + * quad-offset-units test). + */ + wm.global_depth_offset_constant = key->offset_units * 2; + + /* This is the only value that passes glean: + */ + wm.global_depth_offset_scale = key->offset_factor; + } + + wm.wm5.line_stipple = key->line_stipple; + + if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm) + wm.wm4.stats_enable = 1; + + bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, + key, sizeof(*key), + reloc_bufs, 3, + &wm, sizeof(wm), + NULL, NULL); + + /* Emit WM program relocation */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + wm.thread0.grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); + + /* Emit scratch space relocation */ + if (key->total_scratch != 0) { + dri_bo_emit_reloc(bo, + 0, 0, + wm.thread2.per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_bo); + } + + /* Emit sampler state relocation */ + if (key->sampler_count != 0) { + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); + } + + return bo; +} + + +static void upload_wm_unit( struct brw_context *brw ) +{ + struct intel_context *intel = &brw->intel; + struct brw_wm_unit_key key; + dri_bo *reloc_bufs[3]; + wm_unit_populate_key(brw, &key); + + /* Allocate the necessary scratch space if we haven't already. Don't + * bother reducing the allocation later, since we use scratch so + * rarely. 
+ */ + assert(key.total_scratch <= 12 * 1024); + if (key.total_scratch) { + GLuint total = key.total_scratch * key.max_threads; + + if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { + dri_bo_unreference(brw->wm.scratch_bo); + brw->wm.scratch_bo = NULL; + } + if (brw->wm.scratch_bo == NULL) { + brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr, + "wm scratch", + total, + 4096); + } + } + + reloc_bufs[0] = brw->wm.prog_bo; + reloc_bufs[1] = brw->wm.scratch_bo; + reloc_bufs[2] = brw->wm.sampler_bo; + + dri_bo_unreference(brw->wm.state_bo); + brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT, + &key, sizeof(key), + reloc_bufs, 3, + NULL); + if (brw->wm.state_bo == NULL) { + brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); + } +} + +const struct brw_tracked_state brw_wm_unit = { + .dirty = { + .mesa = (_NEW_POLYGON | + _NEW_POLYGONSTIPPLE | + _NEW_LINE | + _NEW_COLOR | + _NEW_DEPTH), + + .brw = (BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_CURBE_OFFSETS | + BRW_NEW_DEPTH_BUFFER | + BRW_NEW_NR_WM_SURFACES), + + .cache = (CACHE_NEW_WM_PROG | + CACHE_NEW_SAMPLER) + }, + .prepare = upload_wm_unit, +}; + diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c new file mode 100644 index 0000000000..f7cc5153a8 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -0,0 +1,752 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#include "main/mtypes.h" +#include "main/texformat.h" +#include "main/texstore.h" +#include "shader/prog_parameter.h" + +#include "intel_mipmap_tree.h" +#include "intel_batchbuffer.h" +#include "intel_tex.h" +#include "intel_fbo.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + + +static GLuint translate_tex_target( GLenum target ) +{ + switch (target) { + case GL_TEXTURE_1D: + return BRW_SURFACE_1D; + + case GL_TEXTURE_RECTANGLE_NV: + return BRW_SURFACE_2D; + + case GL_TEXTURE_2D: + return BRW_SURFACE_2D; + + case GL_TEXTURE_3D: + return BRW_SURFACE_3D; + + case GL_TEXTURE_CUBE_MAP: + return BRW_SURFACE_CUBE; + + default: + assert(0); + return 0; + } +} + + +static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, + GLenum depth_mode ) +{ + switch( mesa_format ) { + case MESA_FORMAT_L8: + return BRW_SURFACEFORMAT_L8_UNORM; + + case MESA_FORMAT_I8: + return BRW_SURFACEFORMAT_I8_UNORM; + + case MESA_FORMAT_A8: + return BRW_SURFACEFORMAT_A8_UNORM; + + case MESA_FORMAT_AL88: + return BRW_SURFACEFORMAT_L8A8_UNORM; + + case MESA_FORMAT_RGB888: + assert(0); /* not supported for sampling */ + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + + case MESA_FORMAT_ARGB8888: + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + else + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + case MESA_FORMAT_RGBA8888_REV: + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + else + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case MESA_FORMAT_RGB565: + return BRW_SURFACEFORMAT_B5G6R5_UNORM; + + case MESA_FORMAT_ARGB1555: + return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + + case MESA_FORMAT_ARGB4444: + return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + + case MESA_FORMAT_YCBCR_REV: + return BRW_SURFACEFORMAT_YCRCB_NORMAL; + + case MESA_FORMAT_YCBCR: + return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; 
+ + case MESA_FORMAT_RGB_FXT1: + case MESA_FORMAT_RGBA_FXT1: + return BRW_SURFACEFORMAT_FXT1; + + case MESA_FORMAT_Z16: + if (depth_mode == GL_INTENSITY) + return BRW_SURFACEFORMAT_I16_UNORM; + else if (depth_mode == GL_ALPHA) + return BRW_SURFACEFORMAT_A16_UNORM; + else + return BRW_SURFACEFORMAT_L16_UNORM; + + case MESA_FORMAT_RGB_DXT1: + return BRW_SURFACEFORMAT_DXT1_RGB; + + case MESA_FORMAT_RGBA_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM; + + case MESA_FORMAT_RGBA_DXT3: + return BRW_SURFACEFORMAT_BC2_UNORM; + + case MESA_FORMAT_RGBA_DXT5: + return BRW_SURFACEFORMAT_BC3_UNORM; + + case MESA_FORMAT_SARGB8: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; + + case MESA_FORMAT_SLA8: + return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; + + case MESA_FORMAT_SL8: + return BRW_SURFACEFORMAT_L8_UNORM_SRGB; + + case MESA_FORMAT_SRGB_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; + + case MESA_FORMAT_S8_Z24: + /* XXX: these different surface formats don't seem to + * make any difference for shadow sampler/compares. 
+ */ + if (depth_mode == GL_INTENSITY) + return BRW_SURFACEFORMAT_I24X8_UNORM; + else if (depth_mode == GL_ALPHA) + return BRW_SURFACEFORMAT_A24X8_UNORM; + else + return BRW_SURFACEFORMAT_L24X8_UNORM; + + case MESA_FORMAT_DUDV8: + return BRW_SURFACEFORMAT_R8G8_SNORM; + + case MESA_FORMAT_SIGNED_RGBA8888_REV: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + default: + assert(0); + return 0; + } +} + +static void +brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) +{ + switch (tiling) { + case I915_TILING_NONE: + surf->ss3.tiled_surface = 0; + surf->ss3.tile_walk = 0; + break; + case I915_TILING_X: + surf->ss3.tiled_surface = 1; + surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR; + break; + case I915_TILING_Y: + surf->ss3.tiled_surface = 1; + surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR; + break; + } +} + +static dri_bo * +brw_create_texture_surface( struct brw_context *brw, + struct brw_surface_key *key ) +{ + struct brw_surface_state surf; + dri_bo *bo; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = translate_tex_target(key->target); + if (key->bo) { + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); + } + else { + switch (key->depth) { + case 32: + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + break; + default: + case 24: + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + break; + case 16: + surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + break; + } + } + + /* This is ok for all textures with channel width 8bit or less: + */ +/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + if (key->bo) + surf.ss1.base_addr = key->bo->offset; /* reloc */ + else + surf.ss1.base_addr = key->offset; + + surf.ss2.mip_count = key->last_level - key->first_level; + surf.ss2.width = key->width - 1; + surf.ss2.height = key->height - 1; + brw_set_surface_tiling(&surf, key->tiling); + 
surf.ss3.pitch = (key->pitch * key->cpp) - 1; + surf.ss3.depth = key->depth - 1; + + surf.ss4.min_lod = 0; + + if (key->target == GL_TEXTURE_CUBE_MAP) { + surf.ss0.cube_pos_x = 1; + surf.ss0.cube_pos_y = 1; + surf.ss0.cube_pos_z = 1; + surf.ss0.cube_neg_x = 1; + surf.ss0.cube_neg_y = 1; + surf.ss0.cube_neg_z = 1; + } + + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 1 : 0, + &surf, sizeof(surf), + NULL, NULL); + + if (key->bo) { + /* Emit relocation to surface contents */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + } + return bo; +} + +static void +brw_update_texture_surface( GLcontext *ctx, GLuint unit ) +{ + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; + struct brw_surface_key key; + const GLuint surf = SURF_INDEX_TEXTURE(unit); + + memset(&key, 0, sizeof(key)); + + if (intelObj->imageOverride) { + key.pitch = intelObj->pitchOverride / intelObj->mt->cpp; + key.depth = intelObj->depthOverride; + key.bo = NULL; + key.offset = intelObj->textureOffset; + } else { + key.format = firstImage->TexFormat->MesaFormat; + key.internal_format = firstImage->InternalFormat; + key.pitch = intelObj->mt->pitch; + key.depth = firstImage->Depth; + key.bo = intelObj->mt->region->buffer; + key.offset = 0; + } + + key.target = tObj->Target; + key.depthmode = tObj->DepthMode; + key.first_level = intelObj->firstLevel; + key.last_level = intelObj->lastLevel; + key.width = firstImage->Width; + key.height = firstImage->Height; + key.cpp = intelObj->mt->cpp; + key.tiling = intelObj->mt->region->tiling; + + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + 
&key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); + } +} + + + +/** + * Create the constant buffer surface. Vertex/fragment shader constants will be + * read from this buffer with Data Port Read instructions/messages. + */ +dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key ) +{ + const GLint w = key->width - 1; + struct brw_surface_state surf; + dri_bo *bo; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = BRW_SURFACE_BUFFER; + surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + assert(key->bo); + if (key->bo) + surf.ss1.base_addr = key->bo->offset; /* reloc */ + else + surf.ss1.base_addr = key->offset; + + surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ + surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ + surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ + surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ + + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 1 : 0, + &surf, sizeof(surf), + NULL, NULL); + + if (key->bo) { + /* Emit relocation to surface contents */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + } + + return bo; +} + +/* Creates a new WM constant buffer reflecting the current fragment program's + * constants, if needed by the fragment program. + * + * Otherwise, constants go through the CURBEs using the brw_constant_buffer + * state atom. 
+ */ +static drm_intel_bo * +brw_wm_update_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = fp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + drm_intel_bo *const_buffer; + + /* BRW_NEW_FRAGMENT_PROGRAM */ + if (!fp->use_const_buffer) + return NULL; + + const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", + size, 64); + + /* _NEW_PROGRAM_CONSTANTS */ + dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + + return const_buffer; +} + +/** + * Update the surface state for a WM constant buffer. + * The constant buffer will be (re)allocated here if needed. + */ +static void +brw_update_wm_constant_surface( GLcontext *ctx, + GLuint surf) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = + fp->program.Base.Parameters; + + /* If we're in this state update atom, we need to update WM constants, so + * free the old buffer and create a new one for the new contents. + */ + dri_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. 
+ */ + if (fp->const_buffer == 0) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + return; + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = fp->const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; +} + +/** + * Updates surface / buffer for fragment shader constant buffer, if + * one is required. + * + * This consumes the state updates for the constant buffer, and produces + * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for + * inclusion in the binding table. + */ +static void prepare_wm_constant_surface(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; + + drm_intel_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. 
+ */ + if (fp->const_buffer == 0) { + if (brw->wm.surf_bo[surf] != NULL) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + } + return; + } + + brw_update_wm_constant_surface(ctx, surf); +} + +const struct brw_tracked_state brw_wm_constant_surface = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .cache = 0 + }, + .prepare = prepare_wm_constant_surface, +}; + + +/** + * Sets up a surface state structure to point at the given region. + * While it is only used for the front/back buffer currently, it should be + * usable for further buffers when doing ARB_draw_buffer support. + */ +static void +brw_update_renderbuffer_surface(struct brw_context *brw, + struct gl_renderbuffer *rb, + unsigned int unit) +{ + GLcontext *ctx = &brw->intel.ctx; + dri_bo *region_bo = NULL; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_region *region = irb ? irb->region : NULL; + struct { + unsigned int surface_type; + unsigned int surface_format; + unsigned int width, height, pitch, cpp; + GLubyte color_mask[4]; + GLboolean color_blend; + uint32_t tiling; + uint32_t draw_offset; + } key; + + memset(&key, 0, sizeof(key)); + + if (region != NULL) { + region_bo = region->buffer; + + key.surface_type = BRW_SURFACE_2D; + switch (irb->texformat->MesaFormat) { + case MESA_FORMAT_ARGB8888: + key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + break; + case MESA_FORMAT_RGB565: + key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + break; + case MESA_FORMAT_ARGB1555: + key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + break; + case MESA_FORMAT_ARGB4444: + key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + break; + default: + _mesa_problem(ctx, "Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + } + key.tiling = region->tiling; + if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) { + key.width = rb->Width; + 
key.height = rb->Height; + } else { + key.width = region->width; + key.height = region->height; + } + key.pitch = region->pitch; + key.cpp = region->cpp; + key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ + } else { + key.surface_type = BRW_SURFACE_NULL; + key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + key.tiling = I915_TILING_X; + key.width = 1; + key.height = 1; + key.cpp = 4; + key.draw_offset = 0; + } + memcpy(key.color_mask, ctx->Color.ColorMask, + sizeof(key.color_mask)); + key.color_blend = (!ctx->Color._LogicOpEnabled && + ctx->Color.BlendEnabled); + + dri_bo_unreference(brw->wm.surf_bo[unit]); + brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &region_bo, 1, + NULL); + + if (brw->wm.surf_bo[unit] == NULL) { + struct brw_surface_state surf; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.surface_format = key.surface_format; + surf.ss0.surface_type = key.surface_type; + if (key.tiling == I915_TILING_NONE) { + surf.ss1.base_addr = key.draw_offset; + } else { + uint32_t tile_offset = key.draw_offset % 4096; + + surf.ss1.base_addr = key.draw_offset - tile_offset; + + assert(BRW_IS_G4X(brw) || tile_offset == 0); + if (BRW_IS_G4X(brw)) { + if (key.tiling == I915_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. 
+ */ + surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 512 / 2; + } else { + surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 128 / 2; + } + } + } + if (region_bo != NULL) + surf.ss1.base_addr += region_bo->offset; /* reloc */ + + surf.ss2.width = key.width - 1; + surf.ss2.height = key.height - 1; + brw_set_surface_tiling(&surf, key.tiling); + surf.ss3.pitch = (key.pitch * key.cpp) - 1; + + /* _NEW_COLOR */ + surf.ss0.color_blend = key.color_blend; + surf.ss0.writedisable_red = !key.color_mask[0]; + surf.ss0.writedisable_green = !key.color_mask[1]; + surf.ss0.writedisable_blue = !key.color_mask[2]; + surf.ss0.writedisable_alpha = !key.color_mask[3]; + + /* Key size will never match key size for textures, so we're safe. */ + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &region_bo, 1, + &surf, sizeof(surf), + NULL, NULL); + if (region_bo != NULL) { + /* We might sample from it, and we might render to it, so flag + * them both. We might be able to figure out from other state + * a more restrictive relocation to emit. + */ + drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], + offsetof(struct brw_surface_state, ss1), + region_bo, + surf.ss1.base_addr - region_bo->offset, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); + } + } +} + + +/** + * Constructs the binding table for the WM surface state, which maps unit + * numbers to surface state objects. 
+ */ +static dri_bo * +brw_wm_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); + + bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->wm.surf_bo, brw->wm.nr_surfaces, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint); + uint32_t data[BRW_WM_MAX_SURF]; + int i; + + for (i = 0; i < brw->wm.nr_surfaces; i++) + if (brw->wm.surf_bo[i]) + data[i] = brw->wm.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->wm.surf_bo, brw->wm.nr_surfaces, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_WM_MAX_SURF; i++) { + if (brw->wm.surf_bo[i] != NULL) { + dri_bo_emit_reloc(bind_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(GLuint), + brw->wm.surf_bo[i]); + } + } + } + + return bind_bo; +} + +static void prepare_wm_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + GLuint i; + int old_nr_surfaces; + + /* _NEW_BUFFERS */ + /* Update surfaces for drawing buffers */ + if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + brw_update_renderbuffer_surface(brw, + ctx->DrawBuffer->_ColorDrawBuffers[i], + i); + } + } else { + brw_update_renderbuffer_surface(brw, NULL, 0); + } + + old_nr_surfaces = brw->wm.nr_surfaces; + brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + + if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) + brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; + + /* Update surfaces for textures */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; + const GLuint surf = SURF_INDEX_TEXTURE(i); + + /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ + if (texUnit->_ReallyEnabled) { + brw_update_texture_surface(ctx, i); + brw->wm.nr_surfaces = surf + 1; + } 
else { + dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + } + } + + dri_bo_unreference(brw->wm.bind_bo); + brw->wm.bind_bo = brw_wm_get_binding_table(brw); + + if (brw->wm.nr_surfaces != old_nr_surfaces) + brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; +} + +const struct brw_tracked_state brw_wm_surfaces = { + .dirty = { + .mesa = (_NEW_COLOR | + _NEW_TEXTURE | + _NEW_BUFFERS), + .brw = (BRW_NEW_CONTEXT | + BRW_NEW_WM_SURFACES), + .cache = 0 + }, + .prepare = prepare_wm_surfaces, +}; + + + diff --git a/src/gallium/drivers/i965/intel_batchbuffer.h b/src/gallium/drivers/i965/intel_batchbuffer.h new file mode 100644 index 0000000000..d4899aab7f --- /dev/null +++ b/src/gallium/drivers/i965/intel_batchbuffer.h @@ -0,0 +1,184 @@ +#ifndef INTEL_BATCHBUFFER_H +#define INTEL_BATCHBUFFER_H + +#include "main/mtypes.h" + +#include "intel_context.h" +#include "intel_bufmgr.h" +#include "intel_reg.h" + +#define BATCH_SZ 16384 +#define BATCH_RESERVED 16 + +enum cliprect_mode { + /** + * Batchbuffer contents may be looped over per cliprect, but do not + * require it. + */ + IGNORE_CLIPRECTS, + /** + * Batchbuffer contents require looping over per cliprect at batch submit + * time. + * + * This will be upgraded to NO_LOOP_CLIPRECTS when there's a single + * constant cliprect, as in DRI2 or FBO rendering. + */ + LOOP_CLIPRECTS, + /** + * Batchbuffer contents contain drawing that should not be executed multiple + * times. + */ + NO_LOOP_CLIPRECTS, + /** + * Batchbuffer contents contain drawing that already handles cliprects, such + * as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE. + * + * Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch + * outside of LOCK/UNLOCK. This is upgraded to just NO_LOOP_CLIPRECTS when + * there's a constant cliprect, as in DRI2 or FBO rendering. 
+ */ + REFERENCES_CLIPRECTS +}; + +struct intel_batchbuffer +{ + struct intel_context *intel; + + dri_bo *buf; + + GLubyte *buffer; + + GLubyte *map; + GLubyte *ptr; + + enum cliprect_mode cliprect_mode; + + GLuint size; + + /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ + struct { + GLuint total; + GLubyte *start_ptr; + } emit; + + GLuint dirty_state; +}; + +struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context + *intel); + +void intel_batchbuffer_free(struct intel_batchbuffer *batch); + + +void _intel_batchbuffer_flush(struct intel_batchbuffer *batch, + const char *file, int line); + +#define intel_batchbuffer_flush(batch) \ + _intel_batchbuffer_flush(batch, __FILE__, __LINE__) + +void intel_batchbuffer_reset(struct intel_batchbuffer *batch); + + +/* Unlike bmBufferData, this currently requires the buffer be mapped. + * Consider it a convenience function wrapping multiple + * intel_buffer_dword() calls. + */ +void intel_batchbuffer_data(struct intel_batchbuffer *batch, + const void *data, GLuint bytes, + enum cliprect_mode cliprect_mode); + +void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, + GLuint bytes); + +GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, + dri_bo *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); + +/* Inline functions - might actually be better off with these + * non-inlined. Certainly better off switching all command packets to + * be passed as structs rather than dwords, but that's a little bit of + * work... 
+ */ +static INLINE GLint +intel_batchbuffer_space(struct intel_batchbuffer *batch) +{ + return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); +} + + +static INLINE void +intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) +{ + assert(batch->map); + assert(intel_batchbuffer_space(batch) >= 4); + *(GLuint *) (batch->ptr) = dword; + batch->ptr += 4; +} + +static INLINE void +intel_batchbuffer_require_space(struct intel_batchbuffer *batch, + GLuint sz, + enum cliprect_mode cliprect_mode) +{ + assert(sz < batch->size - 8); + if (intel_batchbuffer_space(batch) < sz) + intel_batchbuffer_flush(batch); + + if ((cliprect_mode == LOOP_CLIPRECTS || + cliprect_mode == REFERENCES_CLIPRECTS) && + batch->intel->constant_cliprect) + cliprect_mode = NO_LOOP_CLIPRECTS; + + if (cliprect_mode != IGNORE_CLIPRECTS) { + if (batch->cliprect_mode == IGNORE_CLIPRECTS) { + batch->cliprect_mode = cliprect_mode; + } else { + if (batch->cliprect_mode != cliprect_mode) { + intel_batchbuffer_flush(batch); + batch->cliprect_mode = cliprect_mode; + } + } + } +} + +/* Here are the crusty old macros, to be removed: + */ +#define BATCH_LOCALS + +#define BEGIN_BATCH(n, cliprect_mode) do { \ + intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \ + assert(intel->batch->emit.start_ptr == NULL); \ + intel->batch->emit.total = (n) * 4; \ + intel->batch->emit.start_ptr = intel->batch->ptr; \ +} while (0) + +#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) + +#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ + assert((unsigned) (delta) < buf->size); \ + intel_batchbuffer_emit_reloc(intel->batch, buf, \ + read_domains, write_domain, delta); \ +} while (0) + +#define ADVANCE_BATCH() do { \ + unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ + assert(intel->batch->emit.start_ptr != NULL); \ + if (_n != intel->batch->emit.total) { \ + fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \ + _n, 
intel->batch->emit.total); \ + abort(); \ + } \ + intel->batch->emit.start_ptr = NULL; \ +} while(0) + + +static INLINE void +intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) +{ + intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS); + intel_batchbuffer_emit_dword(batch, MI_FLUSH); +} + +#endif diff --git a/src/gallium/drivers/i965/intel_chipset.h b/src/gallium/drivers/i965/intel_chipset.h new file mode 100644 index 0000000000..3dc8653a73 --- /dev/null +++ b/src/gallium/drivers/i965/intel_chipset.h @@ -0,0 +1,118 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt + * + */ + +#define PCI_CHIP_I810 0x7121 +#define PCI_CHIP_I810_DC100 0x7123 +#define PCI_CHIP_I810_E 0x7125 +#define PCI_CHIP_I815 0x1132 + +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_E7221_G 0x258A +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE + +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q33_G 0x29D2 + +#define PCI_CHIP_IGD_GM 0xA011 +#define PCI_CHIP_IGD_G 0xA001 + +#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM) +#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G) +#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid)) + +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + +#define PCI_CHIP_GM45_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 +#define PCI_CHIP_G41_G 0x2E32 +#define PCI_CHIP_B43_G 0x2E42 + +#define PCI_CHIP_ILD_G 0x0042 +#define PCI_CHIP_ILM_G 0x0046 + +#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ + devid == PCI_CHIP_I915_GM || \ + devid == PCI_CHIP_I945_GM || \ + devid == PCI_CHIP_I945_GME || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_GM45_GM || \ + IS_IGD(devid) || \ + devid == PCI_CHIP_ILM_G) + +#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ + devid == PCI_CHIP_Q45_G || \ + devid == PCI_CHIP_G45_G || \ + devid == PCI_CHIP_G41_G || \ + devid == PCI_CHIP_B43_G) +#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) + +#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G) +#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G) +#define 
IS_IGDNG(devid) (IS_ILD(devid) || IS_ILM(devid)) + +#define IS_915(devid) (devid == PCI_CHIP_I915_G || \ + devid == PCI_CHIP_E7221_G || \ + devid == PCI_CHIP_I915_GM) + +#define IS_945(devid) (devid == PCI_CHIP_I945_G || \ + devid == PCI_CHIP_I945_GM || \ + devid == PCI_CHIP_I945_GME || \ + devid == PCI_CHIP_G33_G || \ + devid == PCI_CHIP_Q33_G || \ + devid == PCI_CHIP_Q35_G || IS_IGD(devid)) + +#define IS_965(devid) (devid == PCI_CHIP_I965_G || \ + devid == PCI_CHIP_I965_Q || \ + devid == PCI_CHIP_I965_G_1 || \ + devid == PCI_CHIP_I965_GM || \ + devid == PCI_CHIP_I965_GME || \ + devid == PCI_CHIP_I946_GZ || \ + IS_G4X(devid) || \ + IS_IGDNG(devid)) + +#define IS_9XX(devid) (IS_915(devid) || \ + IS_945(devid) || \ + IS_965(devid)) diff --git a/src/gallium/drivers/i965/intel_structs.h b/src/gallium/drivers/i965/intel_structs.h new file mode 100644 index 0000000000..522e3bd92c --- /dev/null +++ b/src/gallium/drivers/i965/intel_structs.h @@ -0,0 +1,132 @@ +#ifndef INTEL_STRUCTS_H +#define INTEL_STRUCTS_H + +struct br0 { + GLuint length:8; + GLuint pad0:3; + GLuint dst_tiled:1; + GLuint pad1:8; + GLuint write_rgb:1; + GLuint write_alpha:1; + GLuint opcode:7; + GLuint client:3; +}; + + +struct br13 { + GLint dest_pitch:16; + GLuint rop:8; + GLuint color_depth:2; + GLuint pad1:3; + GLuint mono_source_transparency:1; + GLuint clipping_enable:1; + GLuint pad0:1; +}; + + + +/* This is an attempt to move some of the 2D interaction in this + * driver to using structs for packets rather than a bunch of #defines + * and dwords. 
+ */ +struct xy_color_blit { + struct br0 br0; + struct br13 br13; + + struct { + GLuint dest_x1:16; + GLuint dest_y1:16; + } dw2; + + struct { + GLuint dest_x2:16; + GLuint dest_y2:16; + } dw3; + + GLuint dest_base_addr; + GLuint color; +}; + +struct xy_src_copy_blit { + struct br0 br0; + struct br13 br13; + + struct { + GLuint dest_x1:16; + GLuint dest_y1:16; + } dw2; + + struct { + GLuint dest_x2:16; + GLuint dest_y2:16; + } dw3; + + GLuint dest_base_addr; + + struct { + GLuint src_x1:16; + GLuint src_y1:16; + } dw5; + + struct { + GLint src_pitch:16; + GLuint pad:16; + } dw6; + + GLuint src_base_addr; +}; + +struct xy_setup_blit { + struct br0 br0; + struct br13 br13; + + struct { + GLuint clip_x1:16; + GLuint clip_y1:16; + } dw2; + + struct { + GLuint clip_x2:16; + GLuint clip_y2:16; + } dw3; + + GLuint dest_base_addr; + GLuint background_color; + GLuint foreground_color; + GLuint pattern_base_addr; +}; + + +struct xy_text_immediate_blit { + struct { + GLuint length:8; + GLuint pad2:3; + GLuint dst_tiled:1; + GLuint pad1:4; + GLuint byte_packed:1; + GLuint pad0:5; + GLuint opcode:7; + GLuint client:3; + } dw0; + + struct { + GLuint dest_x1:16; + GLuint dest_y1:16; + } dw1; + + struct { + GLuint dest_x2:16; + GLuint dest_y2:16; + } dw2; + + /* Src bitmap data follows as inline dwords. + */ +}; + + +#define CLIENT_2D 0x2 +#define OPCODE_XY_SETUP_BLT 0x1 +#define OPCODE_XY_COLOR_BLT 0x50 +#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31 + +#endif diff --git a/src/gallium/drivers/i965/intel_tex_format.c b/src/gallium/drivers/i965/intel_tex_format.c new file mode 100644 index 0000000000..3322a71130 --- /dev/null +++ b/src/gallium/drivers/i965/intel_tex_format.c @@ -0,0 +1,225 @@ +#include "intel_context.h" +#include "intel_tex.h" +#include "intel_chipset.h" +#include "main/texformat.h" +#include "main/enums.h" + + +/** + * Choose hardware texture format given the user's glTexImage parameters. 
+ * + * It works out that this function is fine for all the supported + * hardware. However, there is still a need to map the formats onto + * hardware descriptors. + * + * Note that the i915 can actually support many more formats than + * these if we take the step of simply swizzling the colors + * immediately after sampling... + */ +const struct gl_texture_format * +intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, + GLenum format, GLenum type) +{ + struct intel_context *intel = intel_context(ctx); + const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24); + +#if 0 + printf("%s intFmt=0x%x format=0x%x type=0x%x\n", + __FUNCTION__, internalFormat, format, type); +#endif + + switch (internalFormat) { + case 4: + case GL_RGBA: + case GL_COMPRESSED_RGBA: + if (format == GL_BGRA) { + if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) { + return &_mesa_texformat_argb8888; + } + else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) { + return &_mesa_texformat_argb4444; + } + else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) { + return &_mesa_texformat_argb1555; + } + } + return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444; + + case 3: + case GL_RGB: + case GL_COMPRESSED_RGB: + if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { + return &_mesa_texformat_rgb565; + } + return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565; + + case GL_RGBA8: + case GL_RGB10_A2: + case GL_RGBA12: + case GL_RGBA16: + return do32bpt ? 
&_mesa_texformat_argb8888 : &_mesa_texformat_argb4444; + + case GL_RGBA4: + case GL_RGBA2: + return &_mesa_texformat_argb4444; + + case GL_RGB5_A1: + return &_mesa_texformat_argb1555; + + case GL_RGB8: + case GL_RGB10: + case GL_RGB12: + case GL_RGB16: + return &_mesa_texformat_argb8888; + + case GL_RGB5: + case GL_RGB4: + case GL_R3_G3_B2: + return &_mesa_texformat_rgb565; + + case GL_ALPHA: + case GL_ALPHA4: + case GL_ALPHA8: + case GL_ALPHA12: + case GL_ALPHA16: + case GL_COMPRESSED_ALPHA: + return &_mesa_texformat_a8; + + case 1: + case GL_LUMINANCE: + case GL_LUMINANCE4: + case GL_LUMINANCE8: + case GL_LUMINANCE12: + case GL_LUMINANCE16: + case GL_COMPRESSED_LUMINANCE: + return &_mesa_texformat_l8; + + case 2: + case GL_LUMINANCE_ALPHA: + case GL_LUMINANCE4_ALPHA4: + case GL_LUMINANCE6_ALPHA2: + case GL_LUMINANCE8_ALPHA8: + case GL_LUMINANCE12_ALPHA4: + case GL_LUMINANCE12_ALPHA12: + case GL_LUMINANCE16_ALPHA16: + case GL_COMPRESSED_LUMINANCE_ALPHA: + return &_mesa_texformat_al88; + + case GL_INTENSITY: + case GL_INTENSITY4: + case GL_INTENSITY8: + case GL_INTENSITY12: + case GL_INTENSITY16: + case GL_COMPRESSED_INTENSITY: + return &_mesa_texformat_i8; + + case GL_YCBCR_MESA: + if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE) + return &_mesa_texformat_ycbcr; + else + return &_mesa_texformat_ycbcr_rev; + + case GL_COMPRESSED_RGB_FXT1_3DFX: + return &_mesa_texformat_rgb_fxt1; + case GL_COMPRESSED_RGBA_FXT1_3DFX: + return &_mesa_texformat_rgba_fxt1; + + case GL_RGB_S3TC: + case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + return &_mesa_texformat_rgb_dxt1; + + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return &_mesa_texformat_rgba_dxt1; + + case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return &_mesa_texformat_rgba_dxt3; + + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return &_mesa_texformat_rgba_dxt5; + + case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT24: + case 
GL_DEPTH_COMPONENT32: +#if 0 + return &_mesa_texformat_z16; +#else + /* fall-through. + * 16bpp depth texture can't be paired with a stencil buffer so + * always used combined depth/stencil format. + */ +#endif + case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH24_STENCIL8_EXT: + return &_mesa_texformat_s8_z24; + +#ifndef I915 + case GL_SRGB_EXT: + case GL_SRGB8_EXT: + case GL_SRGB_ALPHA_EXT: + case GL_SRGB8_ALPHA8_EXT: + case GL_COMPRESSED_SRGB_EXT: + case GL_COMPRESSED_SRGB_ALPHA_EXT: + case GL_COMPRESSED_SLUMINANCE_EXT: + case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: + return &_mesa_texformat_sargb8; + case GL_SLUMINANCE_EXT: + case GL_SLUMINANCE8_EXT: + if (IS_G4X(intel->intelScreen->deviceID)) + return &_mesa_texformat_sl8; + else + return &_mesa_texformat_sargb8; + case GL_SLUMINANCE_ALPHA_EXT: + case GL_SLUMINANCE8_ALPHA8_EXT: + if (IS_G4X(intel->intelScreen->deviceID)) + return &_mesa_texformat_sla8; + else + return &_mesa_texformat_sargb8; + case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: + return &_mesa_texformat_srgb_dxt1; + + /* i915 could also do this */ + case GL_DUDV_ATI: + case GL_DU8DV8_ATI: + return &_mesa_texformat_dudv8; + case GL_RGBA_SNORM: + case GL_RGBA8_SNORM: + return &_mesa_texformat_signed_rgba8888_rev; +#endif + + default: + fprintf(stderr, "unexpected texture format %s in %s\n", + _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__); + return NULL; + } + + return NULL; /* never get here */ +} + +int intel_compressed_num_bytes(GLuint mesaFormat) +{ + int bytes = 0; + switch(mesaFormat) { + + case MESA_FORMAT_RGB_FXT1: + case MESA_FORMAT_RGBA_FXT1: + case MESA_FORMAT_RGB_DXT1: + case MESA_FORMAT_RGBA_DXT1: + bytes = 2; + break; + + case MESA_FORMAT_RGBA_DXT3: + case MESA_FORMAT_RGBA_DXT5: + bytes = 4; + default: + break; + } + + return bytes; +} diff --git a/src/gallium/drivers/i965/intel_tex_layout.c 
b/src/gallium/drivers/i965/intel_tex_layout.c new file mode 100644 index 0000000000..7d69ea4484 --- /dev/null +++ b/src/gallium/drivers/i965/intel_tex_layout.c @@ -0,0 +1,140 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell + * Michel Dänzer + */ + +#include "intel_mipmap_tree.h" +#include "intel_tex_layout.h" +#include "intel_context.h" +#include "main/macros.h" +/* Writes the alignment unit for internalFormat to *w (width) and *h (height): 8x4 for the FXT1 formats, 4x4 for the S3TC/DXT formats listed below, and 4x2 for every other format. */ +void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h) +{ + switch (internalFormat) { + case GL_COMPRESSED_RGB_FXT1_3DFX: + case GL_COMPRESSED_RGBA_FXT1_3DFX: + *w = 8; + *h = 4; + break; + + case GL_RGB_S3TC: + case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + *w = 4; + *h = 4; + break; + + default: + *w = 4; + *h = 2; + break; + } +} +/* Lays out mipmap levels first_level..last_level of mt. Computes mt->pitch (widened when the two levels after the first, placed side by side, are wider than the base level, then passed through intel_miptree_pitch_align) and mt->total_height, and records each level's x/y/width/height with intel_miptree_set_level_info. Levels advance downwards in y, except that after level first_level+1 the layout steps right in x instead. */ +void i945_miptree_layout_2d( struct intel_context *intel, + struct intel_mipmap_tree *mt, + uint32_t tiling ) +{ + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = mt->width0; + GLuint height = mt->height0; + + mt->pitch = mt->width0; + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + + if (mt->compressed) { + mt->pitch = ALIGN(mt->width0, align_w); + } + + /* May need to adjust pitch to accommodate the placement of + * the 2nd mipmap. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap out past the width of its parent. + */ + if (mt->first_level != mt->last_level) { + GLuint mip1_width; + + if (mt->compressed) { + mip1_width = ALIGN(minify(mt->width0), align_w) + + ALIGN(minify(minify(mt->width0)), align_w); + } else { + mip1_width = ALIGN(minify(mt->width0), align_w) + + minify(minify(mt->width0)); + } + + if (mip1_width > mt->pitch) { + mt->pitch = mip1_width; + } + } + + /* Pitch must be a whole number of dwords, even though we + * express it in texels. 
+ */ + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch); + mt->total_height = 0; + + for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + GLuint img_height; + + intel_miptree_set_level_info(mt, level, 1, x, y, width, + height, 1); + + if (mt->compressed) + img_height = MAX2(1, height/4); + else + img_height = ALIGN(height, align_h); + + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + mt->total_height = MAX2(mt->total_height, y + img_height); + + /* Layout_below: step right after second mipmap. + */ + if (level == mt->first_level + 1) { + x += ALIGN(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } +} -- cgit v1.2.3 From 57a920cb1a0b6051068e730747b3fb475de88aca Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 23 Oct 2009 17:01:32 +0100 Subject: i965g: wip --- src/gallium/drivers/i965/brw_bo.c | 12 + src/gallium/drivers/i965/brw_cc.c | 180 +---- src/gallium/drivers/i965/brw_clip.c | 127 +-- src/gallium/drivers/i965/brw_clip.h | 5 +- src/gallium/drivers/i965/brw_clip_line.c | 7 - src/gallium/drivers/i965/brw_clip_point.c | 7 - src/gallium/drivers/i965/brw_clip_state.c | 7 +- src/gallium/drivers/i965/brw_clip_tri.c | 7 - src/gallium/drivers/i965/brw_clip_unfilled.c | 5 - src/gallium/drivers/i965/brw_clip_util.c | 7 - src/gallium/drivers/i965/brw_context.c | 135 ++-- src/gallium/drivers/i965/brw_context.h | 7 +- src/gallium/drivers/i965/brw_curbe.c | 89 +-- src/gallium/drivers/i965/brw_defines.h | 4 +- src/gallium/drivers/i965/brw_disasm.c | 2 - src/gallium/drivers/i965/brw_draw.c | 244 +----- src/gallium/drivers/i965/brw_draw_upload.c | 566 ++++--------- src/gallium/drivers/i965/brw_gs.c | 2 +- src/gallium/drivers/i965/brw_pipe_blend.c | 41 + src/gallium/drivers/i965/brw_pipe_debug.c | 2 + src/gallium/drivers/i965/brw_pipe_depth.c | 52 ++ src/gallium/drivers/i965/brw_pipe_fb.c | 25 + 
src/gallium/drivers/i965/brw_pipe_flush.c | 64 ++ src/gallium/drivers/i965/brw_screen_surface.c | 27 + src/gallium/drivers/i965/brw_sf.c | 4 +- src/gallium/drivers/i965/brw_sf_emit.c | 4 +- src/gallium/drivers/i965/brw_state_upload.c | 63 +- src/gallium/drivers/i965/brw_swtnl.c | 114 +++ src/gallium/drivers/i965/brw_types.h | 11 + src/gallium/drivers/i965/brw_util.c | 8 - src/gallium/drivers/i965/brw_vs.c | 12 +- src/gallium/drivers/i965/brw_vs_emit.c | 250 ++---- src/gallium/drivers/i965/brw_wm.c | 59 +- src/gallium/drivers/i965/brw_wm.h | 1 - src/gallium/drivers/i965/brw_wm_emit.c | 17 +- src/gallium/drivers/i965/brw_wm_fp.c | 193 ++--- src/gallium/drivers/i965/brw_wm_glsl.c | 1060 +------------------------ src/gallium/drivers/i965/brw_wm_pass0.c | 1 - src/gallium/drivers/i965/brw_wm_pass1.c | 81 +- src/gallium/drivers/i965/intel_chipset.h | 4 +- 40 files changed, 907 insertions(+), 2599 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_bo.c create mode 100644 src/gallium/drivers/i965/brw_pipe_blend.c create mode 100644 src/gallium/drivers/i965/brw_pipe_debug.c create mode 100644 src/gallium/drivers/i965/brw_pipe_depth.c create mode 100644 src/gallium/drivers/i965/brw_pipe_fb.c create mode 100644 src/gallium/drivers/i965/brw_pipe_flush.c create mode 100644 src/gallium/drivers/i965/brw_screen_surface.c create mode 100644 src/gallium/drivers/i965/brw_swtnl.c create mode 100644 src/gallium/drivers/i965/brw_types.h (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_bo.c b/src/gallium/drivers/i965/brw_bo.c new file mode 100644 index 0000000000..e7a4dac666 --- /dev/null +++ b/src/gallium/drivers/i965/brw_bo.c @@ -0,0 +1,12 @@ + +/* Copies ib_size bytes from index_buffer->ptr into bo at offset: through a GTT mapping (map, memcpy, unmap) when intel->intelScreen->kernel_exec_fencing is set, otherwise through dri_bo_subdata(). NOTE(review): intel, bo, offset, index_buffer and ib_size are not declared anywhere in this file and the function takes no parameters, so this looks like an unfinished placeholder - confirm the intended signature. The result of drm_intel_gem_bo_map_gtt() is not checked. */ +void brw_buffer_subdata() +{ + if (intel->intelScreen->kernel_exec_fencing) { + drm_intel_gem_bo_map_gtt(bo); + memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); + drm_intel_gem_bo_unmap_gtt(bo); + } else { + dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } +} diff --git 
a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index 1088a7a607..9ab5638137 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -62,84 +62,21 @@ const struct brw_tracked_state brw_cc_vp = { }; struct brw_cc_unit_key { - GLboolean stencil, stencil_two_side, color_blend, alpha_enabled; - - GLenum stencil_func[2], stencil_fail_op[2]; - GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2]; - GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2]; - GLenum logic_op; - - GLenum blend_eq_rgb, blend_eq_a; - GLenum blend_src_rgb, blend_src_a; - GLenum blend_dst_rgb, blend_dst_a; - - GLenum alpha_func; - GLclampf alpha_ref; - - GLboolean dither; - - GLboolean depth_test, depth_write; - GLenum depth_func; + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_blend_state blend; /* no color mask */ }; static void cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) { - GLcontext *ctx = &brw->intel.ctx; - const unsigned back = ctx->Stencil._BackFace; - memset(key, 0, sizeof(*key)); + + key->dsa = brw->curr.dsa.base; + key->blend = brw->curr.blend.base; - key->stencil = ctx->Stencil._Enabled; - key->stencil_two_side = ctx->Stencil._TestTwoSide; - - if (key->stencil) { - key->stencil_func[0] = ctx->Stencil.Function[0]; - key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0]; - key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0]; - key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0]; - key->stencil_ref[0] = ctx->Stencil.Ref[0]; - key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0]; - key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0]; - } - if (key->stencil_two_side) { - key->stencil_func[1] = ctx->Stencil.Function[back]; - key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back]; - key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back]; - key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back]; - key->stencil_ref[1] = 
ctx->Stencil.Ref[back]; - key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back]; - key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back]; - } - - if (ctx->Color._LogicOpEnabled) - key->logic_op = ctx->Color.LogicOp; - else - key->logic_op = GL_COPY; - - key->color_blend = ctx->Color.BlendEnabled; - if (key->color_blend) { - key->blend_eq_rgb = ctx->Color.BlendEquationRGB; - key->blend_eq_a = ctx->Color.BlendEquationA; - key->blend_src_rgb = ctx->Color.BlendSrcRGB; - key->blend_dst_rgb = ctx->Color.BlendDstRGB; - key->blend_src_a = ctx->Color.BlendSrcA; - key->blend_dst_a = ctx->Color.BlendDstA; - } - - key->alpha_enabled = ctx->Color.AlphaEnabled; - if (key->alpha_enabled) { - key->alpha_func = ctx->Color.AlphaFunc; - key->alpha_ref = ctx->Color.AlphaRef; - } - - key->dither = ctx->Color.DitherFlag; - - key->depth_test = ctx->Depth.Test; - if (key->depth_test) { - key->depth_func = ctx->Depth.Func; - key->depth_write = ctx->Depth.Mask; - } + /* Clear non-respected values: + */ + key->blend.colormask = 0xf; } /** @@ -153,103 +90,16 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) memset(&cc, 0, sizeof(cc)); - /* _NEW_STENCIL */ - if (key->stencil) { - cc.cc0.stencil_enable = 1; - cc.cc0.stencil_func = - intel_translate_compare_func(key->stencil_func[0]); - cc.cc0.stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[0]); - cc.cc0.stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); - cc.cc0.stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); - cc.cc1.stencil_ref = key->stencil_ref[0]; - cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; - cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; - - if (key->stencil_two_side) { - cc.cc0.bf_stencil_enable = 1; - cc.cc0.bf_stencil_func = - intel_translate_compare_func(key->stencil_func[1]); - cc.cc0.bf_stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[1]); - 
cc.cc0.bf_stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); - cc.cc0.bf_stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); - cc.cc1.bf_stencil_ref = key->stencil_ref[1]; - cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; - cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; - } - - /* Not really sure about this: - */ - if (key->stencil_write_mask[0] || - (key->stencil_two_side && key->stencil_write_mask[1])) - cc.cc0.stencil_write_enable = 1; - } - - /* _NEW_COLOR */ - if (key->logic_op != GL_COPY) { - cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) { - srcRGB = dstRGB = GL_ONE; - } - - if (eqA == GL_MIN || eqA == GL_MAX) { - srcA = dstA = GL_ONE; - } - - cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); - cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); - cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); - - cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); - cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); - - cc.cc3.blend_enable = 1; - cc.cc3.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB); - } - - if (key->alpha_enabled) { - cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); - cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); - } - - if (key->dither) { - cc.cc5.dither_enable = 1; - cc.cc6.y_dither_offset = 0; - cc.cc6.x_dither_offset = 0; - } - - /* _NEW_DEPTH 
*/ - if (key->depth_test) { - cc.cc2.depth_test = 1; - cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); - cc.cc2.depth_write_enable = key->depth_write; - } + cc.cc0 = brw->dsa.cc0; + cc.cc1 = brw->dsa.cc1; + cc.cc2 = brw->dsa.cc2; + cc.cc3 = brw->dsa.cc3 | brw->blend.cc3; /* CACHE_NEW_CC_VP */ cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ - if (INTEL_DEBUG & DEBUG_STATS) - cc.cc5.statistics_enable = 1; + cc.cc5 = brw->blend.cc5 | brw->debug.cc5; + bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, key, sizeof(*key), @@ -286,7 +136,7 @@ static void prepare_cc_unit( struct brw_context *brw ) const struct brw_tracked_state brw_cc_unit = { .dirty = { - .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH, + .mesa = PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_BLEND, .brw = 0, .cache = CACHE_NEW_CC_VP }, diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 20a927cf38..df1b3718d0 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -29,9 +29,9 @@ * Keith Whitwell */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" +#include "pipe/p_state.h" + +#include "util/u_math.h" #include "intel_batchbuffer.h" @@ -83,7 +83,7 @@ static void compile_clip_prog( struct brw_context *brw, delta += ATTR_SIZE; } - c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_attrs = util_count_bits(c.key.attrs); if (BRW_IS_IGDNG(brw)) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? 
*/ @@ -104,16 +104,16 @@ static void compile_clip_prog( struct brw_context *brw, * do all three: */ switch (key->primitive) { - case GL_TRIANGLES: + case PIPE_PRIM_TRIANGLES: if (key->do_unfilled) brw_emit_unfilled_clip( &c ); else brw_emit_tri_clip( &c ); break; - case GL_LINES: + case PIPE_PRIM_LINES: brw_emit_line_clip( &c ); break; - case GL_POINTS: + case PIPE_PRIM_POINTS: brw_emit_point_clip( &c ); break; default: @@ -143,7 +143,6 @@ static void compile_clip_prog( struct brw_context *brw, */ static void upload_clip_prog(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; struct brw_clip_prog_key key; memset(&key, 0, sizeof(key)); @@ -151,101 +150,51 @@ static void upload_clip_prog(struct brw_context *brw) /* Populate the key: */ /* BRW_NEW_REDUCED_PRIMITIVE */ - key.primitive = brw->intel.reduced_primitive; + key.primitive = brw->reduced_primitive; /* CACHE_NEW_VS_PROG */ key.attrs = brw->vs.prog_data->outputs_written; - /* _NEW_LIGHT */ - key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); - /* _NEW_TRANSFORM */ - key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); + /* PIPE_NEW_RAST */ + key.do_flat_shading = brw->rast.base.flatshade; + /* PIPE_NEW_UCP */ + key.nr_userclip = brw->nr_ucp; if (BRW_IS_IGDNG(brw)) key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; else key.clip_mode = BRW_CLIPMODE_NORMAL; - /* _NEW_POLYGON */ - if (key.primitive == GL_TRIANGLES) { - if (ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) + /* PIPE_NEW_RAST */ + if (key.primitive == PIPE_PRIM_TRIANGLES) { + if (brw->rast->cull_mode == PIPE_WINDING_BOTH) /* reject everything only when both windings are culled; this must compare, a plain '=' would overwrite cull_mode and always be true */ key.clip_mode = BRW_CLIPMODE_REJECT_ALL; else { - GLuint fill_front = CLIP_CULL; - GLuint fill_back = CLIP_CULL; - GLuint offset_front = 0; - GLuint offset_back = 0; - - if (!ctx->Polygon.CullFlag || - ctx->Polygon.CullFaceMode != GL_FRONT) { - switch (ctx->Polygon.FrontMode) { - case GL_FILL: - fill_front = CLIP_FILL; - offset_front = 0; - break; - case GL_LINE: - fill_front = 
CLIP_LINE; - offset_front = ctx->Polygon.OffsetLine; - break; - case GL_POINT: - fill_front = CLIP_POINT; - offset_front = ctx->Polygon.OffsetPoint; - break; - } + key.fill_ccw = CLIP_CULL; + key.fill_cw = CLIP_CULL; + + if (!(brw->rast->cull_mode & PIPE_WINDING_CCW)) { + key.fill_ccw = translate_fill(brw->rast.fill_ccw); } - if (!ctx->Polygon.CullFlag || - ctx->Polygon.CullFaceMode != GL_BACK) { - switch (ctx->Polygon.BackMode) { - case GL_FILL: - fill_back = CLIP_FILL; - offset_back = 0; - break; - case GL_LINE: - fill_back = CLIP_LINE; - offset_back = ctx->Polygon.OffsetLine; - break; - case GL_POINT: - fill_back = CLIP_POINT; - offset_back = ctx->Polygon.OffsetPoint; - break; - } + if (!(brw->rast->cull_mode & PIPE_WINDING_CW)) { + key.fill_cw = translate_fill(brw->rast.fill_cw); } - if (ctx->Polygon.BackMode != GL_FILL || - ctx->Polygon.FrontMode != GL_FILL) { + if (key.fill_cw != CLIP_FILL || + key.fill_ccw != CLIP_FILL) { key.do_unfilled = 1; - - /* Most cases the fixed function units will handle. 
Cases where - * one or more polygon faces are unfilled will require help: - */ key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + } + + key.offset_ccw = brw->rast.offset_ccw; + key.offset_cw = brw->rast.offset_cw; + + if (brw->rast.light_twoside && + key.fill_cw != CLIP_CULL) + key.copy_bfc_cw = 1; - if (offset_back || offset_front) { - /* _NEW_POLYGON, _NEW_BUFFERS */ - key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale; - key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD; - } - - switch (ctx->Polygon.FrontFace) { - case GL_CCW: - key.fill_ccw = fill_front; - key.fill_cw = fill_back; - key.offset_ccw = offset_front; - key.offset_cw = offset_back; - if (ctx->Light.Model.TwoSide && - key.fill_cw != CLIP_CULL) - key.copy_bfc_cw = 1; - break; - case GL_CW: - key.fill_cw = fill_front; - key.fill_ccw = fill_back; - key.offset_cw = offset_front; - key.offset_ccw = offset_back; - if (ctx->Light.Model.TwoSide && - key.fill_ccw != CLIP_CULL) - key.copy_bfc_ccw = 1; - break; - } + if (brw->rast.light_twoside && + key.fill_ccw != CLIP_CULL) + key.copy_bfc_ccw = 1; } } } @@ -262,10 +211,8 @@ static void upload_clip_prog(struct brw_context *brw) const struct brw_tracked_state brw_clip_prog = { .dirty = { - .mesa = (_NEW_LIGHT | - _NEW_TRANSFORM | - _NEW_POLYGON | - _NEW_BUFFERS), + .mesa = (PIPE_NEW_RAST | + PIPE_NEW_UCP), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 957df441ab..d80ec819b9 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -43,6 +43,7 @@ */ struct brw_clip_prog_key { GLuint attrs:32; + GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; @@ -51,12 +52,10 @@ struct brw_clip_prog_key { GLuint fill_ccw:2; /* includes cull information */ GLuint offset_cw:1; GLuint offset_ccw:1; - GLuint pad0:17; - GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; GLuint 
clip_mode:3; - GLuint pad1:27; + GLuint pad1:12; GLfloat offset_factor; GLfloat offset_units; diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index 048ca620fa..6b4da25644 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -29,13 +29,6 @@ * Keith Whitwell */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c index 8458f61c5a..b2cf7b2011 100644 --- a/src/gallium/drivers/i965/brw_clip_point.c +++ b/src/gallium/drivers/i965/brw_clip_point.c @@ -29,13 +29,6 @@ * Keith Whitwell */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 234b3744bf..72e27205e2 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -32,7 +32,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "main/macros.h" struct brw_clip_unit_key { unsigned int total_grf; @@ -66,8 +65,8 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) key->nr_urb_entries = brw->urb.nr_clip_entries; key->urb_size = brw->urb.vsize; - /* _NEW_TRANSOFORM */ - key->depth_clamp = ctx->Transform.DepthClamp; + /* */ + key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp; } static dri_bo * @@ -175,7 +174,7 @@ static void upload_clip_unit( struct brw_context *brw ) const struct brw_tracked_state brw_clip_unit = { .dirty = { - .mesa = _NEW_TRANSFORM, + .mesa = 0, .brw = 
(BRW_NEW_CURBE_OFFSETS | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index 0efd77225e..d8feca6a87 100644 --- a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -29,13 +29,6 @@ * Keith Whitwell */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c index ad1bfa435f..4baff55806 100644 --- a/src/gallium/drivers/i965/brw_clip_unfilled.c +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -29,11 +29,6 @@ * Keith Whitwell */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - #include "intel_batchbuffer.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 5a73abdfee..7a6c46ce07 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -30,13 +30,6 @@ */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/program.h" - -#include "intel_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index c300c33adc..bf0ec89e13 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -52,122 +52,77 @@ #include "utils.h" -/*************************************** - * Mesa's Driver Functions - ***************************************/ - -static void brwUseProgram(GLcontext *ctx, GLuint program) -{ - _mesa_use_program(ctx, program); -} - -static void brwInitProgFuncs( struct dd_function_table 
*functions ) -{ - functions->UseProgram = brwUseProgram; -} -static void brwInitDriverFunctions( struct dd_function_table *functions ) -{ - intelInitDriverFunctions( functions ); - - brwInitFragProgFuncs( functions ); - brwInitProgFuncs( functions ); - brw_init_queryobj_functions(functions); - - functions->Viewport = intel_viewport; -} GLboolean brwCreateContext( const __GLcontextModes *mesaVis, __DRIcontextPrivate *driContextPriv, void *sharedContextPrivate) { - struct dd_function_table functions; struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); - struct intel_context *intel = &brw->intel; - GLcontext *ctx = &intel->ctx; if (!brw) { - _mesa_printf("%s: failed to alloc context\n", __FUNCTION__); - return GL_FALSE; - } - - brwInitVtbl( brw ); - brwInitDriverFunctions( &functions ); - - if (!intelInitContext( intel, mesaVis, driContextPriv, - sharedContextPrivate, &functions )) { - _mesa_printf("%s: failed to init intel context\n", __FUNCTION__); - FREE(brw); + debug_printf("%s: failed to alloc context\n", __FUNCTION__); return GL_FALSE; } - /* Initialize swrast, tnl driver tables: */ - intelInitSpanFuncs(ctx); - - TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline; - - ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT; - ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */ - ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits, - ctx->Const.MaxTextureImageUnits); - ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */ - - /* Mesa limits textures to 4kx4k; it would be nice to fix that someday - */ - ctx->Const.MaxTextureLevels = 13; - ctx->Const.Max3DTextureLevels = 9; - ctx->Const.MaxCubeTextureLevels = 12; - ctx->Const.MaxTextureRectSize = (1<<12); - - ctx->Const.MaxTextureMaxAnisotropy = 16.0; - - /* if conformance mode is set, swrast can handle any size AA point */ - ctx->Const.MaxPointSizeAA = 255.0; - /* We want the GLSL compiler to emit code that uses condition codes */ 
ctx->Shader.EmitCondCodes = GL_TRUE; ctx->Shader.EmitNVTempInitialization = GL_TRUE; - ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024); - ctx->Const.VertexProgram.MaxAluInstructions = 0; - ctx->Const.VertexProgram.MaxTexInstructions = 0; - ctx->Const.VertexProgram.MaxTexIndirections = 0; - ctx->Const.VertexProgram.MaxNativeAluInstructions = 0; - ctx->Const.VertexProgram.MaxNativeTexInstructions = 0; - ctx->Const.VertexProgram.MaxNativeTexIndirections = 0; - ctx->Const.VertexProgram.MaxNativeAttribs = 16; - ctx->Const.VertexProgram.MaxNativeTemps = 256; - ctx->Const.VertexProgram.MaxNativeAddressRegs = 1; - ctx->Const.VertexProgram.MaxNativeParameters = 1024; - ctx->Const.VertexProgram.MaxEnvParams = - MIN2(ctx->Const.VertexProgram.MaxNativeParameters, - ctx->Const.VertexProgram.MaxEnvParams); - - ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024); - ctx->Const.FragmentProgram.MaxNativeAttribs = 12; - ctx->Const.FragmentProgram.MaxNativeTemps = 256; - ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; - ctx->Const.FragmentProgram.MaxNativeParameters = 1024; - ctx->Const.FragmentProgram.MaxEnvParams = - MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, - ctx->Const.FragmentProgram.MaxEnvParams); + brw_init_query( brw ); brw_init_state( brw ); + brw_draw_init( brw ); brw->state.dirty.mesa = ~0; brw->state.dirty.brw = ~0; brw->emit_state_always = 0; - ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; - ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; - make_empty_list(&brw->query.active_head); - brw_draw_init( brw ); return GL_TRUE; } +/** + * called from intelDestroyContext(). Destroys the state and draw modules, frees wm.compile_data, releases the color and depth regions, and drops the references held on the curbe, vs, gs, clip, sf, wm and cc buffer objects. NOTE(review): brw itself is not freed here - presumably the caller owns that; confirm. + */ +static void brw_destroy_context( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + int i; + + 
brw_destroy_state(brw); + brw_draw_destroy( brw ); + + _mesa_free(brw->wm.compile_data); + + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); + brw->state.nr_color_regions = 0; + intel_region_release(&brw->state.depth_region); + + dri_bo_unreference(brw->curbe.curbe_bo); + dri_bo_unreference(brw->vs.prog_bo); + dri_bo_unreference(brw->vs.state_bo); + dri_bo_unreference(brw->vs.bind_bo); + dri_bo_unreference(brw->gs.prog_bo); + dri_bo_unreference(brw->gs.state_bo); + dri_bo_unreference(brw->clip.prog_bo); + dri_bo_unreference(brw->clip.state_bo); + dri_bo_unreference(brw->clip.vp_bo); + dri_bo_unreference(brw->sf.prog_bo); + dri_bo_unreference(brw->sf.state_bo); + dri_bo_unreference(brw->sf.vp_bo); + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) + dri_bo_unreference(brw->wm.sdc_bo[i]); + dri_bo_unreference(brw->wm.bind_bo); + for (i = 0; i < BRW_WM_MAX_SURF; i++) + dri_bo_unreference(brw->wm.surf_bo[i]); + dri_bo_unreference(brw->wm.sampler_bo); + dri_bo_unreference(brw->wm.prog_bo); + dri_bo_unreference(brw->wm.state_bo); + dri_bo_unreference(brw->cc.prog_bo); + dri_bo_unreference(brw->cc.state_bo); + dri_bo_unreference(brw->cc.vp_bo); +} diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index fa3e32c7ff..009e28b227 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -115,7 +115,6 @@ * Handles blending and (presumably) depth and stencil testing. 
*/ -#define BRW_FALLBACK_TEXTURE 0x1 #define BRW_MAX_CURBE (32*16) struct brw_context; @@ -450,11 +449,9 @@ struct brw_query_object { */ struct brw_context { - struct intel_context intel; /**< base class, must be first field */ GLuint primitive; GLboolean emit_state_always; - GLboolean tmp_fallback; GLboolean no_batch_wrap; struct { @@ -692,7 +689,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /*====================================================================== * brw_queryobj.c */ -void brw_init_queryobj_functions(struct dd_function_table *functions); +void brw_init_query(struct brw_context *brw); void brw_prepare_query_begin(struct brw_context *brw); void brw_emit_query_begin(struct brw_context *brw); void brw_emit_query_end(struct brw_context *brw); @@ -730,7 +727,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst); * macros used previously: */ static INLINE struct brw_context * -brw_context( GLcontext *ctx ) +brw_context( struct pipe_context *ctx ) { return (struct brw_context *)ctx; } diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 4be6c77aa1..3e32c4983d 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -30,14 +30,6 @@ */ - -#include "main/glheader.h" -#include "main/context.h" -#include "main/macros.h" -#include "main/enums.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/prog_statevars.h" #include "intel_batchbuffer.h" #include "intel_regions.h" #include "brw_context.h" @@ -64,31 +56,17 @@ static void calculate_curbe_offsets( struct brw_context *brw ) GLuint nr_clip_regs = 0; GLuint total_regs; - /* _NEW_TRANSFORM */ - if (ctx->Transform.ClipPlanesEnabled) { - GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled); + /* PIPE_NEW_UCP */ + if (brw->nr_ucp) { + GLuint nr_planes = 6 + brw->nr_ucp; nr_clip_regs = (nr_planes * 4 + 15) / 16; } total_regs = nr_fp_regs + nr_vp_regs + 
nr_clip_regs; - /* This can happen - what to do? Probably rather than falling - * back, the best thing to do is emit programs which code the - * constants as immediate values. Could do this either as a static - * cap on WM and VS, or adaptively. - * - * Unfortunately, this is currently dependent on the results of the - * program generation process (in the case of wm), so this would - * introduce the need to re-generate programs in the event of a - * curbe allocation failure. - */ - /* Max size is 32 - just large enough to - * hold the 128 parameters allowed by - * the fragment and vertex program - * api's. It's not clear what happens - * when both VP and FP want to use 128 - * parameters, though. + /* When this is > 32, want to use a true constant buffer to hold + * the extra constants. */ assert(total_regs <= 32); @@ -113,8 +91,8 @@ static void calculate_curbe_offsets( struct brw_context *brw ) brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; brw->curbe.total_size = reg; - if (0) - _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", + if (BRW_DEBUG & DEBUG_CURBE) + debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", brw->curbe.wm_start, brw->curbe.wm_size, brw->curbe.clip_start, @@ -129,7 +107,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) const struct brw_tracked_state brw_curbe_offsets = { .dirty = { - .mesa = _NEW_TRANSFORM, + .mesa = PIPE_NEW_UCP, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = CACHE_NEW_WM_PROG }, @@ -204,11 +182,13 @@ static void prepare_constant_buffer(struct brw_context *brw) if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; - _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); + /* map fs constant buffer */ /* copy float constants */ for (i = 0; i < brw->wm.prog_data->nr_params; i++) buf[offset + i] = *brw->wm.prog_data->param[i]; + + /* unmap fs constant buffer */ } @@ -228,18 +208,15 @@ static void prepare_constant_buffer(struct brw_context *brw) buf[offset + i * 4 + 3] = fixed_plane[i][3]; 
} - /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to - * clip-space: + /* Clip planes: */ - assert(MAX_CLIP_PLANES == 6); - for (j = 0; j < MAX_CLIP_PLANES; j++) { - if (ctx->Transform.ClipPlanesEnabled & (1<Transform._ClipUserPlane[j][0]; - buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1]; - buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2]; - buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3]; - i++; - } + assert(brw->nr_ucp <= 6); + for (j = 0; j < brw->nr_ucp; j++) { + buf[offset + i * 4 + 0] = brw->ucp[j][0]; + buf[offset + i * 4 + 1] = brw->ucp[j][1]; + buf[offset + i * 4 + 2] = brw->ucp[j][2]; + buf[offset + i * 4 + 3] = brw->ucp[j][3]; + i++; } } @@ -248,13 +225,7 @@ static void prepare_constant_buffer(struct brw_context *brw) GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->vs.prog_data->nr_params / 4; - if (brw->vertex_program->IsNVProgram) - _mesa_load_tracked_matrices(ctx); - - /* Updates the ParamaterValues[i] pointers for all parameters of the - * basic type of PROGRAM_STATE_VAR. - */ - _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + /* map vs constant buffer */ /* XXX just use a memcpy here */ for (i = 0; i < nr; i++) { @@ -264,14 +235,16 @@ static void prepare_constant_buffer(struct brw_context *brw) buf[offset + i * 4 + 2] = value[2]; buf[offset + i * 4 + 3] = value[3]; } + + /* unmap vs constant buffer */ } if (0) { for (i = 0; i < sz*16; i+=4) - _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); - _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", + debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", brw->curbe.last_buf, buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? 
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); @@ -282,12 +255,12 @@ static void prepare_constant_buffer(struct brw_context *brw) bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { /* constants have not changed */ - _mesa_free(buf); + FREE(buf); } else { /* constants have changed */ if (brw->curbe.last_buf) - _mesa_free(brw->curbe.last_buf); + FREE(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; @@ -353,15 +326,11 @@ static void emit_constant_buffer(struct brw_context *brw) ADVANCE_BATCH(); } -/* This tracked state is unique in that the state it monitors varies - * dynamically depending on the parameters tracked by the fragment and - * vertex programs. This is the template used as a starting point, - * each context will maintain a copy of this internally and update as - * required. - */ const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = _NEW_PROGRAM_CONSTANTS, + .mesa = (PIPE_NEW_FS_CONSTANTS | + PIPE_NEW_VS_CONSTANTS | + PIPE_NEW_UCP), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index 78d457ad2b..282c5b18f4 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ b/src/gallium/drivers/i965/brw_defines.h @@ -840,8 +840,8 @@ #include "intel_chipset.h" -#define BRW_IS_G4X(brw) (IS_G4X((brw)->intel.intelScreen->deviceID)) -#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->intel.intelScreen->deviceID)) +#define BRW_IS_G4X(brw) (IS_G4X((brw)->brw_screen->deviceID)) +#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->brw_screen->deviceID)) #define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) #define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) #define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 
CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 9fef230507..a84c581c03 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -27,8 +27,6 @@ #include #include -#include "main/mtypes.h" - #include "brw_context.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 44bb7bd588..8cd117c24f 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -39,14 +39,13 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" -#include "brw_fallback.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" #define FILE_DEBUG_FLAG DEBUG_BATCH -static GLuint prim_to_hw_prim[GL_POLYGON+1] = { +static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = { _3DPRIM_POINTLIST, _3DPRIM_LINELIST, _3DPRIM_LINELOOP, @@ -60,19 +59,6 @@ static GLuint prim_to_hw_prim[GL_POLYGON+1] = { }; -static const GLenum reduced_prim[GL_POLYGON+1] = { - GL_POINTS, - GL_LINES, - GL_LINES, - GL_LINES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES -}; - /* When the primitive changes, set a state bit and re-validate. Not * the nicest and would rather deal with this by having all the @@ -196,102 +182,6 @@ static void brw_merge_inputs( struct brw_context *brw, brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; } -/* XXX: could split the primitive list to fallback only on the - * non-conformant primitives. - */ -static GLboolean check_fallbacks( struct brw_context *brw, - const struct _mesa_prim *prim, - GLuint nr_prims ) -{ - GLcontext *ctx = &brw->intel.ctx; - GLuint i; - - /* If we don't require strict OpenGL conformance, never - * use fallbacks. If we're forcing fallbacks, always - * use fallfacks. 
- */ - if (brw->intel.conformance_mode == 0) - return GL_FALSE; - - if (brw->intel.conformance_mode == 2) - return GL_TRUE; - - if (ctx->Polygon.SmoothFlag) { - for (i = 0; i < nr_prims; i++) - if (reduced_prim[prim[i].mode] == GL_TRIANGLES) - return GL_TRUE; - } - - /* BRW hardware will do AA lines, but they are non-conformant it - * seems. TBD whether we keep this fallback: - */ - if (ctx->Line.SmoothFlag) { - for (i = 0; i < nr_prims; i++) - if (reduced_prim[prim[i].mode] == GL_LINES) - return GL_TRUE; - } - - /* Stipple -- these fallbacks could be resolved with a little - * bit of work? - */ - if (ctx->Line.StippleFlag) { - for (i = 0; i < nr_prims; i++) { - /* GS doesn't get enough information to know when to reset - * the stipple counter?!? - */ - if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) - return GL_TRUE; - - if (prim[i].mode == GL_POLYGON && - (ctx->Polygon.FrontMode == GL_LINE || - ctx->Polygon.BackMode == GL_LINE)) - return GL_TRUE; - } - } - - if (ctx->Point.SmoothFlag) { - for (i = 0; i < nr_prims; i++) - if (prim[i].mode == GL_POINTS) - return GL_TRUE; - } - - /* BRW hardware doesn't handle GL_CLAMP texturing correctly; - * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP - * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and - * we want strict conformance, force the fallback. - * Right now, we only do this for 2D textures. 
- */ - { - int u; - for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; - if (texUnit->Enabled) { - if (texUnit->Enabled & TEXTURE_1D_BIT) { - if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { - return GL_TRUE; - } - } - if (texUnit->Enabled & TEXTURE_2D_BIT) { - if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { - return GL_TRUE; - } - } - if (texUnit->Enabled & TEXTURE_3D_BIT) { - if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { - return GL_TRUE; - } - } - } - } - } - - /* Nothing stopping us from the fast path now */ - return GL_FALSE; -} - /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. */ @@ -308,23 +198,12 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, GLboolean retval = GL_FALSE; GLboolean warn = GL_FALSE; GLboolean first_time = GL_TRUE; + uint32_t hw_prim; GLuint i; if (ctx->NewState) _mesa_update_state( ctx ); - /* We have to validate the textures *before* checking for fallbacks; - * otherwise, the software fallback won't be able to rely on the - * texture state, the firstLevel and lastLevel fields won't be - * set in the intel texture object (they'll both be 0), and the - * software fallback will segfault if it attempts to access any - * texture level other than level 0. - */ - brw_validate_textures( brw ); - - if (check_fallbacks(brw, prim, nr_prims)) - return GL_FALSE; - /* Bind all inputs, derive varying and size information: */ brw_merge_inputs( brw, arrays ); @@ -336,90 +215,30 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, brw->vb.max_index = max_index; brw->state.dirty.brw |= BRW_NEW_VERTICES; - /* Have to validate state quite late. Will rebuild tnl_program, - * which depends on varying information. 
- * - * Note this is where brw->vs->prog_data.inputs_read is calculated, - * so can't access it earlier. - */ - - LOCK_HARDWARE(intel); - - if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) { - UNLOCK_HARDWARE(intel); - return GL_TRUE; - } - - for (i = 0; i < nr_prims; i++) { - uint32_t hw_prim; - - /* Flush the batch if it's approaching full, so that we don't wrap while - * we've got validated state that needs to be in the same batch as the - * primitives. This fraction is just a guess (minimal full state plus - * a primitive is around 512 bytes), and would be better if we had - * an upper bound of how much we might emit in a single - * brw_try_draw_prims(). - */ - intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4, - LOOP_CLIPRECTS); - - hw_prim = brw_set_prim(brw, prim[i].mode); - - if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) { - first_time = GL_FALSE; - - brw_validate_state(brw); - - /* Various fallback checks: */ - if (brw->intel.Fallback) - goto out; - - /* Check that we can fit our state in with our existing batchbuffer, or - * flush otherwise. - */ - if (dri_bufmgr_check_aperture_space(brw->state.validated_bos, - brw->state.validated_bo_count)) { - static GLboolean warned; - intel_batchbuffer_flush(intel->batch); - - /* Validate the state after we flushed the batch (which would have - * changed the set of dirty state). If we still fail to - * check_aperture, warn of what's happening, but attempt to continue - * on since it may succeed anyway, and the user would probably rather - * see a failure and a warning than a fallback. 
- */ - brw_validate_state(brw); - if (!warned && - dri_bufmgr_check_aperture_space(brw->state.validated_bos, - brw->state.validated_bo_count)) { - warn = GL_TRUE; - warned = GL_TRUE; - } - } - - brw_upload_state(brw); - } + hw_prim = brw_set_prim(brw, prim[i].mode); - brw_emit_prim(brw, &prim[i], hw_prim); + brw_validate_state(brw); - retval = GL_TRUE; - } + /* Check that we can fit our state in with our existing batchbuffer, or + * flush otherwise. + */ + ret = dri_bufmgr_check_aperture_space(brw->state.validated_bos, + brw->state.validated_bo_count); + if (ret) + return ret; + + ret = brw_upload_state(brw); + if (ret) + return ret; + + ret = brw_emit_prim(brw, &prim[i], hw_prim); + if (ret) + return ret; if (intel->always_flush_batch) intel_batchbuffer_flush(intel->batch); - out: - UNLOCK_HARDWARE(intel); - - brw_state_cache_check_size(brw); - - if (warn) - fprintf(stderr, "i965: Single primitive emit potentially exceeded " - "available aperture space\n"); - if (!retval) - DBG("%s failed\n", __FUNCTION__); - - return retval; + return 0; } void brw_draw_prims( GLcontext *ctx, @@ -431,37 +250,26 @@ void brw_draw_prims( GLcontext *ctx, GLuint min_index, GLuint max_index ) { - GLboolean retval; + enum pipe_error ret; if (!vbo_all_varyings_in_vbos(arrays)) { if (!index_bounds_valid) vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); - - /* Decide if we want to rebase. If so we end up recursing once - * only into this function. - */ - if (min_index != 0) { - vbo_rebase_prims(ctx, arrays, - prim, nr_prims, - ib, min_index, max_index, - brw_draw_prims ); - return; - } } /* Make a first attempt at drawing: */ - retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); /* Otherwise, we really are out of memory. Pass the drawing * command to the software tnl module and which will in turn call * swrast to do the drawing. 
*/ - if (!retval) { - _swsetup_Wakeup(ctx); - _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + if (ret != 0) { + intel_batchbuffer_flush(intel->batch); + ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + assert(ret == 0); } - } void brw_draw_init( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index a3ff6c58d8..ad3ef6b7dd 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -25,13 +25,9 @@ * **************************************************************************/ +#include "pipe/p_context.h" -#include "main/glheader.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "main/state.h" -#include "main/api_validate.h" -#include "main/enums.h" +#include "util/u_upload_mgr.h" #include "brw_draw.h" #include "brw_defines.h" @@ -43,303 +39,157 @@ #include "intel_buffer_objects.h" #include "intel_tex.h" -static GLuint double_types[5] = { - 0, - BRW_SURFACEFORMAT_R64_FLOAT, - BRW_SURFACEFORMAT_R64G64_FLOAT, - BRW_SURFACEFORMAT_R64G64B64_FLOAT, - BRW_SURFACEFORMAT_R64G64B64A64_FLOAT -}; - -static GLuint float_types[5] = { - 0, - BRW_SURFACEFORMAT_R32_FLOAT, - BRW_SURFACEFORMAT_R32G32_FLOAT, - BRW_SURFACEFORMAT_R32G32B32_FLOAT, - BRW_SURFACEFORMAT_R32G32B32A32_FLOAT -}; - -static GLuint uint_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R32_UNORM, - BRW_SURFACEFORMAT_R32G32_UNORM, - BRW_SURFACEFORMAT_R32G32B32_UNORM, - BRW_SURFACEFORMAT_R32G32B32A32_UNORM -}; - -static GLuint uint_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R32_USCALED, - BRW_SURFACEFORMAT_R32G32_USCALED, - BRW_SURFACEFORMAT_R32G32B32_USCALED, - BRW_SURFACEFORMAT_R32G32B32A32_USCALED -}; - -static GLuint int_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R32_SNORM, - BRW_SURFACEFORMAT_R32G32_SNORM, - BRW_SURFACEFORMAT_R32G32B32_SNORM, - BRW_SURFACEFORMAT_R32G32B32A32_SNORM -}; - -static GLuint 
int_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R32_SSCALED, - BRW_SURFACEFORMAT_R32G32_SSCALED, - BRW_SURFACEFORMAT_R32G32B32_SSCALED, - BRW_SURFACEFORMAT_R32G32B32A32_SSCALED -}; - -static GLuint ushort_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R16_UNORM, - BRW_SURFACEFORMAT_R16G16_UNORM, - BRW_SURFACEFORMAT_R16G16B16_UNORM, - BRW_SURFACEFORMAT_R16G16B16A16_UNORM -}; - -static GLuint ushort_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R16_USCALED, - BRW_SURFACEFORMAT_R16G16_USCALED, - BRW_SURFACEFORMAT_R16G16B16_USCALED, - BRW_SURFACEFORMAT_R16G16B16A16_USCALED -}; - -static GLuint short_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R16_SNORM, - BRW_SURFACEFORMAT_R16G16_SNORM, - BRW_SURFACEFORMAT_R16G16B16_SNORM, - BRW_SURFACEFORMAT_R16G16B16A16_SNORM -}; - -static GLuint short_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R16_SSCALED, - BRW_SURFACEFORMAT_R16G16_SSCALED, - BRW_SURFACEFORMAT_R16G16B16_SSCALED, - BRW_SURFACEFORMAT_R16G16B16A16_SSCALED -}; -static GLuint ubyte_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R8_UNORM, - BRW_SURFACEFORMAT_R8G8_UNORM, - BRW_SURFACEFORMAT_R8G8B8_UNORM, - BRW_SURFACEFORMAT_R8G8B8A8_UNORM -}; -static GLuint ubyte_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R8_USCALED, - BRW_SURFACEFORMAT_R8G8_USCALED, - BRW_SURFACEFORMAT_R8G8B8_USCALED, - BRW_SURFACEFORMAT_R8G8B8A8_USCALED -}; - -static GLuint byte_types_norm[5] = { - 0, - BRW_SURFACEFORMAT_R8_SNORM, - BRW_SURFACEFORMAT_R8G8_SNORM, - BRW_SURFACEFORMAT_R8G8B8_SNORM, - BRW_SURFACEFORMAT_R8G8B8A8_SNORM -}; -static GLuint byte_types_scale[5] = { - 0, - BRW_SURFACEFORMAT_R8_SSCALED, - BRW_SURFACEFORMAT_R8G8_SSCALED, - BRW_SURFACEFORMAT_R8G8B8_SSCALED, - BRW_SURFACEFORMAT_R8G8B8A8_SSCALED -}; - - -/** - * Given vertex array type/size/format/normalized info, return - * the appopriate hardware surface type. - * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays. 
- */ -static GLuint get_surface_type( GLenum type, GLuint size, - GLenum format, GLboolean normalized ) +unsigned brw_translate_surface_format( unsigned id ) { - if (INTEL_DEBUG & DEBUG_VERTS) - _mesa_printf("type %s size %d normalized %d\n", - _mesa_lookup_enum_by_nr(type), size, normalized); - - if (normalized) { - switch (type) { - case GL_DOUBLE: return double_types[size]; - case GL_FLOAT: return float_types[size]; - case GL_INT: return int_types_norm[size]; - case GL_SHORT: return short_types_norm[size]; - case GL_BYTE: return byte_types_norm[size]; - case GL_UNSIGNED_INT: return uint_types_norm[size]; - case GL_UNSIGNED_SHORT: return ushort_types_norm[size]; - case GL_UNSIGNED_BYTE: - if (format == GL_BGRA) { - /* See GL_EXT_vertex_array_bgra */ - assert(size == 4); - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - } - else { - return ubyte_types_norm[size]; - } - default: assert(0); return 0; - } - } - else { - assert(format == GL_RGBA); /* sanity check */ - switch (type) { - case GL_DOUBLE: return double_types[size]; - case GL_FLOAT: return float_types[size]; - case GL_INT: return int_types_scale[size]; - case GL_SHORT: return short_types_scale[size]; - case GL_BYTE: return byte_types_scale[size]; - case GL_UNSIGNED_INT: return uint_types_scale[size]; - case GL_UNSIGNED_SHORT: return ushort_types_scale[size]; - case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; - default: assert(0); return 0; - } + switch (id) { + case PIPE_FORMAT_R64_FLOAT: + return BRW_SURFACEFORMAT_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return BRW_SURFACEFORMAT_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return BRW_SURFACEFORMAT_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return BRW_SURFACEFORMAT_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32_FLOAT; + case 
PIPE_FORMAT_R32G32B32A32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return BRW_SURFACEFORMAT_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return BRW_SURFACEFORMAT_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return BRW_SURFACEFORMAT_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return BRW_SURFACEFORMAT_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return BRW_SURFACEFORMAT_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return BRW_SURFACEFORMAT_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return BRW_SURFACEFORMAT_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return BRW_SURFACEFORMAT_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return BRW_SURFACEFORMAT_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return BRW_SURFACEFORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return BRW_SURFACEFORMAT_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return BRW_SURFACEFORMAT_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; + + case 
PIPE_FORMAT_R16_SNORM: + return BRW_SURFACEFORMAT_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return BRW_SURFACEFORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return BRW_SURFACEFORMAT_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return BRW_SURFACEFORMAT_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return BRW_SURFACEFORMAT_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return BRW_SURFACEFORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return BRW_SURFACEFORMAT_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return BRW_SURFACEFORMAT_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return BRW_SURFACEFORMAT_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return BRW_SURFACEFORMAT_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return BRW_SURFACEFORMAT_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; + + default: + assert(0); + return 0; } } - -static GLuint get_size( GLenum type ) -{ - switch (type) { - case GL_DOUBLE: return sizeof(GLdouble); - case GL_FLOAT: return 
sizeof(GLfloat); - case GL_INT: return sizeof(GLint); - case GL_SHORT: return sizeof(GLshort); - case GL_BYTE: return sizeof(GLbyte); - case GL_UNSIGNED_INT: return sizeof(GLuint); - case GL_UNSIGNED_SHORT: return sizeof(GLushort); - case GL_UNSIGNED_BYTE: return sizeof(GLubyte); - default: return 0; - } -} - -static GLuint get_index_type(GLenum type) +static unsigned get_index_type(int type) { switch (type) { - case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE; - case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD; - case GL_UNSIGNED_INT: return BRW_INDEX_DWORD; + case 1: return BRW_INDEX_BYTE; + case 2: return BRW_INDEX_WORD; + case 4: return BRW_INDEX_DWORD; default: assert(0); return 0; } } -static void wrap_buffers( struct brw_context *brw, - GLuint size ) -{ - if (size < BRW_UPLOAD_INIT_SIZE) - size = BRW_UPLOAD_INIT_SIZE; - - brw->vb.upload.offset = 0; - - if (brw->vb.upload.bo != NULL) - dri_bo_unreference(brw->vb.upload.bo); - brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO", - size, 1); - - /* Set the internal VBO\ to no-backing-store. We only use them as a - * temporary within a brw_try_draw_prims while the lock is held. 
- */ - /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH - FAKE TO PUSH THIS STUFF */ -// if (!brw->intel.ttm) -// dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL); -} - -static void get_space( struct brw_context *brw, - GLuint size, - dri_bo **bo_return, - GLuint *offset_return ) -{ - size = ALIGN(size, 64); - - if (brw->vb.upload.bo == NULL || - brw->vb.upload.offset + size > brw->vb.upload.bo->size) { - wrap_buffers(brw, size); - } - - assert(*bo_return == NULL); - dri_bo_reference(brw->vb.upload.bo); - *bo_return = brw->vb.upload.bo; - *offset_return = brw->vb.upload.offset; - brw->vb.upload.offset += size; -} - -static void -copy_array_to_vbo_array( struct brw_context *brw, - struct brw_vertex_element *element, - GLuint dst_stride) -{ - struct intel_context *intel = &brw->intel; - GLuint size = element->count * dst_stride; - - get_space(brw, size, &element->bo, &element->offset); - if (element->glarray->StrideB == 0) { - assert(element->count == 1); - element->stride = 0; - } else { - element->stride = dst_stride; - } - - if (dst_stride == element->glarray->StrideB) { - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - memcpy((char *)element->bo->virtual + element->offset, - element->glarray->Ptr, size); - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - dri_bo_subdata(element->bo, - element->offset, - size, - element->glarray->Ptr); - } - } else { - char *dest; - const unsigned char *src = element->glarray->Ptr; - int i; - - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(element->bo); - dest = element->bo->virtual; - dest += element->offset; - - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - drm_intel_gem_bo_unmap_gtt(element->bo); - } else { - void *data; - - data = _mesa_malloc(dst_stride * element->count); - dest = data; - for (i = 0; i < element->count; i++) 
{ - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } - - dri_bo_subdata(element->bo, - element->offset, - size, - data); - - _mesa_free(data); - } - } -} -static void brw_prepare_vertices(struct brw_context *brw) +static boolean brw_prepare_vertices(struct brw_context *brw) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); @@ -358,123 +208,38 @@ static void brw_prepare_vertices(struct brw_context *brw) if (0) _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); - /* Accumulate the list of enabled arrays. */ - brw->vb.nr_enabled = 0; - while (vs_inputs) { - GLuint i = _mesa_ffsll(vs_inputs) - 1; - struct brw_vertex_element *input = &brw->vb.inputs[i]; - vs_inputs &= ~(1 << i); - brw->vb.enabled[brw->vb.nr_enabled++] = input; - } - - /* XXX: In the rare cases where this happens we fallback all - * the way to software rasterization, although a tnl fallback - * would be sufficient. I don't know of *any* real world - * cases with > 17 vertex attributes enabled, so it probably - * isn't an issue at this point. - */ - if (brw->vb.nr_enabled >= BRW_VEP_MAX) { - intel->Fallback = 1; - return; - } for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; input->element_size = get_size(input->glarray->Type) * input->glarray->Size; - if (_mesa_is_bufferobj(input->glarray->BufferObj)) { - struct intel_buffer_object *intel_buffer = - intel_buffer_object(input->glarray->BufferObj); - - /* Named buffer object: Just reference its contents directly. */ - dri_bo_unreference(input->bo); - input->bo = intel_bufferobj_buffer(intel, intel_buffer, - INTEL_READ); - dri_bo_reference(input->bo); - input->offset = (unsigned long)input->glarray->Ptr; - input->stride = input->glarray->StrideB; - input->count = input->glarray->_MaxElement; - - /* This is a common place to reach if the user mistakenly supplies - * a pointer in place of a VBO offset. 
If we just let it go through, - * we may end up dereferencing a pointer beyond the bounds of the - * GTT. We would hope that the VBO's max_index would save us, but - * Mesa appears to hand us min/max values not clipped to the - * array object's _MaxElement, and _MaxElement frequently appears - * to be wrong anyway. - * - * The VBO spec allows application termination in this case, and it's - * probably a service to the poor programmer to do so rather than - * trying to just not render. - */ - assert(input->offset < input->bo->size); - } else { - input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1; - if (input->bo != NULL) { - /* Already-uploaded vertex data is present from a previous - * prepare_vertices, but we had to re-validate state due to - * check_aperture failing and a new batch being produced. - */ - continue; - } - - /* Queue the buffer object up to be uploaded in the next pass, - * when we've decided if we're doing interleaved or not. - */ - if (input->attrib == VERT_ATTRIB_POS) { - /* Position array not properly enabled: - */ - if (input->glarray->StrideB == 0) { - intel->Fallback = 1; - return; - } - - interleave = input->glarray->StrideB; - ptr = input->glarray->Ptr; - } - else if (interleave != input->glarray->StrideB || - (const unsigned char *)input->glarray->Ptr - ptr < 0 || - (const unsigned char *)input->glarray->Ptr - ptr > interleave) - { - interleave = 0; - } - - upload[nr_uploads++] = input; - - /* We rebase drawing to start at element zero only when - * varyings are not in vbos, which means we can end up - * uploading non-varying arrays (stride != 0) when min_index - * is zero. This doesn't matter as the amount to upload is - * the same for these arrays whether the draw call is rebased - * or not - we just have to upload the one element. - */ - assert(min_index == 0 || input->glarray->StrideB == 0); - } - } - - /* Handle any arrays to be uploaded. 
*/ - if (nr_uploads > 1 && interleave && interleave <= 256) { - /* All uploads are interleaved, so upload the arrays together as - * interleaved. First, upload the contents and set up upload[0]. - */ - copy_array_to_vbo_array(brw, upload[0], interleave); - - for (i = 1; i < nr_uploads; i++) { - /* Then, just point upload[i] at upload[0]'s buffer. */ - upload[i]->stride = interleave; - upload[i]->offset = upload[0]->offset + - ((const unsigned char *)upload[i]->glarray->Ptr - ptr); - upload[i]->bo = upload[0]->bo; - dri_bo_reference(upload[i]->bo); + if (brw_is_user_buffer(vb)) { + u_upload_buffer( brw->upload, + min_index * vb->stride, + (max_index + 1 - min_index) * vb->stride, + &offset, + &buffer ); } - } - else { - /* Upload non-interleaved arrays */ - for (i = 0; i < nr_uploads; i++) { - copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); + else + { + offset = 0; + buffer = vb->buffer; + count = stride == 0 ? 1 : max_index + 1 - min_index; } + + /* Named buffer object: Just reference its contents directly. 
*/ + dri_bo_unreference(input->bo); + input->bo = intel_bufferobj_buffer(intel, intel_buffer, + INTEL_READ); + dri_bo_reference(input->bo); + + input->offset = (unsigned long)offset; + input->stride = vb->stride; + input->count = count; + + assert(input->offset < input->bo->size); } brw_prepare_query_begin(brw); @@ -632,13 +397,8 @@ static void brw_prepare_indices(struct brw_context *brw) /* Straight upload */ - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(bo); - memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); - drm_intel_gem_bo_unmap_gtt(bo); - } else { - dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); - } + brw_bo_subdata(bo, offset, ib_size, index_buffer->ptr); + } else { offset = (GLuint) (unsigned long) index_buffer->ptr; brw->ib.start_vertex_offset = 0; diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 48c2b9a41c..5ec0c585fe 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -58,7 +58,7 @@ static void compile_gs_prog( struct brw_context *brw, /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ - c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_attrs = util_count_bits(c.key.attrs); if (BRW_IS_IGDNG(brw)) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? 
*/ diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c new file mode 100644 index 0000000000..b351794dce --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -0,0 +1,41 @@ + + /* _NEW_COLOR */ + if (key->logic_op != GL_COPY) { + cc.cc2.logicop_enable = 1; + cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); + } else if (key->color_blend) { + GLenum eqRGB = key->blend_eq_rgb; + GLenum eqA = key->blend_eq_a; + GLenum srcRGB = key->blend_src_rgb; + GLenum dstRGB = key->blend_dst_rgb; + GLenum srcA = key->blend_src_a; + GLenum dstA = key->blend_dst_a; + + if (eqRGB == GL_MIN || eqRGB == GL_MAX) { + srcRGB = dstRGB = GL_ONE; + } + + if (eqA == GL_MIN || eqA == GL_MAX) { + srcA = dstA = GL_ONE; + } + + cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); + cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); + + cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); + + cc.cc3.blend_enable = 1; + cc.cc3.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + if (key->dither) { + cc.cc5.dither_enable = 1; + cc.cc6.y_dither_offset = 0; + cc.cc6.x_dither_offset = 0; + } + diff --git a/src/gallium/drivers/i965/brw_pipe_debug.c b/src/gallium/drivers/i965/brw_pipe_debug.c new file mode 100644 index 0000000000..34d6d4028a --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_debug.c @@ -0,0 +1,2 @@ + if (INTEL_DEBUG & DEBUG_STATS) + cc.cc5.statistics_enable = 1; diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c new file mode 100644 index 0000000000..da29bc8bcb --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_depth.c @@ -0,0 +1,52 @@ + /* _NEW_STENCIL */ + if (key->dsa.stencil[0].enable) { + cc.cc0.stencil_enable = 
1; + cc.cc0.stencil_func = + intel_translate_compare_func(key->stencil_func[0]); + cc.cc0.stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[0]); + cc.cc0.stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); + cc.cc0.stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); + cc.cc1.stencil_ref = key->stencil_ref[0]; + cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; + cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; + + if (key->stencil_two_side) { + cc.cc0.bf_stencil_enable = 1; + cc.cc0.bf_stencil_func = + intel_translate_compare_func(key->stencil_func[1]); + cc.cc0.bf_stencil_fail_op = + intel_translate_stencil_op(key->stencil_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_fail_op = + intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); + cc.cc0.bf_stencil_pass_depth_pass_op = + intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); + cc.cc1.bf_stencil_ref = key->stencil_ref[1]; + cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; + cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; + } + + /* Not really sure about this: + */ + if (key->stencil_write_mask[0] || + (key->stencil_two_side && key->stencil_write_mask[1])) + cc.cc0.stencil_write_enable = 1; + } + + + if (key->alpha_enabled) { + cc.cc3.alpha_test = 1; + cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); + cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + + UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); + } + + /* _NEW_DEPTH */ + if (key->depth_test) { + cc.cc2.depth_test = 1; + cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); + cc.cc2.depth_write_enable = key->depth_write; + } diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c new file mode 100644 index 0000000000..d4ae332f46 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -0,0 +1,25 @@ 
+ +/** + * called from intelDrawBuffer() + */ +static void brw_set_draw_region( struct intel_context *intel, + struct intel_region *color_regions[], + struct intel_region *depth_region, + GLuint num_color_regions) +{ + struct brw_context *brw = brw_context(&intel->ctx); + GLuint i; + + /* release old color/depth regions */ + if (brw->state.depth_region != depth_region) + brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; + for (i = 0; i < brw->state.nr_color_regions; i++) + intel_region_release(&brw->state.color_regions[i]); + intel_region_release(&brw->state.depth_region); + + /* reference new color/depth regions */ + for (i = 0; i < num_color_regions; i++) + intel_region_reference(&brw->state.color_regions[i], color_regions[i]); + intel_region_reference(&brw->state.depth_region, depth_region); + brw->state.nr_color_regions = num_color_regions; +} diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c new file mode 100644 index 0000000000..008f623151 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -0,0 +1,64 @@ + +/** + * called from intel_batchbuffer_flush and children before sending a + * batchbuffer off. + */ +static void brw_finish_batch(struct intel_context *intel) +{ + struct brw_context *brw = brw_context(&intel->ctx); + brw_emit_query_end(brw); +} + + +/** + * called from intelFlushBatchLocked + */ +static void brw_new_batch( struct intel_context *intel ) +{ + struct brw_context *brw = brw_context(&intel->ctx); + + /* Check that we didn't just wrap our batchbuffer at a bad time. */ + assert(!brw->no_batch_wrap); + + brw->curbe.need_new_bo = GL_TRUE; + + /* Mark all context state as needing to be re-emitted. + * This is probably not as severe as on 915, since almost all of our state + * is just in referenced buffers. 
+ */ + brw->state.dirty.brw |= BRW_NEW_CONTEXT; + + brw->state.dirty.mesa |= ~0; + brw->state.dirty.brw |= ~0; + brw->state.dirty.cache |= ~0; + + /* Move to the end of the current upload buffer so that we'll force choosing + * a new buffer next time. + */ + if (brw->vb.upload.bo != NULL) { + dri_bo_unreference(brw->vb.upload.bo); + brw->vb.upload.bo = NULL; + brw->vb.upload.offset = 0; + } +} + + +static void brw_note_fence( struct intel_context *intel, GLuint fence ) +{ + brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; +} + +/* called from intelWaitForIdle() and intelFlush() + * + * For now, just flush everything. Could be smarter later. + */ +static GLuint brw_flush_cmd( void ) +{ + struct brw_mi_flush flush; + flush.opcode = CMD_MI_FLUSH; + flush.pad = 0; + flush.flags = BRW_FLUSH_STATE_CACHE; + return *(GLuint *)&flush; +} + + diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c new file mode 100644 index 0000000000..d199d0b81a --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -0,0 +1,27 @@ + /* _NEW_BUFFERS */ + if (IS_965(intel->intelScreen->deviceID) && + !IS_G4X(intel->intelScreen->deviceID)) { + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. + * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. 
+ * I would have done this, but there's also a nasty requirement that + * the depth and the color surfaces all be of the same LOD, which + * may be a worse requirement than this alignment. (Also, we may + * want to just demote the texture to untiled, instead). + */ + if (irb->region && + irb->region->tiling != I915_TILING_NONE && + (irb->region->draw_offset & 4095)) { + DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); + return GL_TRUE; + } + } diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index e1c2c7777b..90513245ee 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -59,9 +59,9 @@ static void compile_sf_prog( struct brw_context *brw, brw_init_compile(brw, &c.func); c.key = *key; - c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_attrs = util_count_bits(c.key.attrs); c.nr_attr_regs = (c.nr_attrs+1)/2; - c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS); + c.nr_setup_attrs = util_count_bits(c.key.attrs & DO_SETUP_BITS); c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index ca8f97f9f9..4cc427a935 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -150,7 +150,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); - GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; if (!nr) @@ -188,7 +188,7 @@ static void do_flatshade_line( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); - GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; if (!nr) diff --git 
a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index b817b741e7..6801084616 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -270,7 +270,7 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) /*********************************************************************** * Emit all state: */ -void brw_validate_state( struct brw_context *brw ) +enum pipe_error brw_validate_state( struct brw_context *brw ) { GLcontext *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; @@ -278,10 +278,6 @@ void brw_validate_state( struct brw_context *brw ) GLuint i; brw_clear_validated_bos(brw); - - state->mesa |= brw->intel.NewGLState; - brw->intel.NewGLState = 0; - brw_add_validated_bo(brw, intel->batch->buf); if (brw->emit_state_always) { @@ -290,36 +286,23 @@ void brw_validate_state( struct brw_context *brw ) state->cache |= ~0; } - if (brw->fragment_program != ctx->FragmentProgram._Current) { - brw->fragment_program = ctx->FragmentProgram._Current; - brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - } - - if (brw->vertex_program != ctx->VertexProgram._Current) { - brw->vertex_program = ctx->VertexProgram._Current; - brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - } - if (state->mesa == 0 && state->cache == 0 && state->brw == 0) - return; + return 0; if (brw->state.dirty.brw & BRW_NEW_CONTEXT) brw_clear_batch_cache(brw); - brw->intel.Fallback = 0; - /* do prepare stage for all atoms */ for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = atoms[i]; - if (brw->intel.Fallback) - break; - if (check_state(state, &atom->dirty)) { if (atom->prepare) { - atom->prepare(brw); + ret = atom->prepare(brw); + if (ret) + return ret; } } } @@ -329,17 +312,18 @@ void brw_validate_state( struct brw_context *brw ) * If this fails, we can experience GPU lock-ups. 
*/ { - const struct brw_fragment_program *fp; - fp = brw_fragment_program_const(brw->fragment_program); + const struct brw_fragment_program *fp = brw->fragment_program; if (fp) { - assert((fp->tex_units_used & ctx->Texture._EnabledUnits) - == fp->tex_units_used); + assert(fp->info.max_sampler <= brw->nr_samplers && + fp->info.max_texture <= brw->nr_textures); } } + + return 0; } -void brw_upload_state(struct brw_context *brw) +enum pipe_error brw_upload_state(struct brw_context *brw) { struct brw_state_flags *state = &brw->state.dirty; int i; @@ -356,7 +340,7 @@ void brw_upload_state(struct brw_context *brw) _mesa_memset(&examined, 0, sizeof(examined)); prev = *state; - for (i = 0; i < Elements(atoms); i++) { + for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = atoms[i]; struct brw_state_flags generated; @@ -364,12 +348,11 @@ void brw_upload_state(struct brw_context *brw) atom->dirty.brw || atom->dirty.cache); - if (brw->intel.Fallback) - break; - if (check_state(state, &atom->dirty)) { if (atom->emit) { - atom->emit( brw ); + ret = atom->emit( brw ); + if (ret) + return ret; } } @@ -388,12 +371,11 @@ void brw_upload_state(struct brw_context *brw) for (i = 0; i < Elements(atoms); i++) { const struct brw_tracked_state *atom = atoms[i]; - if (brw->intel.Fallback) - break; - if (check_state(state, &atom->dirty)) { if (atom->emit) { - atom->emit( brw ); + ret = atom->emit( brw ); + if (ret) + return ret; } } } @@ -407,10 +389,11 @@ void brw_upload_state(struct brw_context *brw) brw_print_dirty_count(mesa_bits, state->mesa); brw_print_dirty_count(brw_bits, state->brw); brw_print_dirty_count(cache_bits, state->cache); - fprintf(stderr, "\n"); + debug_printf("\n"); } } - - if (!brw->intel.Fallback) - memset(state, 0, sizeof(*state)); + + /* Clear dirty flags: + */ + memset(state, 0, sizeof(*state)); } diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c new file mode 100644 index 0000000000..6684f442d5 --- 
/dev/null +++ b/src/gallium/drivers/i965/brw_swtnl.c @@ -0,0 +1,114 @@ + +/* XXX: could split the primitive list to fallback only on the + * non-conformant primitives. + */ +static GLboolean check_fallbacks( struct brw_context *brw, + const struct _mesa_prim *prim, + GLuint nr_prims ) +{ + GLcontext *ctx = &brw->intel.ctx; + GLuint i; + + /* If we don't require strict OpenGL conformance, never + * use fallbacks. If we're forcing fallbacks, always + * use fallbacks. + */ + if (brw->intel.conformance_mode == 0) + return GL_FALSE; + + if (brw->intel.conformance_mode == 2) + return GL_TRUE; + + if (ctx->Polygon.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (reduced_prim[prim[i].mode] == GL_TRIANGLES) + return GL_TRUE; + } + + /* BRW hardware will do AA lines, but they are non-conformant it + * seems. TBD whether we keep this fallback: + */ + if (ctx->Line.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (reduced_prim[prim[i].mode] == GL_LINES) + return GL_TRUE; + } + + /* Stipple -- these fallbacks could be resolved with a little + * bit of work? + */ + if (ctx->Line.StippleFlag) { + for (i = 0; i < nr_prims; i++) { + /* GS doesn't get enough information to know when to reset + * the stipple counter?!? + */ + if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) + return GL_TRUE; + + if (prim[i].mode == GL_POLYGON && + (ctx->Polygon.FrontMode == GL_LINE || + ctx->Polygon.BackMode == GL_LINE)) + return GL_TRUE; + } + } + + if (ctx->Point.SmoothFlag) { + for (i = 0; i < nr_prims; i++) + if (prim[i].mode == GL_POINTS) + return GL_TRUE; + } + + /* BRW hardware doesn't handle GL_CLAMP texturing correctly; + * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP + * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and + * we want strict conformance, force the fallback. + * Right now, we only do this for 2D textures. 
+ */ + { + int u; + for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; + if (texUnit->Enabled) { + if (texUnit->Enabled & TEXTURE_1D_BIT) { + if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_2D_BIT) { + if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { + return GL_TRUE; + } + } + if (texUnit->Enabled & TEXTURE_3D_BIT) { + if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || + texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { + return GL_TRUE; + } + } + } + } + } + + /* Exceeding hw limits on number of VS inputs? + */ + if (brw->nr_ve == 0 || + brw->nr_ve >= BRW_VEP_MAX) { + return TRUE; + } + + /* Position array with zero stride? + */ + if (brw->vs[brw->ve[0]]->stride == 0) + return TRUE; + + + + /* Nothing stopping us from the fast path now */ + return GL_FALSE; +} + + + + diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h new file mode 100644 index 0000000000..32b62848da --- /dev/null +++ b/src/gallium/drivers/i965/brw_types.h @@ -0,0 +1,11 @@ +#ifndef BRW_TYPES_H +#define BRW_TYPES_H + +typedef GLuint uint32_t; +typedef GLubyte uint8_t; +typedef GLushort uint16_t; +/* no GLenum, translate all away */ + +typedef GLboolean uint8_t; + +#endif diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c index ce21aa4869..17f671a8fa 100644 --- a/src/gallium/drivers/i965/brw_util.c +++ b/src/gallium/drivers/i965/brw_util.c @@ -35,14 +35,6 @@ #include "brw_util.h" #include "brw_defines.h" -GLuint brw_count_bits( GLuint val ) -{ - GLuint i; - for (i = 0; val ; val >>= 1) - if (val & 1) - i++; - return i; -} GLuint brw_translate_blend_equation( GLenum mode ) diff --git a/src/gallium/drivers/i965/brw_vs.c 
b/src/gallium/drivers/i965/brw_vs.c index f0c79efbd9..53a5560105 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -61,9 +61,7 @@ static void do_vs_prog( struct brw_context *brw, } if (0) - _mesa_print_program(&c.vp->program.Base); - - + tgsi_dump(&c.vp->tokens, 0); /* Emit GEN4 code. */ @@ -96,9 +94,9 @@ static void brw_upload_vs_prog(struct brw_context *brw) * the inputs it asks for, whether they are varying or not. */ key.program_string_id = vp->id; - key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); - key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || - ctx->Polygon.BackMode != GL_FILL); + key.nr_userclip = brw->nr_userclip; + key.copy_edgeflag = (brw->rast->fill_ccw != PIPE_POLYGON_MODE_FILL || + brw->rast->fill_cw != PIPE_POLYGON_MODE_FILL); /* Make an early check for the key. */ @@ -116,7 +114,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON, + .mesa = PIPE_NEW_UCP | PIPE_NEW_RAST, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 1638ef8111..7f20c4baca 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -33,7 +33,7 @@ #include "main/macros.h" #include "shader/program.h" #include "shader/prog_parameter.h" -#include "shader/prog_print.h" +#include "pipe/p_shader_tokens.h" #include "brw_context.h" #include "brw_vs.h" @@ -129,6 +129,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) reg++; } } + /* If there are no inputs, we'll still be reading one attribute's worth * because it's required -- see urb_read_length setting. 
*/ @@ -226,6 +227,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * vertex urb, so is half the amount: */ c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2; + /* Setting this field to 0 leads to undefined behavior according to the * the VS_STATE docs. Our VUEs will always have at least one attribute * sitting in them, even if it's padding. @@ -960,9 +962,6 @@ static void emit_arl( struct brw_vs_compile *c, /** * Return the brw reg for the given instruction's src argument. - * Will return mangled results for SWZ op. The emit_swz() function - * ignores this result and recalculates taking extended swizzles into - * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, const struct prog_instruction *inst, @@ -1024,74 +1023,6 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, } -static void emit_swz( struct brw_vs_compile *c, - struct brw_reg dst, - const struct prog_instruction *inst) -{ - const GLuint argIndex = 0; - const struct prog_src_register src = inst->SrcReg[argIndex]; - struct brw_compile *p = &c->func; - GLuint zeros_mask = 0; - GLuint ones_mask = 0; - GLuint src_mask = 0; - GLubyte src_swz[4]; - GLboolean need_tmp = (src.Negate && - dst.file != BRW_GENERAL_REGISTER_FILE); - struct brw_reg tmp = dst; - GLuint i; - - if (need_tmp) - tmp = get_tmp(c); - - for (i = 0; i < 4; i++) { - if (dst.dw1.bits.writemask & (1<vp->program.Base.Instructions[insn]; - for (i = 0; i < 3; i++) { - struct prog_src_register *src = &inst->SrcReg[i]; - GLuint index = src->Index; - GLuint file = src->File; - if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS) - c->output_regs[index].used_in_src = GL_TRUE; - } - } - /* Static register allocation */ brw_vs_alloc_regs(c); @@ -1362,18 +1279,14 @@ void brw_vs_emit(struct brw_vs_compile *c ) _mesa_print_instruction(inst); #endif - /* Get argument regs. SWZ is special and does this itself. + /* Get argument regs. 
*/ - if (inst->Opcode != OPCODE_SWZ) - for (i = 0; i < 3; i++) { - const struct prog_src_register *src = &inst->SrcReg[i]; - index = src->Index; - file = src->File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - args[i] = c->output_regs[index].reg; - else - args[i] = get_arg(c, inst, i); - } + for (i = 0; i < 3; i++) { + const struct prog_src_register *src = &inst->SrcReg[i]; + index = src->Index; + file = src->File; + args[i] = get_arg(c, inst, i); + } /* Get dest regs. Note that it is possible for a reg to be both * dst and arg, given the static allocation of registers. So @@ -1381,10 +1294,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) */ index = inst->DstReg.Index; file = inst->DstReg.File; - if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) - dst = c->output_regs[index].reg; - else - dst = get_dst(c, inst->DstReg); + dst = get_dst(c, inst->DstReg); if (inst->SaturateMode != SATURATE_OFF) { _mesa_problem(NULL, "Unsupported saturate %d in vertex shader", @@ -1392,151 +1302,144 @@ void brw_vs_emit(struct brw_vs_compile *c ) } switch (inst->Opcode) { - case OPCODE_ABS: + case TGSI_OPCODE_ABS: brw_MOV(p, dst, brw_abs(args[0])); break; - case OPCODE_ADD: + case TGSI_OPCODE_ADD: brw_ADD(p, dst, args[0], args[1]); break; - case OPCODE_COS: + case TGSI_OPCODE_COS: emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_DP3: + case TGSI_OPCODE_DP3: brw_DP3(p, dst, args[0], args[1]); break; - case OPCODE_DP4: + case TGSI_OPCODE_DP4: brw_DP4(p, dst, args[0], args[1]); break; - case OPCODE_DPH: + case TGSI_OPCODE_DPH: brw_DPH(p, dst, args[0], args[1]); break; - case OPCODE_NRM3: + case TGSI_OPCODE_NRM3: emit_nrm(c, dst, args[0], 3); break; - case OPCODE_NRM4: + case TGSI_OPCODE_NRM4: emit_nrm(c, dst, args[0], 4); break; - case OPCODE_DST: + case TGSI_OPCODE_DST: unalias2(c, dst, args[0], args[1], emit_dst_noalias); break; - case OPCODE_EXP: + case TGSI_OPCODE_EXP: unalias1(c, dst, args[0], 
emit_exp_noalias); break; - case OPCODE_EX2: + case TGSI_OPCODE_EX2: emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_ARL: + case TGSI_OPCODE_ARL: emit_arl(c, dst, args[0]); break; - case OPCODE_FLR: + case TGSI_OPCODE_FLR: brw_RNDD(p, dst, args[0]); break; - case OPCODE_FRC: + case TGSI_OPCODE_FRC: brw_FRC(p, dst, args[0]); break; - case OPCODE_LOG: + case TGSI_OPCODE_LOG: unalias1(c, dst, args[0], emit_log_noalias); break; - case OPCODE_LG2: + case TGSI_OPCODE_LG2: emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_LIT: + case TGSI_OPCODE_LIT: unalias1(c, dst, args[0], emit_lit_noalias); break; - case OPCODE_LRP: + case TGSI_OPCODE_LRP: unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias); break; - case OPCODE_MAD: + case TGSI_OPCODE_MAD: brw_MOV(p, brw_acc_reg(), args[2]); brw_MAC(p, dst, args[0], args[1]); break; - case OPCODE_MAX: + case TGSI_OPCODE_MAX: emit_max(p, dst, args[0], args[1]); break; - case OPCODE_MIN: + case TGSI_OPCODE_MIN: emit_min(p, dst, args[0], args[1]); break; - case OPCODE_MOV: + case TGSI_OPCODE_MOV: brw_MOV(p, dst, args[0]); break; - case OPCODE_MUL: + case TGSI_OPCODE_MUL: brw_MUL(p, dst, args[0], args[1]); break; - case OPCODE_POW: + case TGSI_OPCODE_POW: emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); break; - case OPCODE_RCP: + case TGSI_OPCODE_RCP: emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_RSQ: + case TGSI_OPCODE_RSQ: emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); break; - - case OPCODE_SEQ: + case TGSI_OPCODE_SEQ: emit_seq(p, dst, args[0], args[1]); break; - case OPCODE_SIN: + case TGSI_OPCODE_SIN: emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL); break; - case OPCODE_SNE: + case TGSI_OPCODE_SNE: emit_sne(p, dst, args[0], args[1]); break; - case OPCODE_SGE: + case 
TGSI_OPCODE_SGE: emit_sge(p, dst, args[0], args[1]); break; - case OPCODE_SGT: + case TGSI_OPCODE_SGT: emit_sgt(p, dst, args[0], args[1]); break; - case OPCODE_SLT: + case TGSI_OPCODE_SLT: emit_slt(p, dst, args[0], args[1]); break; - case OPCODE_SLE: + case TGSI_OPCODE_SLE: emit_sle(p, dst, args[0], args[1]); break; - case OPCODE_SUB: + case TGSI_OPCODE_SUB: brw_ADD(p, dst, args[0], negate(args[1])); break; - case OPCODE_SWZ: - /* The args[0] value can't be used here as it won't have - * correctly encoded the full swizzle: - */ - emit_swz(c, dst, inst); - break; - case OPCODE_TRUNC: + case TGSI_OPCODE_TRUNC: /* round toward zero */ brw_RNDZ(p, dst, args[0]); break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: emit_xpd(p, dst, args[0], args[1]); break; - case OPCODE_IF: + case TGSI_OPCODE_IF: assert(if_depth < MAX_IF_DEPTH); if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); /* Note that brw_IF smashes the predicate_control field. */ if_inst[if_depth]->header.predicate_control = get_predicate(inst); if_depth++; break; - case OPCODE_ELSE: + case TGSI_OPCODE_ELSE: if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); break; - case OPCODE_ENDIF: + case TGSI_OPCODE_ENDIF: assert(if_depth > 0); brw_ENDIF(p, if_inst[--if_depth]); break; - case OPCODE_BGNLOOP: + case TGSI_OPCODE_BGNLOOP: loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; - case OPCODE_BRK: + case TGSI_OPCODE_BRK: brw_set_predicate_control(p, get_predicate(inst)); brw_BREAK(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_CONT: + case TGSI_OPCODE_CONT: brw_set_predicate_control(p, get_predicate(inst)); brw_CONT(p); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_ENDLOOP: + case TGSI_OPCODE_ENDLOOP: { struct brw_instruction *inst0, *inst1; GLuint br = 1; @@ -1550,23 +1453,23 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* patch all the BREAK/CONT instructions from last BEGINLOOP */ while (inst0 > loop_inst[loop_depth]) { inst0--; - if 
(inst0->header.opcode == BRW_OPCODE_BREAK) { + if (inst0->header.opcode == BRW_TGSI_OPCODE_BREAK) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); inst0->bits3.if_else.pop_count = 0; } - else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) { + else if (inst0->header.opcode == BRW_TGSI_OPCODE_CONTINUE) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0); inst0->bits3.if_else.pop_count = 0; } } } break; - case OPCODE_BRA: + case TGSI_OPCODE_BRA: brw_set_predicate_control(p, get_predicate(inst)); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); break; - case OPCODE_CAL: + case TGSI_OPCODE_CAL: brw_set_access_mode(p, BRW_ALIGN_1); brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); brw_set_access_mode(p, BRW_ALIGN_16); @@ -1575,27 +1478,27 @@ void brw_vs_emit(struct brw_vs_compile *c ) brw_save_call(p, inst->Comment, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; - case OPCODE_RET: + case TGSI_OPCODE_RET: brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(-4)); brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); brw_set_access_mode(p, BRW_ALIGN_16); break; - case OPCODE_END: + case TGSI_OPCODE_END: end_offset = p->nr_insn; /* this instruction will get patched later to jump past subroutine * code, etc. 
*/ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; - case OPCODE_PRINT: + case TGSI_OPCODE_PRINT: /* no-op */ break; - case OPCODE_BGNSUB: + case TGSI_OPCODE_BGNSUB: brw_save_label(p, inst->Comment, p->nr_insn); break; - case OPCODE_ENDSUB: + case TGSI_OPCODE_ENDSUB: /* no-op */ break; default: @@ -1618,33 +1521,6 @@ void brw_vs_emit(struct brw_vs_compile *c ) hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; } - if ((inst->DstReg.File == PROGRAM_OUTPUT) - && (inst->DstReg.Index != VERT_RESULT_HPOS) - && c->output_regs[inst->DstReg.Index].used_in_src) { - brw_MOV(p, get_dst(c, inst->DstReg), dst); - } - - /* Result color clamping. - * - * When destination register is an output register and - * it's primary/secondary front/back color, we have to clamp - * the result to [0,1]. This is done by enabling the - * saturation bit for the last instruction. - * - * We don't use brw_set_saturate() as it modifies - * p->current->header.saturate, which affects all the subsequent - * instructions. Instead, we directly modify the header - * of the last (already stored) instruction. - */ - if (inst->DstReg.File == PROGRAM_OUTPUT) { - if ((inst->DstReg.Index == VERT_RESULT_COL0) - || (inst->DstReg.Index == VERT_RESULT_COL1) - || (inst->DstReg.Index == VERT_RESULT_BFC0) - || (inst->DstReg.Index == VERT_RESULT_BFC1)) { - p->store[p->nr_insn-1].header.saturate = 1; - } - } - release_tmps(c); } diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 2292de94c4..20d31880b4 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -269,61 +269,46 @@ static void brw_wm_populate_key( struct brw_context *brw, uses_depth, key); + /* Revisit this, figure out if it's really useful, and either push + * it into the state tracker so that everyone benefits (use to + * create fs variants with TEX rather than TXP), or discard. 
+ */ + key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/ - /* BRW_NEW_WM_INPUT_DIMENSIONS */ - key->proj_attrib_mask = brw->wm.input_size_masks[4-1]; - - /* _NEW_LIGHT */ - key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + /* PIPE_NEW_RAST */ + key->flat_shade = brw->rast.flat_shade; - /* _NEW_HINT */ - key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + /* This can be determined by looking at the INTERP mode each input decl. + */ + key->linear_color = 0; /* _NEW_TEXTURE */ for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; - - if (unit->_ReallyEnabled) { - const struct gl_texture_object *t = unit->_Current; - const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + if (i < brw->nr_textures) { + const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; + const struct gl_texture_object *t = unit->_Current; + const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; + if (img->InternalFormat == GL_YCBCR_MESA) { key->yuvtex_mask |= 1 << i; if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) - key->yuvtex_swap_mask |= 1 << i; + key->yuvtex_swap_mask |= 1 << i; } - key->tex_swizzles[i] = t->_Swizzle; + key->tex_swizzles[i] = t->_Swizzle; + + if (0) + key->shadowtex_mask |= 1<tex_swizzles[i] = SWIZZLE_NOOP; } } - /* Shadow */ - key->shadowtex_mask = fp->program.Base.ShadowSamplers; - /* _NEW_BUFFERS */ - /* - * Include the draw buffer origin and height so that we can calculate - * fragment position values relative to the bottom left of the drawable, - * from the incoming screen origin relative position we get as part of our - * payload. 
- * - * We could avoid recompiling by including this as a constant referenced by - * our program, but if we were to do that it would also be nice to handle - * getting that constant updated at batchbuffer submit time (when we - * hold the lock and know where the buffer really is) rather than at emit - * time when we don't hold the lock and are just guessing. We could also - * just avoid using this as key data if the program doesn't use - * fragment.position. - * - * This pretty much becomes moot with DRI2 and redirected buffers anyway, - * as our origins will always be zero then. - */ + /* _NEW_FRAMEBUFFER */ if (brw->intel.driDrawable != NULL) { - key->origin_x = brw->intel.driDrawable->x; - key->origin_y = brw->intel.driDrawable->y; - key->drawable_height = brw->intel.driDrawable->h; + key->drawable_height = brw->fb.cbufs[0].height; } /* CACHE_NEW_VS_PROG */ diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 872b1f3ecf..756a680150 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -76,7 +76,6 @@ struct brw_wm_prog_key { GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; GLuint program_string_id:32; - GLuint origin_x, origin_y; GLuint drawable_height; GLuint vp_outputs_written; }; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index bf80a2942a..9c47c46a3d 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -125,23 +125,21 @@ static void emit_wpos_xy(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; - /* Calculate the pixel offset from window bottom left into destination - * X and Y channels. - */ if (mask & WRITEMASK_X) { - /* X' = X - origin */ - brw_ADD(p, + /* X' = X */ + brw_MOV(p, dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); + retype(arg0[0], BRW_REGISTER_TYPE_W)); } + /* XXX: is this needed any more, or is this a NOOP? 
+ */ if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + /* Y' = height - 1 - Y */ brw_ADD(p, dst[1], negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + brw_imm_d(c->key.drawable_height - 1)); } } @@ -1376,7 +1374,6 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; case OPCODE_MOV: - case OPCODE_SWZ: emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 4e3edfbbff..5f47d86f71 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -30,25 +30,12 @@ */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" +#include "pipe/p_shader_constants.h" + #include "brw_context.h" #include "brw_wm.h" #include "brw_util.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/prog_statevars.h" - - -/** An invalid texture target */ -#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS - -/** An invalid texture unit */ -#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT - -#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS #define X 0 #define Y 1 @@ -68,11 +55,6 @@ static const char *wm_opcode_strings[] = { "FRONTFACING", }; -#if 0 -static const char *wm_file_strings[] = { - "PAYLOAD" -}; -#endif /*********************************************************************** @@ -165,13 +147,13 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c ) } c->fp_temp |= 1<<(bit-1); - return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1)); + return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1)); } static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) { - c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP)); + c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp)); } @@ -192,58 +174,29 @@ static struct prog_instruction *emit_insn(struct 
brw_wm_compile *c, return inst; } -static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c, - GLuint op, - struct prog_dst_register dest, - GLuint saturate, - GLuint tex_src_unit, - GLuint tex_src_target, - GLuint tex_shadow, - struct prog_src_register src0, - struct prog_src_register src1, - struct prog_src_register src2 ) +static struct prog_instruction * emit_op(struct brw_wm_compile *c, + GLuint op, + struct prog_dst_register dest, + GLuint saturate, + struct prog_src_register src0, + struct prog_src_register src1, + struct prog_src_register src2 ) { struct prog_instruction *inst = get_fp_inst(c); - assert(tex_src_unit < BRW_MAX_TEX_UNIT || - tex_src_unit == TEX_UNIT_NONE); - assert(tex_src_target < NUM_TEXTURE_TARGETS || - tex_src_target == TEX_TARGET_NONE); - - /* update mask of which texture units are referenced by this program */ - if (tex_src_unit != TEX_UNIT_NONE) - c->fp->tex_units_used |= (1 << tex_src_unit); - memset(inst, 0, sizeof(*inst)); inst->Opcode = op; inst->DstReg = dest; inst->SaturateMode = saturate; - inst->TexSrcUnit = tex_src_unit; - inst->TexSrcTarget = tex_src_target; - inst->TexShadow = tex_shadow; inst->SrcReg[0] = src0; inst->SrcReg[1] = src1; inst->SrcReg[2] = src2; return inst; } - - -static struct prog_instruction * emit_op(struct brw_wm_compile *c, - GLuint op, - struct prog_dst_register dest, - GLuint saturate, - struct prog_src_register src0, - struct prog_src_register src1, - struct prog_src_register src2 ) -{ - return emit_tex_op(c, op, dest, saturate, - TEX_UNIT_NONE, TEX_TARGET_NONE, 0, /* unit, tgt, shadow */ - src0, src1, src2); -} -/* Many Mesa opcodes produce the same value across all the result channels. +/* Many opcodes produce the same value across all the result channels. * We'd rather not have to support that splatting in the opcode implementations, * and brw_wm_pass*.c wants to optimize them out by shuffling references around * anyway. 
We can easily get both by emitting the opcode to one channel, and @@ -267,7 +220,7 @@ static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); if (other_channel_mask != 0) { inst = emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(inst0->DstReg, other_channel_mask), 0, src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), @@ -356,7 +309,9 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) } static void emit_interp( struct brw_wm_compile *c, - GLuint idx ) + GLuint semantic, + GLuint semantic_index, + GLuint interp_mode ) { struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); @@ -366,7 +321,7 @@ static void emit_interp( struct brw_wm_compile *c, * multiplied by 1/W in the SF program, and LINTERP on those * which have not: */ - switch (idx) { + switch (semantic) { case FRAG_ATTRIB_WPOS: /* Have to treat wpos.xy specially: */ @@ -390,8 +345,8 @@ static void emit_interp( struct brw_wm_compile *c, deltas, src_undef()); break; - case FRAG_ATTRIB_COL0: - case FRAG_ATTRIB_COL1: + + case TGSI_SEMANTIC_COLOR: if (c->key.flat_shade) { emit_op(c, WM_CINTERP, @@ -402,25 +357,13 @@ static void emit_interp( struct brw_wm_compile *c, src_undef()); } else { - if (c->key.linear_color) { - emit_op(c, - WM_LINTERP, - dst, - 0, - interp, - deltas, - src_undef()); - } - else { - /* perspective-corrected color interpolation */ - emit_op(c, - WM_PINTERP, - dst, - 0, - interp, - deltas, - get_pixel_w(c)); - } + emit_op(c, + translate_interp_mode(interp_mode), + dst, + 0, + interp, + deltas, + src_undef()); } break; case FRAG_ATTRIB_FOGC: @@ -434,7 +377,7 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_YZW), 0, src_swizzle(interp, @@ -468,7 +411,7 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); 
emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_ZW), 0, src_swizzle(interp, @@ -482,7 +425,7 @@ static void emit_interp( struct brw_wm_compile *c, default: emit_op(c, - WM_PINTERP, + translate_interp_mode(interp_mode), dst, 0, interp, @@ -490,8 +433,6 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); break; } - - c->fp_interp_emitted |= 1<SaturateMode, src0, @@ -596,7 +537,7 @@ static void precalc_dst( struct brw_wm_compile *c, /* dst.xz = swz src0.1zzz */ swz = emit_op(c, - OPCODE_SWZ, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_XZ), inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), @@ -609,7 +550,7 @@ static void precalc_dst( struct brw_wm_compile *c, /* dst.w = mov src1.w */ emit_op(c, - OPCODE_MOV, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_W), inst->SaturateMode, src1, @@ -631,7 +572,7 @@ static void precalc_lit( struct brw_wm_compile *c, /* dst.xw = swz src0.1111 */ swz = emit_op(c, - OPCODE_SWZ, + TGSI_OPCODE_MOV, dst_mask(dst, WRITEMASK_XW), 0, src_swizzle1(src0, SWIZZLE_ONE), @@ -643,7 +584,7 @@ static void precalc_lit( struct brw_wm_compile *c, if (dst.WriteMask & WRITEMASK_YZ) { emit_op(c, - OPCODE_LIT, + TGSI_OPCODE_LIT, dst_mask(dst, WRITEMASK_YZ), inst->SaturateMode, src0, @@ -681,7 +622,7 @@ static void precalc_tex( struct brw_wm_compile *c, coord = src_reg_from_dst(tmpcoord); /* tmpcoord = src0 (i.e.: coord = src0) */ - out = emit_op(c, OPCODE_MOV, + out = emit_op(c, TGSI_OPCODE_MOV, tmpcoord, 0, src0, @@ -691,7 +632,7 @@ static void precalc_tex( struct brw_wm_compile *c, out->SrcReg[0].Abs = 1; /* tmp0 = MAX(coord.X, coord.Y) */ - emit_op(c, OPCODE_MAX, + emit_op(c, TGSI_OPCODE_MAX, tmp0, 0, src_swizzle1(coord, X), @@ -699,7 +640,7 @@ static void precalc_tex( struct brw_wm_compile *c, src_undef()); /* tmp1 = MAX(tmp0, coord.Z) */ - emit_op(c, OPCODE_MAX, + emit_op(c, TGSI_OPCODE_MAX, tmp1, 0, tmp0src, @@ -707,7 +648,7 @@ static void precalc_tex( struct brw_wm_compile *c, src_undef()); /* tmp0 = 1 / 
tmp1 */ - emit_op(c, OPCODE_RCP, + emit_op(c, TGSI_OPCODE_RCP, dst_mask(tmp0, WRITEMASK_X), 0, tmp1src, @@ -715,7 +656,7 @@ static void precalc_tex( struct brw_wm_compile *c, src_undef()); /* tmpCoord = src0 * tmp0 */ - emit_op(c, OPCODE_MUL, + emit_op(c, TGSI_OPCODE_MUL, tmpcoord, 0, src0, @@ -738,7 +679,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } */ emit_op(c, - OPCODE_MUL, + TGSI_OPCODE_MUL, tmpcoord, 0, inst->SrcReg[0], @@ -785,7 +726,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp = TEX ... */ emit_tex_op(c, - OPCODE_TEX, + TGSI_OPCODE_TEX, tmp, inst->SaturateMode, unit, @@ -798,7 +739,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* tmp.xyz = ADD TMP, C0 */ emit_op(c, - OPCODE_ADD, + TGSI_OPCODE_ADD, dst_mask(tmp, WRITEMASK_XYZ), 0, tmpsrc, @@ -809,7 +750,7 @@ static void precalc_tex( struct brw_wm_compile *c, */ emit_op(c, - OPCODE_MUL, + TGSI_OPCODE_MUL, dst_mask(tmp, WRITEMASK_Y), 0, tmpsrc, @@ -824,7 +765,7 @@ static void precalc_tex( struct brw_wm_compile *c, */ emit_op(c, - OPCODE_MAD, + TGSI_OPCODE_MAD, dst_mask(dst, WRITEMASK_XYZ), 0, swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), @@ -834,7 +775,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* RGB.y = MAD YUV.z, C1.w, RGB.y */ emit_op(c, - OPCODE_MAD, + TGSI_OPCODE_MAD, dst_mask(dst, WRITEMASK_Y), 0, src_swizzle1(tmpsrc, Z), @@ -846,7 +787,7 @@ static void precalc_tex( struct brw_wm_compile *c, else { /* ordinary RGBA tex instruction */ emit_tex_op(c, - OPCODE_TEX, + TGSI_OPCODE_TEX, inst->DstReg, inst->SaturateMode, unit, @@ -861,7 +802,7 @@ static void precalc_tex( struct brw_wm_compile *c, if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) { /* swizzle the result of the TEX instruction */ struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); - emit_op(c, OPCODE_SWZ, + emit_op(c, TGSI_OPCODE_MOV, inst->DstReg, SATURATE_OFF, /* saturate already done above */ 
src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), @@ -884,7 +825,7 @@ static GLboolean projtex( struct brw_wm_compile *c, const struct prog_src_register src = inst->SrcReg[0]; GLboolean retVal; - assert(inst->Opcode == OPCODE_TXP); + assert(inst->Opcode == TGSI_OPCODE_TXP); /* Only try to detect the simplest cases. Could detect (later) * cases where we are trying to emit code like RCP {1.0}, MUL x, @@ -921,7 +862,7 @@ static void precalc_txp( struct brw_wm_compile *c, /* tmp0.w = RCP inst.arg[0][3] */ emit_op(c, - OPCODE_RCP, + TGSI_OPCODE_RCP, dst_mask(tmp, WRITEMASK_W), 0, src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), @@ -931,7 +872,7 @@ static void precalc_txp( struct brw_wm_compile *c, /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww */ emit_op(c, - OPCODE_MUL, + TGSI_OPCODE_MUL, dst_mask(tmp, WRITEMASK_XYZ), 0, src0, @@ -1015,6 +956,7 @@ static void validate_src_regs( struct brw_wm_compile *c, GLuint idx = inst->SrcReg[i].Index; if (!(c->fp_interp_emitted & (1<fp_interp_emitted |= 1<Opcode) { - case OPCODE_SWZ: + case TGSI_OPCODE_ABS: out = emit_insn(c, inst); - out->Opcode = OPCODE_MOV; - break; - - case OPCODE_ABS: - out = emit_insn(c, inst); - out->Opcode = OPCODE_MOV; + out->Opcode = TGSI_OPCODE_MOV; out->SrcReg[0].Negate = NEGATE_NONE; out->SrcReg[0].Abs = 1; break; - case OPCODE_SUB: + case TGSI_OPCODE_SUB: out = emit_insn(c, inst); - out->Opcode = OPCODE_ADD; + out->Opcode = TGSI_OPCODE_ADD; out->SrcReg[1].Negate ^= NEGATE_XYZW; break; - case OPCODE_SCS: + case TGSI_OPCODE_SCS: out = emit_insn(c, inst); /* This should probably be done in the parser. 
*/ out->DstReg.WriteMask &= WRITEMASK_XY; break; - case OPCODE_DST: + case TGSI_OPCODE_DST: precalc_dst(c, inst); break; - case OPCODE_LIT: + case TGSI_OPCODE_LIT: precalc_lit(c, inst); break; - case OPCODE_TEX: + case TGSI_OPCODE_TEX: precalc_tex(c, inst); break; - case OPCODE_TXP: + case TGSI_OPCODE_TXP: precalc_txp(c, inst); break; - case OPCODE_TXB: + case TGSI_OPCODE_TXB: out = emit_insn(c, inst); out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: out = emit_insn(c, inst); /* This should probably be done in the parser. */ out->DstReg.WriteMask &= WRITEMASK_XYZ; break; - case OPCODE_KIL: + case TGSI_OPCODE_KIL: out = emit_insn(c, inst); /* This should probably be done in the parser. */ out->DstReg.WriteMask = 0; break; - case OPCODE_END: + case TGSI_OPCODE_END: emit_fb_write(c); break; - case OPCODE_PRINT: - break; default: if (brw_wm_is_scalar_result(inst->Opcode)) emit_scalar_insn(c, inst); diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index c9fe1dd8ad..d836e2fb34 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -6,9 +6,6 @@ #include "brw_eu.h" #include "brw_wm.h" -enum _subroutine { - SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4 -}; static struct brw_reg get_dst_reg(struct brw_wm_compile *c, const struct prog_instruction *inst, @@ -32,10 +29,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) case OPCODE_CAL: case OPCODE_BRK: case OPCODE_RET: - case OPCODE_NOISE1: - case OPCODE_NOISE2: - case OPCODE_NOISE3: - case OPCODE_NOISE4: case OPCODE_BGNLOOP: return GL_TRUE; default: @@ -1495,1036 +1488,7 @@ static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) 0, 16, 2 ); } -/* One-, two- and three-dimensional Perlin noise, similar to the description - in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. 
*/ -static void noise1_sub( struct brw_wm_compile *c ) { - struct brw_compile *p = &c->func; - struct brw_reg param, - x0, x1, /* gradients at each end */ - t, tmp[ 2 ], /* float temporaries */ - itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */ - int i; - int mark = mark_tmps( c ); - - x0 = alloc_tmp( c ); - x1 = alloc_tmp( c ); - t = alloc_tmp( c ); - tmp[ 0 ] = alloc_tmp( c ); - tmp[ 1 ] = alloc_tmp( c ); - itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD ); - itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD ); - itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD ); - itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD ); - itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD ); - - param = lookup_tmp( c, mark - 2 ); - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ - - /* Arrange the two end coordinates into scalars (itmp0/itmp1) to - be hashed. Also compute the remainder (offset within the unit - length), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param ); - brw_FRC( p, param, param ); - brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) ); - brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ - brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ - - /* We're now ready to perform the hashing. The two hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 32x16 - bit multiplication, and 16-bit swizzles (which we get for - free). We can't use immediate operands in the multiplies, - because immediates are permitted only in src1 and the 16-bit - factor is permitted only in src0. 
*/ - for( i = 0; i < 2; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] ); - for( i = 0; i < 2; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 2; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] ); - for( i = 0; i < 2; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 2; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); - for( i = 0; i < 2; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - - /* Now we want to initialise the two gradients based on the - hashes. Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 31 ), but - we correct for that right at the end. */ - brw_ADD( p, t, param, brw_imm_f( -1.0 ) ); - brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) ); - brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) ); - - brw_MUL( p, x0, x0, param ); - brw_MUL( p, x1, x1, t ); - - /* We interpolate between the gradients using the polynomial - 6t^5 - 15t^4 + 10t^3 (Perlin). 
*/ - brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); - brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the - pipeline */ - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param ); - brw_MUL( p, param, tmp[ 0 ], param ); - brw_MUL( p, x1, x1, param ); - brw_ADD( p, x0, x0, x1 ); - /* scale by pow( 2, -30 ), to compensate for the format conversion - above and an extra factor of 2 so that a single gradient covers - the [-1,1] range */ - brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise1( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src, param, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src = get_src_reg( c, inst, 0, 0 ); - - param = alloc_tmp( c ); - - brw_MOV( p, param, src ); - - invoke_subroutine( c, SUB_NOISE1, noise1_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} - -static void noise2_sub( struct brw_wm_compile *c ) { - - struct brw_compile *p = &c->func; - struct brw_reg param0, param1, - x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */ - t, tmp[ 4 ], /* float temporaries */ - itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */ - int i; - int mark = mark_tmps( c ); - - x0y0 = alloc_tmp( c ); - x0y1 = alloc_tmp( c ); - x1y0 = alloc_tmp( c ); - x1y1 = alloc_tmp( c ); - t = alloc_tmp( c ); - for( i = 0; i < 4; i++ ) { - tmp[ i ] = alloc_tmp( c ); - itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); - } - itmp[ 4 ] = retype( x0y0, 
BRW_REGISTER_TYPE_UD ); - itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD ); - itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD ); - - param0 = lookup_tmp( c, mark - 3 ); - param1 = lookup_tmp( c, mark - 2 ); - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to - be hashed. Also compute the remainders (offsets within the unit - square), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); - brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); - brw_FRC( p, param0, param0 ); - brw_FRC( p, param1, param1 ); - brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */ - brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ), - low_words( itmp[ 1 ] ) ); - brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */ - brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */ - brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) ); - brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) ); - brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) ); - - /* We're now ready to perform the hashing. The four hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 32x16 - bit multiplication, and 16-bit swizzles (which we get for - free). We can't use immediate operands in the multiplies, - because immediates are permitted only in src1 and the 16-bit - factor is permitted only in src0. 
*/ - for( i = 0; i < 4; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ), - high_words( itmp[ i ] ) ); - - /* Now we want to initialise the four gradients based on the - hashes. Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 15 ), but - we correct for that right at the end. */ - brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); - brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); - brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) ); - brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) ); - - brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param0 ); - brw_MUL( p, x0y1, x0y1, param0 ); - - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 ); - brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t ); - brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t ); - - brw_ADD( p, x0y0, x0y0, tmp[ 0 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 2 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 1 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 3 ] ); - - /* We interpolate between the gradients using the polynomial - 6t^5 - 15t^4 + 10t^3 (Perlin). 
*/ - brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) ); - brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) ); - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the - pipeline */ - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) ); - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the - pipeline */ - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 ); - brw_MUL( p, param0, tmp[ 0 ], param0 ); - brw_MUL( p, param1, tmp[ 1 ], param1 ); - - /* Here we interpolate in the y dimension... */ - brw_MUL( p, x0y1, x0y1, param1 ); - brw_MUL( p, x1y1, x1y1, param1 ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. There are horrible register dependencies here, - but we have nothing else to do. 
*/ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, param0 ); - brw_ADD( p, x0y0, x0y0, x1y0 ); - - /* scale by pow( 2, -15 ), as described above */ - brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise2( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, param0, param1, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src0 = get_src_reg( c, inst, 0, 0 ); - src1 = get_src_reg( c, inst, 0, 1 ); - - param0 = alloc_tmp( c ); - param1 = alloc_tmp( c ); - - brw_MOV( p, param0, src0 ); - brw_MOV( p, param1, src1 ); - - invoke_subroutine( c, SUB_NOISE2, noise2_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} - -/** - * The three-dimensional case is much like the one- and two- versions above, - * but since the number of corners is rapidly growing we now pack 16 16-bit - * hashes into each register to extract more parallelism from the EUs. 
- */ -static void noise3_sub( struct brw_wm_compile *c ) { - - struct brw_compile *p = &c->func; - struct brw_reg param0, param1, param2, - x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ - xi, yi, zi, /* interpolation coefficients */ - t, tmp[ 8 ], /* float temporaries */ - itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ - wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ - int i; - int mark = mark_tmps( c ); - - x0y0 = alloc_tmp( c ); - x0y1 = alloc_tmp( c ); - x1y0 = alloc_tmp( c ); - x1y1 = alloc_tmp( c ); - xi = alloc_tmp( c ); - yi = alloc_tmp( c ); - zi = alloc_tmp( c ); - t = alloc_tmp( c ); - for( i = 0; i < 8; i++ ) { - tmp[ i ] = alloc_tmp( c ); - itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); - wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); - } - - param0 = lookup_tmp( c, mark - 4 ); - param1 = lookup_tmp( c, mark - 3 ); - param2 = lookup_tmp( c, mark - 2 ); - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to - be hashed. Also compute the remainders (offsets within the unit - cube), interleaved to reduce register dependency penalties. */ - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 ); - brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 ); - brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 ); - brw_FRC( p, param0, param0 ); - brw_FRC( p, param1, param1 ); - brw_FRC( p, param2, param2 ); - /* Since we now have only 16 bits of precision in the hash, we must - be more careful about thorough mixing to maintain entropy as we - squash the input vector into a small scalar. 
*/ - brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) ); - brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) ); - brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ), - brw_imm_uw( 0x9B93 ) ); - brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), - brw_imm_uw( 0xBC8F ) ); - - /* Temporarily disable the execution mask while we work with ExecSize=16 - channels (the mask is set for ExecSize=8 and is probably incorrect). - Although this might cause execution of unwanted channels, the code - writes only to temporary registers and has no side effects, so - disabling the mask is harmless. */ - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); - brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); - brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); - - /* We're now ready to perform the hashing. The eight hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 16x16 - bit multiplication, and 8-bit swizzles (which we get for - free). */ - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - brw_pop_insn_state( p ); - - /* Now we want to initialise the four rear gradients based on the - hashes. Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 15 ), but - we correct for that right at the end. 
*/ - /* x component */ - brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); - brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param0 ); - brw_MUL( p, x0y1, x0y1, param0 ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] 
); - - /* We interpolate between the gradients using the polynomial - 6t^5 - 15t^4 + 10t^3 (Perlin). */ - brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) ); - brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) ); - brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) ); - brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) ); - brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) ); - brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) ); - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) ); - brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) ); - brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) ); - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */ - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */ - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - brw_MUL( p, xi, xi, param0 ); - brw_MUL( p, yi, yi, param1 ); - brw_MUL( p, zi, zi, param2 ); - - /* Here we interpolate in the y dimension... */ - brw_MUL( p, x0y1, x0y1, yi ); - brw_MUL( p, x1y1, x1y1, yi ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, xi ); - brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); - - /* Now do the same thing for the front four gradients... 
*/ - /* x component */ - brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param0 ); - brw_MUL( p, x0y1, x0y1, param0 ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param2, brw_imm_f( -1.0 ) ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* The interpolation coefficients are still around from last 
time, so - again interpolate in the y dimension... */ - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); - brw_MUL( p, x0y1, x0y1, yi ); - brw_MUL( p, x1y1, x1y1, yi ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. The rear face is in tmp[ 0 ] (see above), so this - time put the front face in tmp[ 1 ] and we're nearly there... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, xi ); - brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); - - /* The final interpolation, in the z dimension: */ - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); - - /* scale by pow( 2, -15 ), as described above */ - brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise3( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, src2, param0, param1, param2, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src0 = get_src_reg( c, inst, 0, 0 ); - src1 = get_src_reg( c, inst, 0, 1 ); - src2 = get_src_reg( c, inst, 0, 2 ); - - param0 = alloc_tmp( c ); - param1 = alloc_tmp( c ); - param2 = alloc_tmp( c ); - - brw_MOV( p, param0, src0 ); - brw_MOV( p, param1, src1 ); - brw_MOV( p, param2, src2 ); - - invoke_subroutine( c, SUB_NOISE3, noise3_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} - -/** - * For the four-dimensional case, the little micro-optimisation benefits - * we obtain by unrolling all the loops aren't worth the massive bloat it - * now causes. 
Instead, we loop twice around performing a similar operation - * to noise3, once for the w=0 cube and once for the w=1, with a bit more - * code to glue it all together. - */ -static void noise4_sub( struct brw_wm_compile *c ) -{ - struct brw_compile *p = &c->func; - struct brw_reg param[ 4 ], - x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */ - w0, /* noise for the w=0 cube */ - floors[ 2 ], /* integer coordinates of base corner of hypercube */ - interp[ 4 ], /* interpolation coefficients */ - t, tmp[ 8 ], /* float temporaries */ - itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */ - wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */ - int i, j; - int mark = mark_tmps( c ); - GLuint loop, origin; - - x0y0 = alloc_tmp( c ); - x0y1 = alloc_tmp( c ); - x1y0 = alloc_tmp( c ); - x1y1 = alloc_tmp( c ); - t = alloc_tmp( c ); - w0 = alloc_tmp( c ); - floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); - floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD ); - - for( i = 0; i < 4; i++ ) { - param[ i ] = lookup_tmp( c, mark - 5 + i ); - interp[ i ] = alloc_tmp( c ); - } - - for( i = 0; i < 8; i++ ) { - tmp[ i ] = alloc_tmp( c ); - itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD ); - wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 ); - } - - brw_set_access_mode( p, BRW_ALIGN_1 ); - - /* We only want 16 bits of precision from the integral part of each - co-ordinate, but unfortunately the RNDD semantics would saturate - at 16 bits if we performed the operation directly to a 16-bit - destination. Therefore, we round to 32-bit temporaries where - appropriate, and then store only the lower 16 bits. 
*/ - brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] ); - brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] ); - brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] ); - brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] ); - brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) ); - brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) ); - - /* Modify the flag register here, because the side effect is useful - later (see below). We know for certain that all flags will be - cleared, since the FRC instruction cannot possibly generate - negative results. Even for exceptional inputs (infinities, denormals, - NaNs), the architecture guarantees that the L conditional is false. */ - brw_set_conditionalmod( p, BRW_CONDITIONAL_L ); - brw_FRC( p, param[ 0 ], param[ 0 ] ); - brw_set_predicate_control( p, BRW_PREDICATE_NONE ); - for( i = 1; i < 4; i++ ) - brw_FRC( p, param[ i ], param[ i ] ); - - /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first - of all. */ - for( i = 0; i < 4; i++ ) - brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) ); - for( i = 0; i < 4; i++ ) - brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); - for( i = 0; i < 4; i++ ) - brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) ); - for( j = 0; j < 3; j++ ) - for( i = 0; i < 4; i++ ) - brw_MUL( p, interp[ i ], interp[ i ], param[ i ] ); - - /* Mark the current address, as it will be a jump destination. The - following code will be executed twice: first, with the flag - register clear indicating the w=0 case, and second with flags - set for w=1. */ - loop = p->nr_insn; - - /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to - be hashed. 
Since we have only 16 bits of precision in the hash, we - must be careful about thorough mixing to maintain entropy as we - squash the input vector into a small scalar. */ - brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ), - brw_imm_uw( 0xBC8F ) ); - brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ), - brw_imm_uw( 0xD0BD ) ); - brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ), - brw_imm_uw( 0x9B93 ) ); - brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ), - brw_imm_uw( 0xA359 ) ); - brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ), - brw_imm_uw( 0xBC8F ) ); - - /* Temporarily disable the execution mask while we work with ExecSize=16 - channels (the mask is set for ExecSize=8 and is probably incorrect). - Although this might cause execution of unwanted channels, the code - writes only to temporary registers and has no side effects, so - disabling the mask is harmless. */ - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); - brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); - brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); - - /* We're now ready to perform the hashing. The eight hashes are - interleaved for performance. The hash function used is - designed to rapidly achieve avalanche and require only 16x16 - bit multiplication, and 8-bit swizzles (which we get for - free). */ - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - for( i = 0; i < 4; i++ ) - brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) ); - for( i = 0; i < 4; i++ ) - brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ), - odd_bytes( wtmp[ i ] ) ); - brw_pop_insn_state( p ); - - /* Now we want to initialise the four rear gradients based on the - hashes. 
Format conversion from signed integer to float leaves - everything scaled too high by a factor of pow( 2, 15 ), but - we correct for that right at the end. */ - /* x component */ - brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); - brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param[ 0 ] ); - brw_MUL( p, x0y1, x0y1, param[ 0 ] ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - /* prepare t for the w component (used below): w the first time through - the loop; w - 1 the second time) */ - brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); - brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); - p->current->header.predicate_inverse = 1; - brw_MOV( p, t, param[ 3 ] ); - p->current->header.predicate_inverse = 0; - brw_set_predicate_control( p, BRW_PREDICATE_NONE ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - 
brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* w component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* Here we interpolate in the y dimension... */ - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); - brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); - brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. Leave the result in tmp[ 0 ] (see below)... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); - brw_ADD( p, tmp[ 0 ], x0y0, x1y0 ); - - /* Now do the same thing for the front four gradients... 
*/ - /* x component */ - brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) ); - brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) ); - brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) ); - brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, x1y0, x1y0, t ); - brw_MUL( p, x1y1, x1y1, t ); - brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) ); - brw_MUL( p, x0y0, x0y0, param[ 0 ] ); - brw_MUL( p, x0y1, x0y1, param[ 0 ] ); - - /* y component */ - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) ); - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] ); - - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - - /* z component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) ); - brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) ); - brw_pop_insn_state( p ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 
7 ], tmp[ 7 ], t ); - /* prepare t for the w component (used below): w the first time through - the loop; w - 1 the second time) */ - brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); - brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) ); - p->current->header.predicate_inverse = 1; - brw_MOV( p, t, param[ 3 ] ); - p->current->header.predicate_inverse = 0; - brw_set_predicate_control( p, BRW_PREDICATE_NONE ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* w component */ - brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) ); - brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) ); - brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) ); - - brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t ); - brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t ); - brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t ); - brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t ); - - brw_ADD( p, x0y0, x0y0, tmp[ 4 ] ); - brw_ADD( p, x0y1, x0y1, tmp[ 5 ] ); - brw_ADD( p, x1y0, x1y0, tmp[ 6 ] ); - brw_ADD( p, x1y1, x1y1, tmp[ 7 ] ); - - /* Interpolate in the y dimension: */ - brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); - brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); - brw_MUL( p, x0y1, x0y1, interp[ 1 ] ); - brw_MUL( p, x1y1, x1y1, interp[ 1 ] ); - brw_ADD( p, x0y0, x0y0, x0y1 ); - brw_ADD( p, x1y0, x1y0, x1y1 ); - - /* And now in x. The rear face is in tmp[ 0 ] (see above), so this - time put the front face in tmp[ 1 ] and we're nearly there... */ - brw_ADD( p, x1y0, x1y0, negate( x0y0 ) ); - brw_MUL( p, x1y0, x1y0, interp[ 0 ] ); - brw_ADD( p, tmp[ 1 ], x0y0, x1y0 ); - - /* Another interpolation, in the z dimension: */ - brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) ); - brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] ); - - /* Exit the loop if we've computed both cubes... 
*/ - origin = p->nr_insn; - brw_push_insn_state( p ); - brw_set_predicate_control( p, BRW_PREDICATE_NORMAL ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) ); - brw_pop_insn_state( p ); - - /* Save the result for the w=0 case, and increment the w coordinate: */ - brw_MOV( p, w0, tmp[ 0 ] ); - brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ), - brw_imm_uw( 1 ) ); - - /* Loop around for the other cube. Explicitly set the flag register - (unfortunately we must spend an extra instruction to do this: we - can't rely on a side effect of the previous MOV or ADD because - conditional modifiers which are normally true might be false in - exceptional circumstances, e.g. given a NaN input; the add to - brw_ip_reg() is not suitable because the IP is not an 8-vector). */ - brw_push_insn_state( p ); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) ); - brw_ADD( p, brw_ip_reg(), brw_ip_reg(), - brw_imm_d( ( loop - p->nr_insn ) << 4 ) ); - brw_pop_insn_state( p ); - - /* Patch the previous conditional branch now that we know the - destination address. */ - brw_set_src1( p->store + origin, - brw_imm_d( ( p->nr_insn - origin ) << 4 ) ); - - /* The very last interpolation. 
*/ - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) ); - brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] ); - brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 ); - - /* scale by pow( 2, -15 ), as described above */ - brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) ); - - release_tmps( c, mark ); -} - -static void emit_noise4( struct brw_wm_compile *c, - const struct prog_instruction *inst ) -{ - struct brw_compile *p = &c->func; - struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst; - GLuint mask = inst->DstReg.WriteMask; - int i; - int mark = mark_tmps( c ); - - assert( mark == 0 ); - - src0 = get_src_reg( c, inst, 0, 0 ); - src1 = get_src_reg( c, inst, 0, 1 ); - src2 = get_src_reg( c, inst, 0, 2 ); - src3 = get_src_reg( c, inst, 0, 3 ); - - param0 = alloc_tmp( c ); - param1 = alloc_tmp( c ); - param2 = alloc_tmp( c ); - param3 = alloc_tmp( c ); - - brw_MOV( p, param0, src0 ); - brw_MOV( p, param1, src1 ); - brw_MOV( p, param2, src2 ); - brw_MOV( p, param3, src3 ); - - invoke_subroutine( c, SUB_NOISE4, noise4_sub ); - - /* Fill in the result: */ - brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE ); - for (i = 0 ; i < 4; i++) { - if (mask & (1<SaturateMode == SATURATE_ZERO_ONE ) - brw_set_saturate( p, 0 ); - - release_tmps( c, mark ); -} static void emit_wpos_xy(struct brw_wm_compile *c, const struct prog_instruction *inst) @@ -2543,19 +1507,18 @@ static void emit_wpos_xy(struct brw_wm_compile *c, * X and Y channels. 
*/ if (mask & WRITEMASK_X) { - /* X' = X - origin_x */ - brw_ADD(p, + /* X' = X */ + brw_MOV(p, dst[0], - retype(src0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(0 - c->key.origin_x)); + retype(src0[0], BRW_REGISTER_TYPE_W)); } if (mask & WRITEMASK_Y) { - /* Y' = height - (Y - origin_y) = height + origin_y - Y */ + /* Y' = height - 1 - Y */ brw_ADD(p, dst[1], negate(retype(src0[1], BRW_REGISTER_TYPE_W)), - brw_imm_d(c->key.origin_y + c->key.drawable_height - 1)); + brw_imm_d(c->key.drawable_height - 1)); } } @@ -2827,7 +1790,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_trunc(c, inst); break; case OPCODE_MOV: - case OPCODE_SWZ: emit_mov(c, inst); break; case OPCODE_DP3: @@ -2903,18 +1865,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_MAD: emit_mad(c, inst); break; - case OPCODE_NOISE1: - emit_noise1(c, inst); - break; - case OPCODE_NOISE2: - emit_noise2(c, inst); - break; - case OPCODE_NOISE3: - emit_noise3(c, inst); - break; - case OPCODE_NOISE4: - emit_noise4(c, inst); - break; case OPCODE_TEX: emit_tex(c, inst); break; diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 6279258339..0c411b57f5 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -422,7 +422,6 @@ void brw_wm_pass0( struct brw_wm_compile *c ) */ switch (inst->Opcode) { case OPCODE_MOV: - case OPCODE_SWZ: if (!inst->SaturateMode) { pass0_precalc_mov(c, inst); } diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index b449394029..d940ec09a9 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -120,7 +120,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) GLuint writemask; GLuint read0, read1, read2; - if (inst->opcode == OPCODE_KIL) { + if (inst->opcode == TGSI_OPCODE_KIL) { track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args 
contribute to final */ continue; } @@ -154,76 +154,75 @@ void brw_wm_pass1( struct brw_wm_compile *c ) /* Mark all inputs which contribute to the marked outputs: */ switch (inst->opcode) { - case OPCODE_ABS: - case OPCODE_FLR: - case OPCODE_FRC: - case OPCODE_MOV: - case OPCODE_SWZ: - case OPCODE_TRUNC: + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_FLR: + case TGSI_OPCODE_FRC: + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_TRUNC: read0 = writemask; break; - case OPCODE_SUB: - case OPCODE_SLT: - case OPCODE_SLE: - case OPCODE_SGE: - case OPCODE_SGT: - case OPCODE_SEQ: - case OPCODE_SNE: - case OPCODE_ADD: - case OPCODE_MAX: - case OPCODE_MIN: - case OPCODE_MUL: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_MAX: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MUL: read0 = writemask; read1 = writemask; break; - case OPCODE_DDX: - case OPCODE_DDY: + case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDY: read0 = writemask; break; - case OPCODE_MAD: - case OPCODE_CMP: - case OPCODE_LRP: + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_CMP: + case TGSI_OPCODE_LRP: read0 = writemask; read1 = writemask; read2 = writemask; break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ; if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ; if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY; read1 = read0; break; - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - case OPCODE_SCS: + case TGSI_OPCODE_COS: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_SCS: case WM_CINTERP: case WM_PIXELXY: read0 = WRITEMASK_X; break; - case OPCODE_POW: + case TGSI_OPCODE_POW: read0 = WRITEMASK_X; read1 = WRITEMASK_X; break; - case OPCODE_TEX: - case OPCODE_TXP: + case 
TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: read0 = get_texcoord_mask(inst->tex_idx); if (inst->tex_shadow) read0 |= WRITEMASK_Z; break; - case OPCODE_TXB: + case TGSI_OPCODE_TXB: /* Shadow ignored for txb. */ read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W; @@ -254,28 +253,28 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read2 = WRITEMASK_W; /* pixel w */ break; - case OPCODE_DP3: + case TGSI_OPCODE_DP3: read0 = WRITEMASK_XYZ; read1 = WRITEMASK_XYZ; break; - case OPCODE_DPH: + case TGSI_OPCODE_DPH: read0 = WRITEMASK_XYZ; read1 = WRITEMASK_XYZW; break; - case OPCODE_DP4: + case TGSI_OPCODE_DP4: read0 = WRITEMASK_XYZW; read1 = WRITEMASK_XYZW; break; - case OPCODE_LIT: + case TGSI_OPCODE_LIT: read0 = WRITEMASK_XYW; break; - case OPCODE_DST: + case TGSI_OPCODE_DST: case WM_FRONTFACING: - case OPCODE_KIL_NV: + case TGSI_OPCODE_KIL_NV: default: break; } diff --git a/src/gallium/drivers/i965/intel_chipset.h b/src/gallium/drivers/i965/intel_chipset.h index 3dc8653a73..3c38f1676c 100644 --- a/src/gallium/drivers/i965/intel_chipset.h +++ b/src/gallium/drivers/i965/intel_chipset.h @@ -66,7 +66,6 @@ #define PCI_CHIP_Q45_G 0x2E12 #define PCI_CHIP_G45_G 0x2E22 #define PCI_CHIP_G41_G 0x2E32 -#define PCI_CHIP_B43_G 0x2E42 #define PCI_CHIP_ILD_G 0x0042 #define PCI_CHIP_ILM_G 0x0046 @@ -84,8 +83,7 @@ #define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ devid == PCI_CHIP_Q45_G || \ devid == PCI_CHIP_G45_G || \ - devid == PCI_CHIP_G41_G || \ - devid == PCI_CHIP_B43_G) + devid == PCI_CHIP_G41_G) #define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) #define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) -- cgit v1.2.3 From 6b48fb002257e6f221dd9d8439a5e1aa718ed2cc Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 23 Oct 2009 20:19:14 +0100 Subject: i965: ignore cliprect_mode --- src/gallium/drivers/i965/intel_batchbuffer.h | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git 
a/src/gallium/drivers/i965/intel_batchbuffer.h b/src/gallium/drivers/i965/intel_batchbuffer.h index d4899aab7f..a595d2e0c5 100644 --- a/src/gallium/drivers/i965/intel_batchbuffer.h +++ b/src/gallium/drivers/i965/intel_batchbuffer.h @@ -51,8 +51,6 @@ struct intel_batchbuffer GLubyte *map; GLubyte *ptr; - enum cliprect_mode cliprect_mode; - GLuint size; /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ @@ -126,21 +124,10 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch, if (intel_batchbuffer_space(batch) < sz) intel_batchbuffer_flush(batch); - if ((cliprect_mode == LOOP_CLIPRECTS || - cliprect_mode == REFERENCES_CLIPRECTS) && - batch->intel->constant_cliprect) - cliprect_mode = NO_LOOP_CLIPRECTS; - - if (cliprect_mode != IGNORE_CLIPRECTS) { - if (batch->cliprect_mode == IGNORE_CLIPRECTS) { - batch->cliprect_mode = cliprect_mode; - } else { - if (batch->cliprect_mode != cliprect_mode) { - intel_batchbuffer_flush(batch); - batch->cliprect_mode = cliprect_mode; - } - } - } + /* All commands should be executed once regardless of cliprect + * mode. 
+ */ + (void)cliprect_mode; } /* Here are the crusty old macros, to be removed: -- cgit v1.2.3 From 22906f730141a233341f3ec124bbb9dd2e8904e2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 23 Oct 2009 23:27:43 +0100 Subject: i965g: wip on removing GL stuff, trying to get a few files compiling --- src/gallium/drivers/i965/Makefile | 73 ++----- src/gallium/drivers/i965/brw_cc.c | 18 +- src/gallium/drivers/i965/brw_clip.c | 4 +- src/gallium/drivers/i965/brw_clip_state.c | 7 +- src/gallium/drivers/i965/brw_context.c | 64 +++--- src/gallium/drivers/i965/brw_context.h | 199 ++++++++----------- src/gallium/drivers/i965/brw_curbe.c | 5 +- src/gallium/drivers/i965/brw_draw.c | 25 +-- src/gallium/drivers/i965/brw_draw.h | 7 +- src/gallium/drivers/i965/brw_draw_upload.c | 39 ++-- src/gallium/drivers/i965/brw_eu_debug.c | 2 - src/gallium/drivers/i965/brw_gs.c | 8 +- src/gallium/drivers/i965/brw_gs_emit.c | 5 - src/gallium/drivers/i965/brw_gs_state.c | 7 +- src/gallium/drivers/i965/brw_misc_state.c | 22 +- src/gallium/drivers/i965/brw_pipe_depth.c | 18 +- src/gallium/drivers/i965/brw_pipe_fb.c | 4 +- src/gallium/drivers/i965/brw_pipe_flush.c | 8 +- src/gallium/drivers/i965/brw_pipe_query.c | 246 +++++++++++++++++++++++ src/gallium/drivers/i965/brw_program.c | 166 ---------------- src/gallium/drivers/i965/brw_queryobj.c | 254 ------------------------ src/gallium/drivers/i965/brw_sf.c | 12 +- src/gallium/drivers/i965/brw_sf.h | 12 +- src/gallium/drivers/i965/brw_sf_emit.c | 26 +-- src/gallium/drivers/i965/brw_sf_state.c | 16 +- src/gallium/drivers/i965/brw_state.h | 30 +-- src/gallium/drivers/i965/brw_state_batch.c | 1 - src/gallium/drivers/i965/brw_state_cache.c | 59 +++--- src/gallium/drivers/i965/brw_state_dump.c | 12 +- src/gallium/drivers/i965/brw_state_upload.c | 4 +- src/gallium/drivers/i965/brw_structs.h | 1 + src/gallium/drivers/i965/brw_swtnl.c | 1 - src/gallium/drivers/i965/brw_tex.c | 7 - src/gallium/drivers/i965/brw_tex_layout.c | 12 +- 
src/gallium/drivers/i965/brw_types.h | 15 +- src/gallium/drivers/i965/brw_util.c | 2 - src/gallium/drivers/i965/brw_util.h | 2 +- src/gallium/drivers/i965/brw_vs.c | 5 +- src/gallium/drivers/i965/brw_vs_emit.c | 3 - src/gallium/drivers/i965/brw_vs_state.c | 9 +- src/gallium/drivers/i965/brw_vs_surface_state.c | 20 +- src/gallium/drivers/i965/brw_wm.c | 6 +- src/gallium/drivers/i965/brw_wm_emit.c | 1 - src/gallium/drivers/i965/brw_wm_glsl.c | 4 - src/gallium/drivers/i965/brw_wm_iz.c | 1 - src/gallium/drivers/i965/brw_wm_sampler_state.c | 15 +- src/gallium/drivers/i965/brw_wm_state.c | 19 +- src/gallium/drivers/i965/brw_wm_surface_state.c | 181 ++++++++--------- src/gallium/drivers/i965/intel_batchbuffer.h | 7 +- src/gallium/drivers/i965/intel_tex_format.c | 197 ------------------ src/gallium/drivers/i965/intel_tex_layout.c | 7 +- 51 files changed, 634 insertions(+), 1234 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_pipe_query.c delete mode 100644 src/gallium/drivers/i965/brw_program.c delete mode 100644 src/gallium/drivers/i965/brw_queryobj.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 7a55333e89..480d2efbc5 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -1,38 +1,9 @@ - -TOP = ../../../../.. +TOP = ../../../.. 
include $(TOP)/configs/current -LIBNAME = i965_dri.so +LIBNAME = i965 -DRIVER_SOURCES = \ - intel_batchbuffer.c \ - intel_blit.c \ - intel_buffer_objects.c \ - intel_buffers.c \ - intel_clear.c \ - intel_context.c \ - intel_decode.c \ - intel_extensions.c \ - intel_fbo.c \ - intel_mipmap_tree.c \ - intel_regions.c \ - intel_screen.c \ - intel_span.c \ - intel_pixel.c \ - intel_pixel_bitmap.c \ - intel_pixel_copy.c \ - intel_pixel_draw.c \ - intel_pixel_read.c \ - intel_state.c \ - intel_swapbuffers.c \ - intel_syncobj.c \ - intel_tex.c \ - intel_tex_copy.c \ - intel_tex_format.c \ - intel_tex_image.c \ - intel_tex_layout.c \ - intel_tex_subimage.c \ - intel_tex_validate.c \ +C_SOURCES = \ brw_cc.c \ brw_clip.c \ brw_clip_line.c \ @@ -50,13 +21,18 @@ DRIVER_SOURCES = \ brw_eu_debug.c \ brw_eu_emit.c \ brw_eu_util.c \ - brw_fallback.c \ brw_gs.c \ brw_gs_emit.c \ brw_gs_state.c \ brw_misc_state.c \ - brw_program.c \ - brw_queryobj.c \ + brw_pipe_blend.c \ + brw_pipe_debug.c \ + brw_pipe_depth.c \ + brw_pipe_fb.c \ + brw_pipe_flush.c \ + brw_pipe_query.c \ + brw_pipe_shader.c \ + brw_screen_surface.c \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ @@ -64,41 +40,30 @@ DRIVER_SOURCES = \ brw_state_cache.c \ brw_state_dump.c \ brw_state_upload.c \ + brw_swtnl.c \ brw_tex.c \ brw_tex_layout.c \ brw_urb.c \ brw_util.c \ brw_vs.c \ - brw_vs_constval.c \ brw_vs_emit.c \ brw_vs_state.c \ brw_vs_surface_state.c \ - brw_vtbl.c \ brw_wm.c \ brw_wm_debug.c \ brw_wm_emit.c \ brw_wm_fp.c \ - brw_wm_iz.c \ brw_wm_glsl.c \ + brw_wm_iz.c \ brw_wm_pass0.c \ brw_wm_pass1.c \ brw_wm_pass2.c \ brw_wm_sampler_state.c \ brw_wm_state.c \ - brw_wm_surface_state.c - -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(MINIGLX_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -DRIVER_DEFINES = -I../intel -I../intel/server - -DRI_LIB_DEPS += -ldrm_intel - -include ../Makefile.template + brw_wm_surface_state.c \ + brw_bo.c \ + intel_batchbuffer.c \ + intel_tex_format.c \ + intel_tex_layout.c -intel_decode.o: 
../intel/intel_decode.c -intel_tex_layout.o: ../intel/intel_tex_layout.c +include ../../Makefile.template diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index 9ab5638137..af432b1f52 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -33,13 +33,9 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "brw_util.h" -#include "main/macros.h" -#include "main/enums.h" static void prepare_cc_vp( struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; struct brw_cc_viewport ccv; memset(&ccv, 0, sizeof(ccv)); @@ -48,13 +44,13 @@ static void prepare_cc_vp( struct brw_context *brw ) ccv.min_depth = ctx->Viewport.Near; ccv.max_depth = ctx->Viewport.Far; - dri_bo_unreference(brw->cc.vp_bo); + brw->sws->bo_unreference(brw->cc.vp_bo); brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); } const struct brw_tracked_state brw_cc_vp = { .dirty = { - .mesa = _NEW_VIEWPORT, + .mesa = PIPE_NEW_VIEWPORT, .brw = BRW_NEW_CONTEXT, .cache = 0 }, @@ -71,8 +67,8 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) { memset(key, 0, sizeof(*key)); - key->dsa = brw->curr.dsa.base; - key->blend = brw->curr.blend.base; + key->dsa = brw->dsa; + key->blend = brw->blend; /* Clear non-respected values: */ @@ -82,11 +78,11 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) /** * Creates the state cache entry for the given CC unit key. 
*/ -static dri_bo * +static struct brw_winsys_buffer * cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) { struct brw_cc_unit_state cc; - dri_bo *bo; + struct brw_winsys_buffer *bo; memset(&cc, 0, sizeof(cc)); @@ -124,7 +120,7 @@ static void prepare_cc_unit( struct brw_context *brw ) cc_unit_populate_key(brw, &key); - dri_bo_unreference(brw->cc.state_bo); + brw->sws->bo_unreference(brw->cc.state_bo); brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT, &key, sizeof(key), &brw->cc.vp_bo, 1, diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index df1b3718d0..d82ebeb9a9 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -129,7 +129,7 @@ static void compile_clip_prog( struct brw_context *brw, /* Upload */ - dri_bo_unreference(brw->clip.prog_bo); + brw->sws->bo_unreference(brw->clip.prog_bo); brw->clip.prog_bo = brw_upload_cache( &brw->cache, BRW_CLIP_PROG, &c.key, sizeof(c.key), @@ -199,7 +199,7 @@ static void upload_clip_prog(struct brw_context *brw) } } - dri_bo_unreference(brw->clip.prog_bo); + brw->sws->bo_unreference(brw->clip.prog_bo); brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, &key, sizeof(key), NULL, 0, diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 72e27205e2..0ea7ce5734 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -49,7 +49,6 @@ struct brw_clip_unit_key { static void clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) { - GLcontext *ctx = &brw->intel.ctx; memset(key, 0, sizeof(*key)); /* CACHE_NEW_CLIP_PROG */ @@ -69,12 +68,12 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp; } -static dri_bo * +static struct brw_winsys_buffer * clip_unit_create_from_key(struct 
brw_context *brw, struct brw_clip_unit_key *key) { struct brw_clip_unit_state clip; - dri_bo *bo; + struct brw_winsys_buffer *bo; memset(&clip, 0, sizeof(clip)); @@ -162,7 +161,7 @@ static void upload_clip_unit( struct brw_context *brw ) clip_unit_populate_key(brw, &key); - dri_bo_unreference(brw->clip.state_bo); + brw->sws->bo_unreference(brw->clip.state_bo); brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT, &key, sizeof(key), &brw->clip.prog_bo, 1, diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index bf0ec89e13..063ada5772 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -30,32 +30,21 @@ */ -#include "main/imports.h" -#include "main/api_noop.h" -#include "main/macros.h" -#include "main/vtxfmt.h" -#include "main/simple_list.h" -#include "shader/shader_api.h" +#include "pipe/p_context.h" #include "brw_context.h" #include "brw_defines.h" #include "brw_draw.h" #include "brw_state.h" #include "brw_vs.h" -#include "intel_tex.h" -#include "intel_blit.h" +#include "brw_screen_tex.h" #include "intel_batchbuffer.h" -#include "intel_pixel.h" -#include "intel_span.h" -#include "tnl/t_pipeline.h" -#include "utils.h" -GLboolean brwCreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate) +struct pipe_context *brw_create_context( struct pipe_screen *screen, + void *priv ) { struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); @@ -87,9 +76,8 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, /** * called from intelDestroyContext() */ -static void brw_destroy_context( struct intel_context *intel ) +static void brw_destroy_context( struct brw_context *brw ) { - struct brw_context *brw = brw_context(&intel->ctx); int i; brw_destroy_state(brw); @@ -102,27 +90,27 @@ static void brw_destroy_context( struct intel_context *intel ) brw->state.nr_color_regions = 0; 
intel_region_release(&brw->state.depth_region); - dri_bo_unreference(brw->curbe.curbe_bo); - dri_bo_unreference(brw->vs.prog_bo); - dri_bo_unreference(brw->vs.state_bo); - dri_bo_unreference(brw->vs.bind_bo); - dri_bo_unreference(brw->gs.prog_bo); - dri_bo_unreference(brw->gs.state_bo); - dri_bo_unreference(brw->clip.prog_bo); - dri_bo_unreference(brw->clip.state_bo); - dri_bo_unreference(brw->clip.vp_bo); - dri_bo_unreference(brw->sf.prog_bo); - dri_bo_unreference(brw->sf.state_bo); - dri_bo_unreference(brw->sf.vp_bo); + brw->sws->bo_unreference(brw->curbe.curbe_bo); + brw->sws->bo_unreference(brw->vs.prog_bo); + brw->sws->bo_unreference(brw->vs.state_bo); + brw->sws->bo_unreference(brw->vs.bind_bo); + brw->sws->bo_unreference(brw->gs.prog_bo); + brw->sws->bo_unreference(brw->gs.state_bo); + brw->sws->bo_unreference(brw->clip.prog_bo); + brw->sws->bo_unreference(brw->clip.state_bo); + brw->sws->bo_unreference(brw->clip.vp_bo); + brw->sws->bo_unreference(brw->sf.prog_bo); + brw->sws->bo_unreference(brw->sf.state_bo); + brw->sws->bo_unreference(brw->sf.vp_bo); for (i = 0; i < BRW_MAX_TEX_UNIT; i++) - dri_bo_unreference(brw->wm.sdc_bo[i]); - dri_bo_unreference(brw->wm.bind_bo); + brw->sws->bo_unreference(brw->wm.sdc_bo[i]); + brw->sws->bo_unreference(brw->wm.bind_bo); for (i = 0; i < BRW_WM_MAX_SURF; i++) - dri_bo_unreference(brw->wm.surf_bo[i]); - dri_bo_unreference(brw->wm.sampler_bo); - dri_bo_unreference(brw->wm.prog_bo); - dri_bo_unreference(brw->wm.state_bo); - dri_bo_unreference(brw->cc.prog_bo); - dri_bo_unreference(brw->cc.state_bo); - dri_bo_unreference(brw->cc.vp_bo); + brw->sws->bo_unreference(brw->wm.surf_bo[i]); + brw->sws->bo_unreference(brw->wm.sampler_bo); + brw->sws->bo_unreference(brw->wm.prog_bo); + brw->sws->bo_unreference(brw->wm.state_bo); + brw->sws->bo_unreference(brw->cc.prog_bo); + brw->sws->bo_unreference(brw->cc.state_bo); + brw->sws->bo_unreference(brw->cc.vp_bo); } diff --git a/src/gallium/drivers/i965/brw_context.h 
b/src/gallium/drivers/i965/brw_context.h index 009e28b227..0fcb75a440 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -33,9 +33,9 @@ #ifndef BRWCONTEXT_INC #define BRWCONTEXT_INC -#include "intel_context.h" #include "brw_structs.h" -#include "main/imports.h" +#include "brw_winsys.h" +#include "pipe/p_state.h" /* Glossary: @@ -119,6 +119,19 @@ struct brw_context; +#define PIPE_NEW_DEPTH_STENCIL_ALPHA 0x1 +#define PIPE_NEW_RAST 0x2 +#define PIPE_NEW_BLEND 0x2 +#define PIPE_NEW_VIEWPORT 0x2 +#define PIPE_NEW_FRAMEBUFFER 0x2 +#define PIPE_NEW_VERTEX_BUFFER 0x2 +#define PIPE_NEW_VERTEX_ELEMENT 0x2 +#define PIPE_NEW_FRAGMENT_SHADER 0x2 +#define PIPE_NEW_VERTEX_SHADER 0x2 +#define PIPE_NEW_FRAGMENT_CONSTS 0x2 +#define PIPE_NEW_VERTEX_CONSTS 0x2 + + #define BRW_NEW_URB_FENCE 0x1 #define BRW_NEW_FRAGMENT_PROGRAM 0x2 #define BRW_NEW_VERTEX_PROGRAM 0x4 @@ -156,26 +169,23 @@ struct brw_state_flags { }; -/** Subclass of Mesa vertex program */ struct brw_vertex_program { - struct gl_vertex_program program; + const struct tgsi_token *tokens; GLuint id; - dri_bo *const_buffer; /** Program constant buffer/surface */ + struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; }; /** Subclass of Mesa fragment program */ struct brw_fragment_program { - struct gl_fragment_program program; + const struct tgsi_token *tokens; + GLuint id; /**< serial no. 
to identify frag progs, never re-used */ - GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ + GLboolean isGLSL; /**< any IF/LOOP/CONT/BREAK instructions */ - dri_bo *const_buffer; /** Program constant buffer/surface */ + struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; - - /** for debugging, which texture units are referenced */ - GLbitfield tex_units_used; }; @@ -244,7 +254,7 @@ struct brw_vs_prog_data { /* Size == 0 if output either not written, or always [0,0,0,1] */ struct brw_vs_ouput_sizes { - GLubyte output_size[VERT_RESULT_MAX]; + GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS]; }; @@ -312,10 +322,10 @@ struct brw_cache_item { GLuint hash; GLuint key_size; /* for variable-sized keys */ const void *key; - dri_bo **reloc_bufs; + struct brw_winsys_buffer **reloc_bufs; GLuint nr_reloc_bufs; - dri_bo *bo; + struct brw_winsys_buffer *bo; GLuint data_size; struct brw_cache_item *next; @@ -336,7 +346,7 @@ struct brw_cache { /* Record of the last BOs chosen for each cache_id. Used to set * brw->state.dirty.cache when a new cache item is chosen. */ - dri_bo *last_bo[BRW_MAX_CACHE]; + struct brw_winsys_buffer *last_bo[BRW_MAX_CACHE]; }; @@ -384,56 +394,22 @@ struct brw_cached_batch_item { /* Protect against a future where VERT_ATTRIB_MAX > 32. Wouldn't life * be easier if C allowed arrays of packed elements? */ -#define ATTRIB_BIT_DWORDS ((VERT_ATTRIB_MAX+31)/32) - -struct brw_vertex_element { - const struct gl_client_array *glarray; - - /** The corresponding Mesa vertex attribute */ - gl_vert_attrib attrib; - /** Size of a complete element */ - GLuint element_size; - /** Number of uploaded elements for this input. 
*/ - GLuint count; - /** Byte stride between elements in the uploaded array */ - GLuint stride; - /** Offset of the first element within the buffer object */ - unsigned int offset; - /** Buffer object containing the uploaded vertex data */ - dri_bo *bo; -}; - - - -struct brw_vertex_info { - GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */ -}; +#define VS_INPUT_BITMASK_DWORDS ((PIPE_MAX_SHADER_INPUTS+31)/32) -/* Cache for TNL programs. - */ -struct brw_tnl_cache_item { - GLuint hash; - void *key; - void *data; - struct brw_tnl_cache_item *next; +struct brw_vertex_info { + GLuint sizes[VS_INPUT_BITMASK_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */ }; -struct brw_tnl_cache { - struct brw_tnl_cache_item **items; - GLuint size, n_items; -}; struct brw_query_object { - struct gl_query_object Base; - /** Doubly linked list of active query objects in the context. */ struct brw_query_object *prev, *next; /** Last query BO associated with this query. */ - dri_bo *bo; + struct brw_winsys_buffer *bo; /** First index in bo with query data for this object. */ int first_index; /** Last index in bo with query data for this object. */ @@ -445,22 +421,29 @@ struct brw_query_object { /** - * brw_context is derived from intel_context. 
+ * brw_context is derived from pipe_context */ struct brw_context { + struct pipe_context *pipe; + struct pipe_screen *screen; + + struct brw_winsys_screen *sws; + GLuint primitive; GLboolean emit_state_always; GLboolean no_batch_wrap; + /* Active vertex program: + */ + const struct gl_vertex_program *vertex_program; + const struct gl_fragment_program *fragment_program; + struct pipe_framebuffer_state fb; + struct { struct brw_state_flags dirty; - GLuint nr_color_regions; - struct intel_region *color_regions[MAX_DRAW_BUFFERS]; - struct intel_region *depth_region; - /** * List of buffers accumulated in brw_validate_state to receive * dri_bo_check_aperture treatment before exec, so we can know if we @@ -471,7 +454,7 @@ struct brw_context * consisting of the vertex buffers, pipelined state pointers, * the CURBE, the depth buffer, and a query BO. */ - dri_bo *validated_bos[VERT_ATTRIB_MAX + 16]; + struct brw_winsys_buffer *validated_bos[PIPE_MAX_SHADER_INPUTS + 16]; int validated_bo_count; } state; @@ -480,18 +463,14 @@ struct brw_context struct brw_cached_batch_item *cached_batch_items; struct { - struct brw_vertex_element inputs[VERT_ATTRIB_MAX]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned num_vertex_element; + unsigned num_vertex_buffer; - struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; - GLuint nr_enabled; - -#define BRW_NR_UPLOAD_BUFS 17 -#define BRW_UPLOAD_INIT_SIZE (128*1024) - - struct { - dri_bo *bo; - GLuint offset; - } upload; + struct u_upload_mgr *upload_vertex; + struct u_upload_mgr *upload_index; + /* Summary of size and varying of active arrays, so we can check * for changes to this state: @@ -509,7 +488,7 @@ struct brw_context const struct _mesa_index_buffer *ib; /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. 
*/ - dri_bo *bo; + struct brw_winsys_buffer *bo; unsigned int offset; unsigned int size; /* Offset to index buffer index to use in CMD_3D_PRIM so that we can @@ -519,16 +498,6 @@ struct brw_context unsigned int start_vertex_offset; } ib; - /* Active vertex program: - */ - const struct gl_vertex_program *vertex_program; - const struct gl_fragment_program *fragment_program; - - - /* For populating the gtt: - */ - GLuint next_free_page; - /* BRW_NEW_URB_ALLOCATIONS: */ @@ -545,12 +514,6 @@ struct brw_context GLuint nr_sf_entries; GLuint nr_cs_entries; -/* GLuint vs_size; */ -/* GLuint gs_size; */ -/* GLuint clip_size; */ -/* GLuint sf_size; */ -/* GLuint cs_size; */ - GLuint vs_start; GLuint gs_start; GLuint clip_start; @@ -570,7 +533,7 @@ struct brw_context GLuint vs_size; GLuint total_size; - dri_bo *curbe_bo; + struct brw_winsys_buffer *curbe_bo; /** Offset within curbe_bo of space for current curbe entry */ GLuint curbe_offset; /** Offset within curbe_bo of space for next curbe entry */ @@ -588,12 +551,12 @@ struct brw_context struct { struct brw_vs_prog_data *prog_data; - dri_bo *prog_bo; - dri_bo *state_bo; + struct brw_winsys_buffer *prog_bo; + struct brw_winsys_buffer *state_bo; /** Binding table of pointers to surf_bo entries */ - dri_bo *bind_bo; - dri_bo *surf_bo[BRW_VS_MAX_SURF]; + struct brw_winsys_buffer *bind_bo; + struct brw_winsys_buffer *surf_bo[BRW_VS_MAX_SURF]; GLuint nr_surfaces; } vs; @@ -601,25 +564,25 @@ struct brw_context struct brw_gs_prog_data *prog_data; GLboolean prog_active; - dri_bo *prog_bo; - dri_bo *state_bo; + struct brw_winsys_buffer *prog_bo; + struct brw_winsys_buffer *state_bo; } gs; struct { struct brw_clip_prog_data *prog_data; - dri_bo *prog_bo; - dri_bo *state_bo; - dri_bo *vp_bo; + struct brw_winsys_buffer *prog_bo; + struct brw_winsys_buffer *state_bo; + struct brw_winsys_buffer *vp_bo; } clip; struct { struct brw_sf_prog_data *prog_data; - dri_bo *prog_bo; - dri_bo *state_bo; - dri_bo *vp_bo; + struct brw_winsys_buffer 
*prog_bo; + struct brw_winsys_buffer *state_bo; + struct brw_winsys_buffer *vp_bo; } sf; struct { @@ -629,38 +592,38 @@ struct brw_context /** Input sizes, calculated from active vertex program. * One bit per fragment program input attribute. */ - GLbitfield input_size_masks[4]; + //GLbitfield input_size_masks[4]; /** Array of surface default colors (texture border color) */ - dri_bo *sdc_bo[BRW_MAX_TEX_UNIT]; + struct brw_winsys_buffer *sdc_bo[BRW_MAX_TEX_UNIT]; GLuint render_surf; GLuint nr_surfaces; GLuint max_threads; - dri_bo *scratch_bo; + struct brw_winsys_buffer *scratch_bo; GLuint sampler_count; - dri_bo *sampler_bo; + struct brw_winsys_buffer *sampler_bo; /** Binding table of pointers to surf_bo entries */ - dri_bo *bind_bo; - dri_bo *surf_bo[BRW_WM_MAX_SURF]; + struct brw_winsys_buffer *bind_bo; + struct brw_winsys_buffer *surf_bo[PIPE_MAX_COLOR_BUFS]; - dri_bo *prog_bo; - dri_bo *state_bo; + struct brw_winsys_buffer *prog_bo; + struct brw_winsys_buffer *state_bo; } wm; struct { - dri_bo *prog_bo; - dri_bo *state_bo; - dri_bo *vp_bo; + struct brw_winsys_buffer *prog_bo; + struct brw_winsys_buffer *state_bo; + struct brw_winsys_buffer *vp_bo; } cc; struct { struct brw_query_object active_head; - dri_bo *bo; + struct brw_winsys_buffer *bo; int index; GLboolean active; } query; @@ -679,12 +642,6 @@ struct brw_context */ void brwInitVtbl( struct brw_context *brw ); -/*====================================================================== - * brw_context.c - */ -GLboolean brwCreateContext( const __GLcontextModes *mesaVis, - __DRIcontextPrivate *driContextPriv, - void *sharedContextPrivate); /*====================================================================== * brw_queryobj.c @@ -697,7 +654,7 @@ void brw_emit_query_end(struct brw_context *brw); /*====================================================================== * brw_state_dump.c */ -void brw_debug_batch(struct intel_context *intel); +void brw_debug_batch(struct brw_context *intel); 
/*====================================================================== * brw_tex.c @@ -706,9 +663,9 @@ void brw_validate_textures( struct brw_context *brw ); /*====================================================================== - * brw_program.c + * brw_pipe_shader.c */ -void brwInitFragProgFuncs( struct dd_function_table *functions ); +void brw_init_shader_funcs( struct brw_context *brw ); /* brw_urb.c diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 3e32c4983d..33ea9a00f7 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -47,7 +47,6 @@ */ static void calculate_curbe_offsets( struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; /* CACHE_NEW_WM_PROG */ const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; @@ -157,7 +156,6 @@ static GLfloat fixed_plane[6][4] = { */ static void prepare_constant_buffer(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; const struct brw_vertex_program *vp = brw_vertex_program_const(brw->vertex_program); const struct brw_fragment_program *fp = @@ -269,7 +267,7 @@ static void prepare_constant_buffer(struct brw_context *brw) (brw->curbe.need_new_bo || brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)) { - dri_bo_unreference(brw->curbe.curbe_bo); + brw->sws->bo_unreference(brw->curbe.curbe_bo); brw->curbe.curbe_bo = NULL; } @@ -310,7 +308,6 @@ static void prepare_constant_buffer(struct brw_context *brw) static void emit_constant_buffer(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; BEGIN_BATCH(2, IGNORE_CLIPRECTS); diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 8cd117c24f..856999f3ef 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -26,15 +26,6 @@ **************************************************************************/ -#include 
"main/glheader.h" -#include "main/context.h" -#include "main/state.h" -#include "main/enums.h" -#include "tnl/tnl.h" -#include "vbo/vbo_context.h" -#include "swrast/swrast.h" -#include "swrast_setup/swrast_setup.h" - #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" @@ -67,7 +58,6 @@ static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = { */ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) { - GLcontext *ctx = &brw->intel.ctx; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); @@ -110,7 +100,6 @@ static void brw_emit_prim(struct brw_context *brw, uint32_t hw_prim) { struct brw_3d_primitive prim_packet; - struct intel_context *intel = &brw->intel; if (INTEL_DEBUG & DEBUG_PRIMS) _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), @@ -163,7 +152,7 @@ static void brw_merge_inputs( struct brw_context *brw, GLuint i; for (i = 0; i < VERT_ATTRIB_MAX; i++) - dri_bo_unreference(brw->vb.inputs[i].bo); + brw->sws->bo_unreference(brw->vb.inputs[i].bo); memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs)); memset(&brw->vb.info, 0, sizeof(brw->vb.info)); @@ -185,7 +174,7 @@ static void brw_merge_inputs( struct brw_context *brw, /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. 
*/ -static GLboolean brw_try_draw_prims( GLcontext *ctx, +static GLboolean brw_try_draw_prims( struct brw_context *brw, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, GLuint nr_prims, @@ -193,7 +182,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, GLuint min_index, GLuint max_index ) { - struct intel_context *intel = intel_context(ctx); struct brw_context *brw = brw_context(ctx); GLboolean retval = GL_FALSE; GLboolean warn = GL_FALSE; @@ -241,7 +229,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx, return 0; } -void brw_draw_prims( GLcontext *ctx, +void brw_draw_prims( struct brw_context *brw, const struct gl_client_array *arrays[], const struct _mesa_prim *prim, GLuint nr_prims, @@ -274,7 +262,6 @@ void brw_draw_prims( GLcontext *ctx, void brw_draw_init( struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; struct vbo_context *vbo = vbo_context(ctx); /* Register our drawing function: @@ -287,15 +274,15 @@ void brw_draw_destroy( struct brw_context *brw ) int i; if (brw->vb.upload.bo != NULL) { - dri_bo_unreference(brw->vb.upload.bo); + brw->sws->bo_unreference(brw->vb.upload.bo); brw->vb.upload.bo = NULL; } for (i = 0; i < VERT_ATTRIB_MAX; i++) { - dri_bo_unreference(brw->vb.inputs[i].bo); + brw->sws->bo_unreference(brw->vb.inputs[i].bo); brw->vb.inputs[i].bo = NULL; } - dri_bo_unreference(brw->ib.bo); + brw->sws->bo_unreference(brw->ib.bo); brw->ib.bo = NULL; } diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h index 2a14db217f..dc7ca8731d 100644 --- a/src/gallium/drivers/i965/brw_draw.h +++ b/src/gallium/drivers/i965/brw_draw.h @@ -28,13 +28,12 @@ #ifndef BRW_DRAW_H #define BRW_DRAW_H -#include "main/mtypes.h" /* for GLcontext... 
*/ -#include "vbo/vbo.h" +#include "brw_types.h" struct brw_context; -void brw_draw_prims( GLcontext *ctx, +void brw_draw_prims( struct brw_context *brw, const struct gl_client_array *arrays[], const struct _mesa_prim *prims, GLuint nr_prims, @@ -48,7 +47,7 @@ void brw_draw_destroy( struct brw_context *brw ); /* brw_draw_current.c */ -void brw_init_current_values(GLcontext *ctx, +void brw_init_current_values(struct brw_context *brw, struct gl_client_array *arrays); #endif diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index ad3ef6b7dd..dce015d79f 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -191,8 +191,6 @@ static unsigned get_index_type(int type) static boolean brw_prepare_vertices(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; - struct intel_context *intel = intel_context(ctx); GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; GLuint i; const unsigned char *ptr = NULL; @@ -210,15 +208,17 @@ static boolean brw_prepare_vertices(struct brw_context *brw) - for (i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; + for (i = 0; i < brw->vb.num_vertex_buffer; i++) { + struct brw_vertex_buffer *vb = brw->vb.vertex_buffer[i]; + unsigned size = (vb->stride == 0 ? + vb->size : + vb->stride * (max_index + 1 - min_index)); - input->element_size = get_size(input->glarray->Type) * input->glarray->Size; if (brw_is_user_buffer(vb)) { - u_upload_buffer( brw->upload, + u_upload_buffer( brw->upload_vertex, min_index * vb->stride, - (max_index + 1 - min_index) * vb->stride, + size, &offset, &buffer ); } @@ -226,20 +226,20 @@ static boolean brw_prepare_vertices(struct brw_context *brw) { offset = 0; buffer = vb->buffer; - count = stride == 0 ? 1 : max_index + 1 - min_index; } - - /* Named buffer object: Just reference its contents directly. 
*/ - dri_bo_unreference(input->bo); - input->bo = intel_bufferobj_buffer(intel, intel_buffer, - INTEL_READ); - dri_bo_reference(input->bo); - + + /* Set up post-upload info about this vertex buffer: + */ input->offset = (unsigned long)offset; input->stride = vb->stride; input->count = count; + brw->sws->bo_unreference(input->bo); + input->bo = intel_bufferobj_buffer(intel, intel_buffer, + INTEL_READ); + brw->sws->bo_reference(input->bo); assert(input->offset < input->bo->size); + assert(input->offset + size <= input->bo->size); } brw_prepare_query_begin(brw); @@ -253,8 +253,6 @@ static boolean brw_prepare_vertices(struct brw_context *brw) static void brw_emit_vertices(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; - struct intel_context *intel = intel_context(ctx); GLuint i; brw_emit_query_begin(brw); @@ -370,11 +368,9 @@ const struct brw_tracked_state brw_vertices = { static void brw_prepare_indices(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; - struct intel_context *intel = &brw->intel; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; GLuint ib_size; - dri_bo *bo = NULL; + struct brw_winsys_buffer *bo = NULL; struct gl_buffer_object *bufferobj; GLuint offset; GLuint ib_type_size; @@ -421,7 +417,7 @@ static void brw_prepare_indices(struct brw_context *brw) } else { bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), INTEL_READ); - dri_bo_reference(bo); + brw->sws->bo_reference(bo); /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading * the index buffer state when we're just moving the start index @@ -461,7 +457,6 @@ const struct brw_tracked_state brw_indices = { static void brw_emit_index_buffer(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; const struct _mesa_index_buffer *index_buffer = brw->ib.ib; if (index_buffer == NULL) diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c index 29f3f6d02f..ad7ec36e86 100644 --- 
a/src/gallium/drivers/i965/brw_eu_debug.c +++ b/src/gallium/drivers/i965/brw_eu_debug.c @@ -30,8 +30,6 @@ */ -#include "main/mtypes.h" -#include "main/imports.h" #include "brw_eu.h" void brw_print_reg( struct brw_reg hwreg ) diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 5ec0c585fe..58930e7964 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -29,10 +29,6 @@ * Keith Whitwell */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" - #include "intel_batchbuffer.h" #include "brw_defines.h" @@ -124,7 +120,7 @@ static void compile_gs_prog( struct brw_context *brw, /* Upload */ - dri_bo_unreference(brw->gs.prog_bo); + brw->sws->bo_unreference(brw->gs.prog_bo); brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG, &c.key, sizeof(c.key), NULL, 0, @@ -180,7 +176,7 @@ static void prepare_gs_prog(struct brw_context *brw) } if (brw->gs.prog_active) { - dri_bo_unreference(brw->gs.prog_bo); + brw->sws->bo_unreference(brw->gs.prog_bo); brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, &key, sizeof(key), NULL, 0, diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c index a9b2aa2eac..9ec206d7e8 100644 --- a/src/gallium/drivers/i965/brw_gs_emit.c +++ b/src/gallium/drivers/i965/brw_gs_emit.c @@ -30,11 +30,6 @@ */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" - -#include "shader/program.h" #include "intel_batchbuffer.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index ed9d2ffe60..6d03d72d96 100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -34,7 +34,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "main/macros.h" struct brw_gs_unit_key { unsigned int total_grf; @@ -69,11 +68,11 @@ gs_unit_populate_key(struct brw_context 
*brw, struct brw_gs_unit_key *key) key->urb_size = brw->urb.vsize; } -static dri_bo * +static struct brw_winsys_buffer * gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) { struct brw_gs_unit_state gs; - dri_bo *bo; + struct brw_winsys_buffer *bo; memset(&gs, 0, sizeof(gs)); @@ -128,7 +127,7 @@ static void prepare_gs_unit(struct brw_context *brw) gs_unit_populate_key(brw, &key); - dri_bo_unreference(brw->gs.state_bo); + brw->sws->bo_unreference(brw->gs.state_bo); brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT, &key, sizeof(key), &brw->gs.prog_bo, 1, diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index ea71857548..d33bf40a01 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -48,7 +48,6 @@ static void upload_blend_constant_color(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; struct brw_blend_constant_color bcc; memset(&bcc, 0, sizeof(bcc)); @@ -75,17 +74,11 @@ const struct brw_tracked_state brw_blend_constant_color = { /* Constant single cliprect for framebuffer object or DRI2 drawing */ static void upload_drawing_rect(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; - GLcontext *ctx = &intel->ctx; - - if (!intel->constant_cliprect) - return; - BEGIN_BATCH(4, NO_LOOP_CLIPRECTS); OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965); - OUT_BATCH(0); /* xmin, ymin */ - OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | - ((ctx->DrawBuffer->Height - 1) << 16)); + OUT_BATCH(0); + OUT_BATCH(((brw->fb.width - 1) & 0xffff) | + ((brw->fb.height - 1) << 16)); OUT_BATCH(0); ADVANCE_BATCH(); } @@ -114,8 +107,6 @@ static void prepare_binding_table_pointers(struct brw_context *brw) */ static void upload_binding_table_pointers(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; - BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); if (brw->vs.bind_bo != 
NULL) @@ -148,8 +139,6 @@ const struct brw_tracked_state brw_binding_table_pointers = { */ static void upload_pipelined_state_pointers(struct brw_context *brw ) { - struct intel_context *intel = &brw->intel; - BEGIN_BATCH(7, IGNORE_CLIPRECTS); OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); @@ -210,7 +199,6 @@ static void prepare_depthbuffer(struct brw_context *brw) static void emit_depthbuffer(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; struct intel_region *region = brw->state.depth_region; unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5; @@ -287,7 +275,6 @@ const struct brw_tracked_state brw_depthbuffer = { static void upload_polygon_stipple(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; struct brw_polygon_stipple bps; GLuint i; @@ -401,7 +388,6 @@ const struct brw_tracked_state brw_aa_line_parameters = { static void upload_line_stipple(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; struct brw_line_stipple bls; GLfloat tmp; GLint tmpi; @@ -507,8 +493,6 @@ const struct brw_tracked_state brw_invarient_state = { */ static void upload_state_base_address( struct brw_context *brw ) { - struct intel_context *intel = &brw->intel; - /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. 
*/ diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c index da29bc8bcb..29f135d37a 100644 --- a/src/gallium/drivers/i965/brw_pipe_depth.c +++ b/src/gallium/drivers/i965/brw_pipe_depth.c @@ -1,5 +1,9 @@ - /* _NEW_STENCIL */ - if (key->dsa.stencil[0].enable) { + +static void * +brw_create_depth_stencil( struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *tmpl ) +{ + if (tmpl->stencil[0].enable) { cc.cc0.stencil_enable = 1; cc.cc0.stencil_func = intel_translate_compare_func(key->stencil_func[0]); @@ -13,7 +17,7 @@ cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; cc.cc1.stencil_test_mask = key->stencil_test_mask[0]; - if (key->stencil_two_side) { + if (tmpl->stencil[1].enable) { cc.cc0.bf_stencil_enable = 1; cc.cc0.bf_stencil_func = intel_translate_compare_func(key->stencil_func[1]); @@ -30,9 +34,8 @@ /* Not really sure about this: */ - if (key->stencil_write_mask[0] || - (key->stencil_two_side && key->stencil_write_mask[1])) - cc.cc0.stencil_write_enable = 1; + cc.cc0.stencil_write_enable = (cc.cc1.stencil_write_mask || + cc.cc2.bf_stencil_write_mask); } @@ -50,3 +53,6 @@ cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); cc.cc2.depth_write_enable = key->depth_write; } + + +} diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index d4ae332f46..dbf97a0544 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -2,12 +2,12 @@ /** * called from intelDrawBuffer() */ -static void brw_set_draw_region( struct intel_context *intel, +static void brw_set_draw_region( struct pipe_context *pipe, struct intel_region *color_regions[], struct intel_region *depth_region, GLuint num_color_regions) { - struct brw_context *brw = brw_context(&intel->ctx); + struct brw_context *brw = brw_context(pipe); GLuint i; /* release old color/depth regions */ diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c 
b/src/gallium/drivers/i965/brw_pipe_flush.c index 008f623151..d5b7bd3b83 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -13,10 +13,8 @@ static void brw_finish_batch(struct intel_context *intel) /** * called from intelFlushBatchLocked */ -static void brw_new_batch( struct intel_context *intel ) +static void brw_new_batch( struct brw_context *brw ) { - struct brw_context *brw = brw_context(&intel->ctx); - /* Check that we didn't just wrap our batchbuffer at a bad time. */ assert(!brw->no_batch_wrap); @@ -36,14 +34,14 @@ static void brw_new_batch( struct intel_context *intel ) * a new buffer next time. */ if (brw->vb.upload.bo != NULL) { - dri_bo_unreference(brw->vb.upload.bo); + brw->sws->bo_unreference(brw->vb.upload.bo); brw->vb.upload.bo = NULL; brw->vb.upload.offset = 0; } } -static void brw_note_fence( struct intel_context *intel, GLuint fence ) +static void brw_note_fence( struct brw_context *brw, GLuint fence ) { brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; } diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c new file mode 100644 index 0000000000..0b9ba0c0ed --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -0,0 +1,246 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +/** @file support for ARB_query_object + * + * ARB_query_object is implemented by using the PIPE_CONTROL command to stall + * execution on the completion of previous depth tests, and write the + * current PS_DEPTH_COUNT to a buffer object. + * + * We use before and after counts when drawing during a query so that + * we don't pick up other clients' query data in ours. To reduce overhead, + * a single BO is used to record the query data for all active queries at + * once. This also gives us a simple bound on how much batchbuffer space is + * required for handling queries, so that we can be sure that we won't + * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT. 
+ */
+#include "util/u_simple_list.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+/**
+ * Waits on the query object's BO and totals the results for this query.
+ *
+ * Slot i of the BO holds a beginning count at results[i * 2] and an ending
+ * count at results[i * 2 + 1]; the differences for first_index..last_index
+ * are added to query->Base.Result.  The query's reference on the BO is then
+ * dropped and query->bo cleared, so calling this again is a no-op.
+ *
+ * \param brw    context whose winsys (brw->sws) releases the BO reference
+ * \param query  query to total; nothing happens if it holds no BO
+ */
+static void
+brw_queryobj_get_results(struct brw_context *brw,
+                         struct brw_query_object *query)
+{
+   int i;
+   uint64_t *results;
+
+   if (query->bo == NULL)
+      return;
+
+   /* Map and count the pixels from the current query BO */
+   dri_bo_map(query->bo, GL_FALSE);
+   results = query->bo->virtual;
+   for (i = query->first_index; i <= query->last_index; i++) {
+      query->Base.Result += results[i * 2 + 1] - results[i * 2];
+   }
+   dri_bo_unmap(query->bo);
+
+   brw->sws->bo_unreference(query->bo);
+   query->bo = NULL;
+}
+
+/**
+ * Creates a query object of the requested type.
+ *
+ * Only PIPE_QUERY_OCCLUSION_COUNTER is handled.  Returns NULL for any other
+ * type or when the allocation fails.
+ */
+static struct pipe_query *
+brw_query_create(struct pipe_context *pipe, unsigned type )
+{
+   struct brw_query_object *query;
+
+   /* Dispatch on the requested type: 'query' has not been allocated yet,
+    * so it must not be dereferenced here.
+    */
+   switch (type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+      query = CALLOC_STRUCT( brw_query_object );
+      if (query == NULL)
+         return NULL;
+      return &query->Base;
+
+   default:
+      return NULL;
+   }
+}
+
+/**
+ * Destroys a query object: drops its reference on the query BO through the
+ * context's winsys, then frees the object itself.
+ */
+static void
+brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   brw->sws->bo_unreference(query->bo);
+   FREE(query);
+}
+
+/**
+ * Starts a query: resets the per-query BO tracking, puts the query on the
+ * context's active list and flags PIPE_NEW_QUERY in the dirty state.
+ */
+static void
+brw_begin_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   /* Reset our driver's tracking of query state. */
+   brw->sws->bo_unreference(query->bo);
+   query->bo = NULL;
+   query->first_index = -1;
+   query->last_index = -1;
+
+   insert_at_head(&brw->query.active_head, query);
+   brw->stats_wm++;
+   brw->dirty.mesa |= PIPE_NEW_QUERY;
+}
+
+/**
+ * Ends a query: emits the ending count and flushes if the query recorded
+ * anything, then takes the query off the active list and flags
+ * PIPE_NEW_QUERY in the dirty state.
+ */
+static void
+brw_end_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   /* Flush the batchbuffer in case it has writes to our query BO.
+    * Have later queries write to a new query BO so that further rendering
+    * doesn't delay the collection of our results.
+    */
+   if (query->bo) {
+      brw_emit_query_end(brw);
+      intel_batchbuffer_flush(brw->batch);
+
+      brw->sws->bo_unreference(brw->query.bo);
+      brw->query.bo = NULL;
+   }
+
+   remove_from_list(query);
+   brw->stats_wm--;
+   brw->dirty.mesa |= PIPE_NEW_QUERY;
+}
+
+/** Totals the query's results unconditionally and marks it ready. */
+static void brw_wait_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   brw_queryobj_get_results(brw, query);
+   query->Base.Ready = GL_TRUE;
+}
+
+/**
+ * Totals the query's results and marks it ready, but only when the query
+ * holds no BO or drm_intel_bo_busy() reports its BO as no longer busy.
+ */
+static void brw_check_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
+      brw_queryobj_get_results(brw, query);
+      query->Base.Ready = GL_TRUE;
+   }
+}
+
+/**
+ * Called to set up the query BO and account for its aperture space.
+ *
+ * A fresh 4096-byte BO is allocated when there is none yet or when the next
+ * begin/end pair of uint64_t counters would not fit in the current one.
+ */
+void
+brw_prepare_query_begin(struct brw_context *brw)
+{
+   /* Skip if we're not doing any queries. */
+   if (is_empty_list(&brw->query.active_head))
+      return;
+
+   /* Get a new query BO if we're going to need it. */
+   if (brw->query.bo == NULL ||
+       brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
+      brw->sws->bo_unreference(brw->query.bo);
+      brw->query.bo = NULL;
+
+      brw->query.bo = dri_bo_alloc(brw->bufmgr, "query", 4096, 1);
+      brw->query.index = 0;
+   }
+
+   brw_add_validated_bo(brw, brw->query.bo);
+}
+
+/**
+ * Called just before primitive drawing to get a beginning PS_DEPTH_COUNT.
+ *
+ * The count is written to slot (index * 2) of the shared query BO.  Every
+ * active query is then pointed at that BO; a query that was still holding a
+ * different BO has its results totalled first.
+ */
+void
+brw_emit_query_begin(struct brw_context *brw)
+{
+   struct brw_query_object *query;
+
+   /* Skip if we're not doing any queries, or we've emitted the start. */
+   if (brw->query.active || is_empty_list(&brw->query.active_head))
+      return;
+
+   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+             PIPE_CONTROL_DEPTH_STALL |
+             PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   /* This object could be mapped cacheable, but we don't have an exposed
+    * mechanism to support that.  Since it's going uncached, tell GEM that
+    * we're writing to it.  The usual clflush should be all that's required
+    * to pick up the results.
+    */
+   OUT_RELOC(brw->query.bo,
+             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+             PIPE_CONTROL_GLOBAL_GTT_WRITE |
+             ((brw->query.index * 2) * sizeof(uint64_t)));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   foreach(query, &brw->query.active_head) {
+      if (query->bo != brw->query.bo) {
+         if (query->bo != NULL)
+            brw_queryobj_get_results(brw, query);
+         brw->sws->bo_reference(brw->query.bo);
+         query->bo = brw->query.bo;
+         query->first_index = brw->query.index;
+      }
+      query->last_index = brw->query.index;
+   }
+   brw->query.active = GL_TRUE;
+}
+
+/**
+ * Called at batchbuffer flush to get an ending PS_DEPTH_COUNT.
+ *
+ * The count is written to slot (index * 2 + 1) of the shared query BO, after
+ * which the slot index advances.  Does nothing unless a beginning count has
+ * been emitted.
+ */
+void
+brw_emit_query_end(struct brw_context *brw)
+{
+   if (!brw->query.active)
+      return;
+
+   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+             PIPE_CONTROL_DEPTH_STALL |
+             PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   OUT_RELOC(brw->query.bo,
+             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+             PIPE_CONTROL_GLOBAL_GTT_WRITE |
+             ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   brw->query.active = GL_FALSE;
+   brw->query.index++;
+}
+
+/**
+ * Installs the query callbacks into a dd_function_table.
+ *
+ * NOTE(review): this still assigns brw_new_query_object and
+ * brw_delete_query, which are not defined in this file (the functions above
+ * are named brw_query_create and brw_query_destroy and take a pipe_context)
+ * -- presumably left over from the DRI driver; confirm how these hooks
+ * should be wired up for gallium.
+ */
+void brw_init_queryobj_functions(struct dd_function_table *functions)
+{
+   functions->NewQueryObject = brw_new_query_object;
+   functions->DeleteQuery = brw_delete_query;
+   functions->BeginQuery = brw_begin_query;
+   functions->EndQuery = brw_end_query;
+   functions->CheckQuery = brw_check_query;
+   functions->WaitQuery = brw_wait_query;
+}
diff --git a/src/gallium/drivers/i965/brw_program.c
b/src/gallium/drivers/i965/brw_program.c deleted file mode 100644 index bac69187c1..0000000000 --- a/src/gallium/drivers/i965/brw_program.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "main/imports.h" -#include "main/enums.h" -#include "shader/prog_parameter.h" -#include "shader/program.h" -#include "shader/programopt.h" -#include "tnl/tnl.h" - -#include "brw_context.h" -#include "brw_util.h" -#include "brw_wm.h" - -static void brwBindProgram( GLcontext *ctx, - GLenum target, - struct gl_program *prog ) -{ - struct brw_context *brw = brw_context(ctx); - - switch (target) { - case GL_VERTEX_PROGRAM_ARB: - brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - break; - case GL_FRAGMENT_PROGRAM_ARB: - brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - break; - } -} - -static struct gl_program *brwNewProgram( GLcontext *ctx, - GLenum target, - GLuint id ) -{ - struct brw_context *brw = brw_context(ctx); - - switch (target) { - case GL_VERTEX_PROGRAM_ARB: { - struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program); - if (prog) { - prog->id = brw->program_id++; - - return _mesa_init_vertex_program( ctx, &prog->program, - target, id ); - } - else - return NULL; - } - - case GL_FRAGMENT_PROGRAM_ARB: { - struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program); - if (prog) { - prog->id = brw->program_id++; - - return _mesa_init_fragment_program( ctx, &prog->program, - target, id ); - } - else - return NULL; - } - - default: - return _mesa_new_program(ctx, target, id); - } -} - -static void brwDeleteProgram( GLcontext *ctx, - struct gl_program *prog ) -{ - if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); - dri_bo_unreference(brw_fprog->const_buffer); - } - - _mesa_delete_program( ctx, prog ); -} - - -static GLboolean brwIsProgramNative( GLcontext *ctx, - GLenum target, - struct gl_program *prog ) -{ - return GL_TRUE; -} - -static void brwProgramStringNotify( 
GLcontext *ctx, - GLenum target, - struct gl_program *prog ) -{ - struct brw_context *brw = brw_context(ctx); - - if (target == GL_FRAGMENT_PROGRAM_ARB) { - struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_fragment_program *newFP = brw_fragment_program(fprog); - const struct brw_fragment_program *curFP = - brw_fragment_program_const(brw->fragment_program); - - if (fprog->FogOption) { - _mesa_append_fog_code(ctx, fprog); - fprog->FogOption = GL_NONE; - } - - if (newFP == curFP) - brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - newFP->id = brw->program_id++; - newFP->isGLSL = brw_wm_is_glsl(fprog); - } - else if (target == GL_VERTEX_PROGRAM_ARB) { - struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; - struct brw_vertex_program *newVP = brw_vertex_program(vprog); - const struct brw_vertex_program *curVP = - brw_vertex_program_const(brw->vertex_program); - - if (newVP == curVP) - brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - if (newVP->program.IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, &newVP->program); - } - newVP->id = brw->program_id++; - - /* Also tell tnl about it: - */ - _tnl_program_string(ctx, target, prog); - } -} - -void brwInitFragProgFuncs( struct dd_function_table *functions ) -{ - assert(functions->ProgramStringNotify == _tnl_program_string); - - functions->BindProgram = brwBindProgram; - functions->NewProgram = brwNewProgram; - functions->DeleteProgram = brwDeleteProgram; - functions->IsProgramNative = brwIsProgramNative; - functions->ProgramStringNotify = brwProgramStringNotify; -} - diff --git a/src/gallium/drivers/i965/brw_queryobj.c b/src/gallium/drivers/i965/brw_queryobj.c deleted file mode 100644 index a195bc32b0..0000000000 --- a/src/gallium/drivers/i965/brw_queryobj.c +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright © 2008 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation 
files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -/** @file support for ARB_query_object - * - * ARB_query_object is implemented by using the PIPE_CONTROL command to stall - * execution on the completion of previous depth tests, and write the - * current PS_DEPTH_COUNT to a buffer object. - * - * We use before and after counts when drawing during a query so that - * we don't pick up other clients' query data in ours. To reduce overhead, - * a single BO is used to record the query data for all active queries at - * once. This also gives us a simple bound on how much batchbuffer space is - * required for handling queries, so that we can be sure that we won't - * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT. 
- */ -#include "main/simple_list.h" -#include "main/imports.h" - -#include "brw_context.h" -#include "brw_state.h" -#include "intel_batchbuffer.h" -#include "intel_reg.h" - -/** Waits on the query object's BO and totals the results for this query */ -static void -brw_queryobj_get_results(struct brw_query_object *query) -{ - int i; - uint64_t *results; - - if (query->bo == NULL) - return; - - /* Map and count the pixels from the current query BO */ - dri_bo_map(query->bo, GL_FALSE); - results = query->bo->virtual; - for (i = query->first_index; i <= query->last_index; i++) { - query->Base.Result += results[i * 2 + 1] - results[i * 2]; - } - dri_bo_unmap(query->bo); - - dri_bo_unreference(query->bo); - query->bo = NULL; -} - -static struct gl_query_object * -brw_new_query_object(GLcontext *ctx, GLuint id) -{ - struct brw_query_object *query; - - query = _mesa_calloc(sizeof(struct brw_query_object)); - - query->Base.Id = id; - query->Base.Result = 0; - query->Base.Active = GL_FALSE; - query->Base.Ready = GL_TRUE; - - return &query->Base; -} - -static void -brw_delete_query(GLcontext *ctx, struct gl_query_object *q) -{ - struct brw_query_object *query = (struct brw_query_object *)q; - - dri_bo_unreference(query->bo); - _mesa_free(query); -} - -static void -brw_begin_query(GLcontext *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct intel_context *intel = intel_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - - /* Reset our driver's tracking of query state. */ - dri_bo_unreference(query->bo); - query->bo = NULL; - query->first_index = -1; - query->last_index = -1; - - insert_at_head(&brw->query.active_head, query); - intel->stats_wm++; -} - -/** - * Begin the ARB_occlusion_query query on a query object. 
- */ -static void -brw_end_query(GLcontext *ctx, struct gl_query_object *q) -{ - struct brw_context *brw = brw_context(ctx); - struct intel_context *intel = intel_context(ctx); - struct brw_query_object *query = (struct brw_query_object *)q; - - /* Flush the batchbuffer in case it has writes to our query BO. - * Have later queries write to a new query BO so that further rendering - * doesn't delay the collection of our results. - */ - if (query->bo) { - brw_emit_query_end(brw); - intel_batchbuffer_flush(intel->batch); - - dri_bo_unreference(brw->query.bo); - brw->query.bo = NULL; - } - - remove_from_list(query); - - intel->stats_wm--; -} - -static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q) -{ - struct brw_query_object *query = (struct brw_query_object *)q; - - brw_queryobj_get_results(query); - query->Base.Ready = GL_TRUE; -} - -static void brw_check_query(GLcontext *ctx, struct gl_query_object *q) -{ - struct brw_query_object *query = (struct brw_query_object *)q; - - if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) { - brw_queryobj_get_results(query); - query->Base.Ready = GL_TRUE; - } -} - -/** Called to set up the query BO and account for its aperture space */ -void -brw_prepare_query_begin(struct brw_context *brw) -{ - struct intel_context *intel = &brw->intel; - - /* Skip if we're not doing any queries. */ - if (is_empty_list(&brw->query.active_head)) - return; - - /* Get a new query BO if we're going to need it. */ - if (brw->query.bo == NULL || - brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) { - dri_bo_unreference(brw->query.bo); - brw->query.bo = NULL; - - brw->query.bo = dri_bo_alloc(intel->bufmgr, "query", 4096, 1); - brw->query.index = 0; - } - - brw_add_validated_bo(brw, brw->query.bo); -} - -/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. 
*/ -void -brw_emit_query_begin(struct brw_context *brw) -{ - struct intel_context *intel = &brw->intel; - struct brw_query_object *query; - - /* Skip if we're not doing any queries, or we've emitted the start. */ - if (brw->query.active || is_empty_list(&brw->query.active_head)) - return; - - BEGIN_BATCH(4, IGNORE_CLIPRECTS); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_WRITE_DEPTH_COUNT); - /* This object could be mapped cacheable, but we don't have an exposed - * mechanism to support that. Since it's going uncached, tell GEM that - * we're writing to it. The usual clflush should be all that's required - * to pick up the results. - */ - OUT_RELOC(brw->query.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | - ((brw->query.index * 2) * sizeof(uint64_t))); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - foreach(query, &brw->query.active_head) { - if (query->bo != brw->query.bo) { - if (query->bo != NULL) - brw_queryobj_get_results(query); - dri_bo_reference(brw->query.bo); - query->bo = brw->query.bo; - query->first_index = brw->query.index; - } - query->last_index = brw->query.index; - } - brw->query.active = GL_TRUE; -} - -/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */ -void -brw_emit_query_end(struct brw_context *brw) -{ - struct intel_context *intel = &brw->intel; - - if (!brw->query.active) - return; - - BEGIN_BATCH(4, IGNORE_CLIPRECTS); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_WRITE_DEPTH_COUNT); - OUT_RELOC(brw->query.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | - ((brw->query.index * 2 + 1) * sizeof(uint64_t))); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - brw->query.active = GL_FALSE; - brw->query.index++; -} - -void brw_init_queryobj_functions(struct dd_function_table *functions) -{ - functions->NewQueryObject = brw_new_query_object; - 
functions->DeleteQuery = brw_delete_query; - functions->BeginQuery = brw_begin_query; - functions->EndQuery = brw_end_query; - functions->CheckQuery = brw_check_query; - functions->WaitQuery = brw_wait_query; -} diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 90513245ee..0115f77c08 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -30,10 +30,6 @@ */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" - #include "intel_batchbuffer.h" #include "brw_defines.h" @@ -46,7 +42,6 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_prog_key *key ) { - GLcontext *ctx = &brw->intel.ctx; struct brw_sf_compile c; const GLuint *program; GLuint program_size; @@ -116,7 +111,7 @@ static void compile_sf_prog( struct brw_context *brw, /* Upload */ - dri_bo_unreference(brw->sf.prog_bo); + brw->sws->bo_unreference(brw->sf.prog_bo); brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, &c.key, sizeof(c.key), NULL, 0, @@ -129,7 +124,6 @@ static void compile_sf_prog( struct brw_context *brw, */ static void upload_sf_prog(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; struct brw_sf_prog_key key; memset(&key, 0, sizeof(key)); @@ -167,7 +161,7 @@ static void upload_sf_prog(struct brw_context *brw) key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_HINT */ - key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST); + key.linear_color = 0; /* _NEW_POLYGON */ if (key.do_twoside_color) { @@ -179,7 +173,7 @@ static void upload_sf_prog(struct brw_context *brw) key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0); } - dri_bo_unreference(brw->sf.prog_bo); + brw->sws->bo_unreference(brw->sf.prog_bo); brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, &key, sizeof(key), NULL, 0, diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h index 
6426b6df9f..26c2e8891a 100644 --- a/src/gallium/drivers/i965/brw_sf.h +++ b/src/gallium/drivers/i965/brw_sf.h @@ -45,19 +45,23 @@ #define SF_UNFILLED_TRIS 3 struct brw_sf_prog_key { - GLuint attrs:32; + + /* Bitmask of linear and perspective interpolated inputs, 0..nr + */ + GLuint persp_attrs:32; + GLuint linear_attrs:32; + GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; - GLuint linear_color:1; /**< linear interp vs. perspective interp */ + GLuint sprite_origin_lower_left:1; GLuint pad:25; - GLenum SpriteOrigin; }; struct brw_sf_point_tex { - GLboolean CoordReplace; + GLboolean CoordReplace; }; struct brw_sf_compile { diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index 4cc427a935..c98d7ec13a 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -30,10 +30,6 @@ */ -#include "main/glheader.h" -#include "main/macros.h" -#include "main/enums.h" - #include "intel_batchbuffer.h" #include "brw_defines.h" @@ -305,6 +301,10 @@ static void invert_det( struct brw_sf_compile *c) } +/* Two attributes packed into a wide register. Figure out if either + * or both of them need linear/perspective interpolation. Constant + * regs are left as-is. 
+ */ static GLboolean calculate_masks( struct brw_sf_compile *c, GLuint reg, GLushort *pc, @@ -312,20 +312,8 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, GLushort *pc_linear) { GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); - GLuint persp_mask; - GLuint linear_mask; - - if (c->key.do_flat_shading || c->key.linear_color) - persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS | - FRAG_BIT_COL0 | - FRAG_BIT_COL1); - else - persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS); - - if (c->key.do_flat_shading) - linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1); - else - linear_mask = c->key.attrs; + GLuint persp_mask = c->key.persp_attrs; + GLuint linear_mask = c->key.linear_attrs; *pc_persp = 0; *pc_linear = 0; @@ -570,7 +558,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) { brw_set_predicate_control_flag_value(p, pc); if (tex->CoordReplace) { - if (c->key.SpriteOrigin == GL_LOWER_LEFT) { + if (c->key.sprite_origin_lower_left) { brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); } diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index bc0f076073..5e1229d22f 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -34,12 +34,9 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "main/macros.h" -#include "intel_fbo.h" static void upload_sf_vp(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport sfv; GLfloat y_scale, y_bias; @@ -92,7 +89,7 @@ static void upload_sf_vp(struct brw_context *brw) sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; } - dri_bo_unreference(brw->sf.vp_bo); + brw->sws->bo_unreference(brw->sf.vp_bo); brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); } @@ -126,7 +123,6 @@ struct 
brw_sf_unit_key { static void sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) { - GLcontext *ctx = &brw->intel.ctx; memset(key, 0, sizeof(*key)); /* CACHE_NEW_SF_PROG */ @@ -159,12 +155,12 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; } -static dri_bo * +static struct brw_winsys_buffer * sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, - dri_bo **reloc_bufs) + struct brw_winsys_buffer **reloc_bufs) { struct brw_sf_unit_state sf; - dri_bo *bo; + struct brw_winsys_buffer *bo; int chipset_max_threads; memset(&sf, 0, sizeof(sf)); @@ -332,14 +328,14 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, static void upload_sf_unit( struct brw_context *brw ) { struct brw_sf_unit_key key; - dri_bo *reloc_bufs[2]; + struct brw_winsys_buffer *reloc_bufs[2]; sf_unit_populate_key(brw, &key); reloc_bufs[0] = brw->sf.prog_bo; reloc_bufs[1] = brw->sf.vp_bo; - dri_bo_unreference(brw->sf.state_bo); + brw->sws->bo_unreference(brw->sf.state_bo); brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT, &key, sizeof(key), reloc_bufs, 2, diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index d639656b9d..a007d542d0 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -36,12 +36,12 @@ #include "brw_context.h" static inline void -brw_add_validated_bo(struct brw_context *brw, dri_bo *bo) +brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo) { assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos)); if (bo != NULL) { - dri_bo_reference(bo); + brw->sws->bo_reference(bo); brw->state.validated_bos[brw->state.validated_bo_count++] = bo; } }; @@ -95,9 +95,9 @@ const struct brw_tracked_state brw_index_buffer; * Use same key for WM and VS surfaces. 
*/ struct brw_surface_key { - GLenum target, depthmode; - dri_bo *bo; - GLint format, internal_format; + unsigned target; + struct brw_winsys_buffer *bo; + GLint format; GLint first_level, last_level; GLint width, height, depth; GLint pitch, cpp; @@ -116,42 +116,42 @@ void brw_destroy_state(struct brw_context *brw); /*********************************************************************** * brw_state_cache.c */ -dri_bo *brw_cache_data(struct brw_cache *cache, +struct brw_winsys_buffer *brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, - dri_bo **reloc_bufs, + struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs); -dri_bo *brw_cache_data_sz(struct brw_cache *cache, +struct brw_winsys_buffer *brw_cache_data_sz(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, GLuint data_size, - dri_bo **reloc_bufs, + struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs); -dri_bo *brw_upload_cache( struct brw_cache *cache, +struct brw_winsys_buffer *brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_sz, - dri_bo **reloc_bufs, + struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs, const void *data, GLuint data_sz, const void *aux, void *aux_return ); -dri_bo *brw_search_cache( struct brw_cache *cache, +struct brw_winsys_buffer *brw_search_cache( struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - dri_bo **reloc_bufs, + struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs, void *aux_return); void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); void brw_destroy_caches( struct brw_context *brw ); -void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo); +void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo); /*********************************************************************** * brw_state_batch.c @@ -166,7 +166,7 @@ 
void brw_destroy_batch_cache( struct brw_context *brw ); void brw_clear_batch_cache( struct brw_context *brw ); /* brw_wm_surface_state.c */ -dri_bo * +struct brw_winsys_buffer * brw_create_constant_surface( struct brw_context *brw, struct brw_surface_key *key ); diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c index 7821898cf9..9568794625 100644 --- a/src/gallium/drivers/i965/brw_state_batch.c +++ b/src/gallium/drivers/i965/brw_state_batch.c @@ -33,7 +33,6 @@ #include "brw_state.h" #include "intel_batchbuffer.h" -#include "main/imports.h" diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index c262e1db8b..91d0f80297 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -56,7 +56,6 @@ * incorrect program is run for the other instance. */ -#include "main/imports.h" #include "brw_state.h" #include "intel_batchbuffer.h" @@ -72,7 +71,7 @@ static GLuint hash_key(const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) + struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -88,7 +87,7 @@ hash_key(const void *key, GLuint key_size, /* Include the BO pointers as key data as well */ ikey = (GLuint *)reloc_bufs; - key_size = nr_reloc_bufs * sizeof(dri_bo *); + key_size = nr_reloc_bufs * sizeof(struct brw_winsys_buffer *); for (i = 0; i < key_size/4; i++) { hash ^= ikey[i]; hash = (hash << 5) | (hash >> 27); @@ -103,14 +102,14 @@ hash_key(const void *key, GLuint key_size, */ static void update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, - dri_bo *bo) + struct brw_winsys_buffer *bo) { if (bo == cache->last_bo[cache_id]) return; /* no change */ - dri_bo_unreference(cache->last_bo[cache_id]); + brw->sws->bo_unreference(cache->last_bo[cache_id]); cache->last_bo[cache_id] = bo; - dri_bo_reference(cache->last_bo[cache_id]); + 
brw->sws->bo_reference(cache->last_bo[cache_id]); cache->brw->state.dirty.cache |= 1 << cache_id; } @@ -118,7 +117,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, static struct brw_cache_item * search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, GLuint hash, const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs) + struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs) { struct brw_cache_item *c; @@ -139,7 +138,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, memcmp(c->key, key, key_size) == 0 && c->nr_reloc_bufs == nr_reloc_bufs && memcmp(c->reloc_bufs, reloc_bufs, - nr_reloc_bufs * sizeof(dri_bo *)) == 0) + nr_reloc_bufs * sizeof(struct brw_winsys_buffer *)) == 0) return c; } @@ -173,12 +172,12 @@ rehash(struct brw_cache *cache) /** * Returns the buffer object matching cache_id and key, or NULL. */ -dri_bo * +struct brw_winsys_buffer * brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs, void *aux_return) { struct brw_cache_item *item; @@ -195,17 +194,17 @@ brw_search_cache(struct brw_cache *cache, update_cache_last(cache, cache_id, item->bo); - dri_bo_reference(item->bo); + brw->sws->bo_reference(item->bo); return item->bo; } -dri_bo * +struct brw_winsys_buffer * brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - dri_bo **reloc_bufs, + struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs, const void *data, GLuint data_size, @@ -214,10 +213,10 @@ brw_upload_cache( struct brw_cache *cache, { struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); - GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *); + GLuint relocs_size = nr_reloc_bufs * sizeof(struct brw_winsys_buffer *); GLuint 
aux_size = cache->aux_size[cache_id]; void *tmp; - dri_bo *bo; + struct brw_winsys_buffer *bo; int i; /* Create the buffer object to contain the data */ @@ -233,7 +232,7 @@ brw_upload_cache( struct brw_cache *cache, memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); for (i = 0; i < nr_reloc_bufs; i++) { if (reloc_bufs[i] != NULL) - dri_bo_reference(reloc_bufs[i]); + brw->sws->bo_reference(reloc_bufs[i]); } item->cache_id = cache_id; @@ -244,7 +243,7 @@ brw_upload_cache( struct brw_cache *cache, item->nr_reloc_bufs = nr_reloc_bufs; item->bo = bo; - dri_bo_reference(bo); + brw->sws->bo_reference(bo); item->data_size = data_size; if (cache->n_items > cache->size * 1.5) @@ -277,15 +276,15 @@ brw_upload_cache( struct brw_cache *cache, /** * This doesn't really work with aux data. Use search/upload instead */ -dri_bo * +struct brw_winsys_buffer * brw_cache_data_sz(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, GLuint data_size, - dri_bo **reloc_bufs, + struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs) { - dri_bo *bo; + struct brw_winsys_buffer *bo; struct brw_cache_item *item; GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs); @@ -293,7 +292,7 @@ brw_cache_data_sz(struct brw_cache *cache, reloc_bufs, nr_reloc_bufs); if (item) { update_cache_last(cache, cache_id, item->bo); - dri_bo_reference(item->bo); + brw->sws->bo_reference(item->bo); return item->bo; } @@ -314,11 +313,11 @@ brw_cache_data_sz(struct brw_cache *cache, * better to use, as the potentially changing offsets in the data-used-as-key * will result in excessive cache misses. 
*/ -dri_bo * +struct brw_winsys_buffer * brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, - dri_bo **reloc_bufs, + struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs) { return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id], @@ -497,8 +496,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) next = c->next; for (j = 0; j < c->nr_reloc_bufs; j++) - dri_bo_unreference(c->reloc_bufs[j]); - dri_bo_unreference(c->bo); + brw->sws->bo_unreference(c->reloc_bufs[j]); + brw->sws->bo_unreference(c->bo); free((void *)c->key); free(c); } @@ -523,7 +522,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) * at the cost of walking the entire hash table. */ void -brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) +brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo) { struct brw_cache_item **prev; GLuint i; @@ -535,14 +534,14 @@ brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo) for (prev = &cache->items[i]; *prev;) { struct brw_cache_item *c = *prev; - if (drm_intel_bo_references(c->bo, bo)) { + if (cache->sws->bo_references(c->bo, bo)) { int j; *prev = c->next; for (j = 0; j < c->nr_reloc_bufs; j++) - dri_bo_unreference(c->reloc_bufs[j]); - dri_bo_unreference(c->bo); + brw->sws->bo_unreference(c->reloc_bufs[j]); + brw->sws->bo_unreference(c->bo); free((void *)c->key); free(c); cache->n_items--; @@ -580,7 +579,7 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { - dri_bo_unreference(cache->last_bo[i]); + brw->sws->bo_unreference(cache->last_bo[i]); free(cache->name[i]); } free(cache->items); diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c index e94fa7d2b4..1bc83fb9c1 100644 --- a/src/gallium/drivers/i965/brw_state_dump.c +++ b/src/gallium/drivers/i965/brw_state_dump.c @@ -25,8 +25,6 @@ * */ 
-#include "main/mtypes.h" - #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" @@ -55,7 +53,7 @@ state_out(const char *name, void *data, uint32_t hw_offset, int index, /** Generic, undecoded state buffer debug printout */ static void -state_struct_out(const char *name, dri_bo *buffer, unsigned int state_size) +state_struct_out(const char *name, struct brw_winsys_buffer *buffer, unsigned int state_size) { int i; @@ -102,7 +100,7 @@ static void dump_wm_surface_state(struct brw_context *brw) int i; for (i = 0; i < brw->wm.nr_surfaces; i++) { - dri_bo *surf_bo = brw->wm.surf_bo[i]; + struct brw_winsys_buffer *surf_bo = brw->wm.surf_bo[i]; unsigned int surfoff; struct brw_surface_state *surf; char name[20]; @@ -162,7 +160,7 @@ static void dump_sf_viewport_state(struct brw_context *brw) dri_bo_unmap(brw->sf.vp_bo); } -static void brw_debug_prog(const char *name, dri_bo *prog) +static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog) { unsigned int i; uint32_t *data; @@ -202,10 +200,8 @@ static void brw_debug_prog(const char *name, dri_bo *prog) * The buffer offsets printed rely on the buffer containing the last offset * it was validated at. 
*/ -void brw_debug_batch(struct intel_context *intel) +void brw_debug_batch(struct brw_context *brw) { - struct brw_context *brw = brw_context(&intel->ctx); - state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); dump_wm_surface_state(brw); diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index 6801084616..b68b6cb21a 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -149,7 +149,7 @@ brw_clear_validated_bos(struct brw_context *brw) /* Clear the last round of validated bos */ for (i = 0; i < brw->state.validated_bo_count; i++) { - dri_bo_unreference(brw->state.validated_bos[i]); + brw->sws->bo_unreference(brw->state.validated_bos[i]); brw->state.validated_bos[i] = NULL; } brw->state.validated_bo_count = 0; @@ -272,8 +272,6 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) */ enum pipe_error brw_validate_state( struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; - struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; GLuint i; diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h index 66d4127271..27d264c3de 100644 --- a/src/gallium/drivers/i965/brw_structs.h +++ b/src/gallium/drivers/i965/brw_structs.h @@ -33,6 +33,7 @@ #ifndef BRW_STRUCTS_H #define BRW_STRUCTS_H +#include "brw_types.h" /** Number of general purpose registers (VS, WM, etc) */ #define BRW_MAX_GRF 128 diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c index 6684f442d5..83f138f67a 100644 --- a/src/gallium/drivers/i965/brw_swtnl.c +++ b/src/gallium/drivers/i965/brw_swtnl.c @@ -6,7 +6,6 @@ static GLboolean check_fallbacks( struct brw_context *brw, const struct _mesa_prim *prim, GLuint nr_prims ) { - GLcontext *ctx = &brw->intel.ctx; GLuint i; /* If we don't require strict OpenGL conformance, never diff --git 
a/src/gallium/drivers/i965/brw_tex.c b/src/gallium/drivers/i965/brw_tex.c index e911b105b2..c33c19ee51 100644 --- a/src/gallium/drivers/i965/brw_tex.c +++ b/src/gallium/drivers/i965/brw_tex.c @@ -30,11 +30,6 @@ */ -#include "main/glheader.h" -#include "main/mtypes.h" -#include "main/teximage.h" - -#include "intel_context.h" #include "intel_regions.h" #include "intel_tex.h" #include "brw_context.h" @@ -45,8 +40,6 @@ */ void brw_validate_textures( struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; - struct intel_context *intel = &brw->intel; int i; for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c index 5986cbffad..75cdc18912 100644 --- a/src/gallium/drivers/i965/brw_tex_layout.c +++ b/src/gallium/drivers/i965/brw_tex_layout.c @@ -34,13 +34,11 @@ #include "intel_mipmap_tree.h" #include "intel_tex_layout.h" -#include "intel_context.h" -#include "main/macros.h" #include "intel_chipset.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE -GLboolean brw_miptree_layout(struct intel_context *intel, +GLboolean brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t tiling) { @@ -67,7 +65,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, mt->pitch = ALIGN(mt->width0, align_w); } - if (mt->first_level != mt->last_level) { + if (mt->last_level != 0) { GLuint mip1_width; if (mt->compressed) { @@ -93,7 +91,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; } - for (level = mt->first_level; level <= mt->last_level; level++) { + for (level = 0; level <= mt->last_level; level++) { GLuint img_height; GLuint nr_images = 6; GLuint q = 0; @@ -109,7 +107,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, else img_height = ALIGN(height, align_h); - if (level == mt->first_level + 1) { + if (level == 1) { x += ALIGN(width, align_w); } else { @@ -147,7 
+145,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel, pack_x_pitch = width; pack_x_nr = 1; - for (level = mt->first_level ; level <= mt->last_level ; level++) { + for (level = 0 ; level <= mt->last_level ; level++) { GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; GLint x = 0; GLint y = 0; diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h index 32b62848da..87dae13d94 100644 --- a/src/gallium/drivers/i965/brw_types.h +++ b/src/gallium/drivers/i965/brw_types.h @@ -1,11 +1,18 @@ #ifndef BRW_TYPES_H #define BRW_TYPES_H -typedef GLuint uint32_t; -typedef GLubyte uint8_t; -typedef GLushort uint16_t; +#include "pipe/p_compiler.h" + +typedef uint32_t GLuint; +typedef uint8_t GLubyte; +typedef uint16_t GLushort; +typedef int32_t GLint; +typedef int8_t GLbyte; +typedef int16_t GLshort; +typedef float GLfloat; + /* no GLenum, translate all away */ -typedef GLboolean uint8_t; +typedef uint8_t GLboolean; #endif diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c index 17f671a8fa..c5244e58ab 100644 --- a/src/gallium/drivers/i965/brw_util.c +++ b/src/gallium/drivers/i965/brw_util.c @@ -30,8 +30,6 @@ */ -#include "main/mtypes.h" -#include "shader/prog_parameter.h" #include "brw_util.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_util.h b/src/gallium/drivers/i965/brw_util.h index 33e7cd87e4..37c3acbc11 100644 --- a/src/gallium/drivers/i965/brw_util.h +++ b/src/gallium/drivers/i965/brw_util.h @@ -33,7 +33,7 @@ #ifndef BRW_UTIL_H #define BRW_UTIL_H -#include "main/mtypes.h" +#include "brw_types.h" extern GLuint brw_count_bits( GLuint val ); extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 53a5560105..97e523c3ee 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -71,7 +71,7 @@ static void 
do_vs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); - dri_bo_unreference(brw->vs.prog_bo); + brw->sws->bo_unreference(brw->vs.prog_bo); brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG, &c.key, sizeof(c.key), NULL, 0, @@ -83,7 +83,6 @@ static void do_vs_prog( struct brw_context *brw, static void brw_upload_vs_prog(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; struct brw_vs_prog_key key; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; @@ -100,7 +99,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) /* Make an early check for the key. */ - dri_bo_unreference(brw->vs.prog_bo); + brw->sws->bo_unreference(brw->vs.prog_bo); brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG, &key, sizeof(key), NULL, 0, diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 7f20c4baca..6adb743017 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -30,9 +30,6 @@ */ -#include "main/macros.h" -#include "shader/program.h" -#include "shader/prog_parameter.h" #include "pipe/p_shader_tokens.h" #include "brw_context.h" #include "brw_vs.h" diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index d790ab6555..1717223e49 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -34,7 +34,6 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "main/macros.h" struct brw_vs_unit_key { unsigned int total_grf; @@ -51,8 +50,6 @@ struct brw_vs_unit_key { static void vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) { - GLcontext *ctx = &brw->intel.ctx; - memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ @@ -79,11 +76,11 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) } } -static dri_bo * +static struct brw_winsys_buffer 
* vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) { struct brw_vs_unit_state vs; - dri_bo *bo; + struct brw_winsys_buffer *bo; int chipset_max_threads; memset(&vs, 0, sizeof(vs)); @@ -163,7 +160,7 @@ static void prepare_vs_unit(struct brw_context *brw) vs_unit_populate_key(brw, &key); - dri_bo_unreference(brw->vs.state_bo); + brw->sws->bo_unreference(brw->vs.state_bo); brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT, &key, sizeof(key), &brw->vs.prog_bo, 1, diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c index 89f47522a1..6446e8e761 100644 --- a/src/gallium/drivers/i965/brw_vs_surface_state.c +++ b/src/gallium/drivers/i965/brw_vs_surface_state.c @@ -29,11 +29,6 @@ * Keith Whitwell */ -#include "main/mtypes.h" -#include "main/texformat.h" -#include "main/texstore.h" -#include "shader/prog_parameter.h" - #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" @@ -47,7 +42,6 @@ static drm_intel_bo * brw_vs_update_constant_buffer(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; struct brw_vertex_program *vp = (struct brw_vertex_program *) brw->vertex_program; const struct gl_program_parameter_list *params = vp->program.Base.Parameters; @@ -73,7 +67,7 @@ brw_vs_update_constant_buffer(struct brw_context *brw) * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer. */ static void -brw_update_vs_constant_surface( GLcontext *ctx, +brw_update_vs_constant_surface( struct brw_context *brw, GLuint surf) { struct brw_context *brw = brw_context(ctx); @@ -87,7 +81,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, /* If we're in this state update atom, we need to update VS constants, so * free the old buffer and create a new one for the new contents. 
*/ - dri_bo_unreference(vp->const_buffer); + brw->sws->bo_unreference(vp->const_buffer); vp->const_buffer = brw_vs_update_constant_buffer(brw); /* If there's no constant buffer, then no surface BO is needed to point at @@ -101,8 +95,7 @@ brw_update_vs_constant_surface( GLcontext *ctx, memset(&key, 0, sizeof(key)); - key.format = MESA_FORMAT_RGBA_FLOAT32; - key.internal_format = GL_RGBA; + key.format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.bo = vp->const_buffer; key.depthmode = GL_NONE; key.pitch = params->NumParameters; @@ -132,10 +125,10 @@ brw_update_vs_constant_surface( GLcontext *ctx, /** * Constructs the binding table for the VS surface state. */ -static dri_bo * +static struct brw_winsys_buffer * brw_vs_get_binding_table(struct brw_context *brw) { - dri_bo *bind_bo; + struct brw_winsys_buffer *bind_bo; bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, @@ -186,7 +179,6 @@ brw_vs_get_binding_table(struct brw_context *brw) */ static void prepare_vs_surfaces(struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; int i; int nr_surfaces = 0; @@ -208,7 +200,7 @@ static void prepare_vs_surfaces(struct brw_context *brw ) * just slightly increases our working set size. 
*/ if (brw->vs.nr_surfaces != 0) { - dri_bo_unreference(brw->vs.bind_bo); + brw->sws->bo_unreference(brw->vs.bind_bo); brw->vs.bind_bo = brw_vs_get_binding_table(brw); } } diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 20d31880b4..32b8900bac 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -29,7 +29,6 @@ * Keith Whitwell */ -#include "main/texformat.h" #include "brw_context.h" #include "brw_util.h" #include "brw_wm.h" @@ -186,7 +185,7 @@ static void do_wm_prog( struct brw_context *brw, */ program = brw_get_program(&c->func, &program_size); - dri_bo_unreference(brw->wm.prog_bo); + brw->sws->bo_unreference(brw->wm.prog_bo); brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG, &c->key, sizeof(c->key), NULL, 0, @@ -200,7 +199,6 @@ static void do_wm_prog( struct brw_context *brw, static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { - GLcontext *ctx = &brw->intel.ctx; /* BRW_NEW_FRAGMENT_PROGRAM */ const struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; @@ -329,7 +327,7 @@ static void brw_prepare_wm_prog(struct brw_context *brw) /* Make an early check for the key. 
*/ - dri_bo_unreference(brw->wm.prog_bo); + brw->sws->bo_unreference(brw->wm.prog_bo); brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, &key, sizeof(key), NULL, 0, diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 9c47c46a3d..fec33f74eb 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -30,7 +30,6 @@ */ -#include "main/macros.h" #include "brw_context.h" #include "brw_wm.h" diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index d836e2fb34..c4f0711793 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -1,7 +1,3 @@ -#include "main/macros.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" -#include "shader/prog_optimize.h" #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" diff --git a/src/gallium/drivers/i965/brw_wm_iz.c b/src/gallium/drivers/i965/brw_wm_iz.c index 5e399ac62a..6f1e9fcc3c 100644 --- a/src/gallium/drivers/i965/brw_wm_iz.c +++ b/src/gallium/drivers/i965/brw_wm_iz.c @@ -30,7 +30,6 @@ */ -#include "main/mtypes.h" #include "brw_wm.h" diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c index dff466587a..a8993f9312 100644 --- a/src/gallium/drivers/i965/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -34,9 +34,6 @@ #include "brw_state.h" #include "brw_defines.h" -#include "main/macros.h" - - /* Samplers aren't strictly wm state from the hardware's perspective, * but that is the only situation in which we use them in this driver. 
@@ -79,7 +76,7 @@ static GLint S_FIXED(GLfloat value, GLuint frac_bits) } -static dri_bo *upload_default_color( struct brw_context *brw, +static struct brw_winsys_buffer *upload_default_color( struct brw_context *brw, const GLfloat *color ) { struct brw_sampler_default_color sdc; @@ -102,7 +99,7 @@ struct wm_sampler_key { float max_aniso; GLenum minfilter, magfilter; GLenum comparemode, comparefunc; - dri_bo *sdc_bo; + struct brw_winsys_buffer *sdc_bo; /** If target is cubemap, take context setting. */ @@ -115,7 +112,7 @@ struct wm_sampler_key { * entry. */ static void brw_update_sampler_state(struct wm_sampler_entry *key, - dri_bo *sdc_bo, + struct brw_winsys_buffer *sdc_bo, struct brw_sampler_state *sampler) { _mesa_memset(sampler, 0, sizeof(*sampler)); @@ -240,7 +237,6 @@ static void brw_wm_sampler_populate_key(struct brw_context *brw, struct wm_sampler_key *key) { - GLcontext *ctx = &brw->intel.ctx; int unit; memset(key, 0, sizeof(*key)); @@ -272,7 +268,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, entry->comparemode = texObj->CompareMode; entry->comparefunc = texObj->CompareFunc; - dri_bo_unreference(brw->wm.sdc_bo[unit]); + brw->sws->bo_unreference(brw->wm.sdc_bo[unit]); if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { float bordercolor[4] = { texObj->BorderColor[0], @@ -300,7 +296,6 @@ brw_wm_sampler_populate_key(struct brw_context *brw, */ static void upload_wm_samplers( struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; struct wm_sampler_key key; int i; @@ -311,7 +306,7 @@ static void upload_wm_samplers( struct brw_context *brw ) brw->state.dirty.cache |= CACHE_NEW_SAMPLER; } - dri_bo_unreference(brw->wm.sampler_bo); + brw->sws->bo_unreference(brw->wm.sampler_bo); brw->wm.sampler_bo = NULL; if (brw->wm.sampler_count == 0) return; diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index 361f91292b..958c00d3e0 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ 
b/src/gallium/drivers/i965/brw_wm_state.c @@ -60,10 +60,8 @@ struct brw_wm_unit_key { static void wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { - GLcontext *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; - struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); @@ -121,7 +119,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* temporary sanity check assertion */ ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); - /* _NEW_DEPTH */ + /* _NEW_QUERY */ key->stats_wm = intel->stats_wm; /* _NEW_LINE */ @@ -136,12 +134,12 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /** * Setup wm hardware state. See page 225 of Volume 2 */ -static dri_bo * +static struct brw_winsys_buffer * wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, - dri_bo **reloc_bufs) + struct brw_winsys_buffer **reloc_bufs) { struct brw_wm_unit_state wm; - dri_bo *bo; + struct brw_winsys_buffer *bo; memset(&wm, 0, sizeof(wm)); @@ -257,9 +255,8 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, static void upload_wm_unit( struct brw_context *brw ) { - struct intel_context *intel = &brw->intel; struct brw_wm_unit_key key; - dri_bo *reloc_bufs[3]; + struct brw_winsys_buffer *reloc_bufs[3]; wm_unit_populate_key(brw, &key); /* Allocate the necessary scratch space if we haven't already. 
Don't @@ -271,7 +268,7 @@ static void upload_wm_unit( struct brw_context *brw ) GLuint total = key.total_scratch * key.max_threads; if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { - dri_bo_unreference(brw->wm.scratch_bo); + brw->sws->bo_unreference(brw->wm.scratch_bo); brw->wm.scratch_bo = NULL; } if (brw->wm.scratch_bo == NULL) { @@ -286,7 +283,7 @@ static void upload_wm_unit( struct brw_context *brw ) reloc_bufs[1] = brw->wm.scratch_bo; reloc_bufs[2] = brw->wm.sampler_bo; - dri_bo_unreference(brw->wm.state_bo); + brw->sws->bo_unreference(brw->wm.state_bo); brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT, &key, sizeof(key), reloc_bufs, 3, @@ -302,7 +299,7 @@ const struct brw_tracked_state brw_wm_unit = { _NEW_POLYGONSTIPPLE | _NEW_LINE | _NEW_COLOR | - _NEW_DEPTH), + _NEW_QUERY), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index f7cc5153a8..86dcb74b5b 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -30,11 +30,6 @@ */ -#include "main/mtypes.h" -#include "main/texformat.h" -#include "main/texstore.h" -#include "shader/prog_parameter.h" - #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" #include "intel_tex.h" @@ -70,90 +65,87 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, +static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) { - switch( mesa_format ) { - case MESA_FORMAT_L8: + switch( pipe_format ) { + case PIPE_FORMAT_L8_UNORM: return BRW_SURFACEFORMAT_L8_UNORM; - case MESA_FORMAT_I8: + case PIPE_FORMAT_I8_UNORM: return BRW_SURFACEFORMAT_I8_UNORM; - case MESA_FORMAT_A8: + case PIPE_FORMAT_A8_UNORM: return BRW_SURFACEFORMAT_A8_UNORM; - case MESA_FORMAT_AL88: + case PIPE_FORMAT_A8L8_UNORM: return 
BRW_SURFACEFORMAT_L8A8_UNORM; - case MESA_FORMAT_RGB888: - assert(0); /* not supported for sampling */ - return BRW_SURFACEFORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - case MESA_FORMAT_ARGB8888: - if (internal_format == GL_RGB) - return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - else - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + case PIPE_FORMAT_R8G8B8X8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; - case MESA_FORMAT_RGBA8888_REV: - if (internal_format == GL_RGB) - return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; - else - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + case PIPE_FORMAT_: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - case MESA_FORMAT_RGB565: + case PIPE_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; - case MESA_FORMAT_ARGB1555: + case PIPE_FORMAT_ARGB1555: return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - case MESA_FORMAT_ARGB4444: + case PIPE_FORMAT_ARGB4444: return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - case MESA_FORMAT_YCBCR_REV: + + case PIPE_FORMAT_L16_UNORM: + return BRW_SURFACEFORMAT_L16_UNORM; + + case PIPE_FORMAT_I16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; + + case PIPE_FORMAT_A16_UNORM: + return BRW_SURFACEFORMAT_A16_UNORM; + + case PIPE_FORMAT_YCBCR_REV: return BRW_SURFACEFORMAT_YCRCB_NORMAL; - case MESA_FORMAT_YCBCR: + case PIPE_FORMAT_YCBCR: return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: + case PIPE_FORMAT_RGB_FXT1: + case PIPE_FORMAT_RGBA_FXT1: return BRW_SURFACEFORMAT_FXT1; - case MESA_FORMAT_Z16: - if (depth_mode == GL_INTENSITY) - return BRW_SURFACEFORMAT_I16_UNORM; - else if (depth_mode == GL_ALPHA) - return BRW_SURFACEFORMAT_A16_UNORM; - else - return BRW_SURFACEFORMAT_L16_UNORM; - - case MESA_FORMAT_RGB_DXT1: + case PIPE_FORMAT_RGB_DXT1: return BRW_SURFACEFORMAT_DXT1_RGB; - case MESA_FORMAT_RGBA_DXT1: + case PIPE_FORMAT_RGBA_DXT1: return BRW_SURFACEFORMAT_BC1_UNORM; - 
case MESA_FORMAT_RGBA_DXT3: + case PIPE_FORMAT_RGBA_DXT3: return BRW_SURFACEFORMAT_BC2_UNORM; - case MESA_FORMAT_RGBA_DXT5: + case PIPE_FORMAT_RGBA_DXT5: return BRW_SURFACEFORMAT_BC3_UNORM; - case MESA_FORMAT_SARGB8: + case PIPE_FORMAT_R8G8B8A8_SRGB: return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; - case MESA_FORMAT_SLA8: + case PIPE_FORMAT_A8L8_SRGB: return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; - case MESA_FORMAT_SL8: + case PIPE_FORMAT_L8_SRGB: return BRW_SURFACEFORMAT_L8_UNORM_SRGB; - case MESA_FORMAT_SRGB_DXT1: + case PIPE_FORMAT_SRGB_DXT1: return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; - case MESA_FORMAT_S8_Z24: + case PIPE_FORMAT_S8_Z24: /* XXX: these different surface formats don't seem to * make any difference for shadow sampler/compares. */ @@ -164,10 +156,10 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, else return BRW_SURFACEFORMAT_L24X8_UNORM; - case MESA_FORMAT_DUDV8: + case PIPE_FORMAT_DUDV8: return BRW_SURFACEFORMAT_R8G8_SNORM; - case MESA_FORMAT_SIGNED_RGBA8888_REV: + case PIPE_FORMAT_SIGNED_RGBA8888_REV: return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; default: @@ -195,12 +187,12 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) } } -static dri_bo * +static struct brw_winsys_buffer * brw_create_texture_surface( struct brw_context *brw, struct brw_surface_key *key ) { struct brw_surface_state surf; - dri_bo *bo; + struct brw_winsys_buffer *bo; memset(&surf, 0, sizeof(surf)); @@ -234,7 +226,7 @@ brw_create_texture_surface( struct brw_context *brw, else surf.ss1.base_addr = key->offset; - surf.ss2.mip_count = key->last_level - key->first_level; + surf.ss2.mip_count = key->last_level; surf.ss2.width = key->width - 1; surf.ss2.height = key->height - 1; brw_set_surface_tiling(&surf, key->tiling); @@ -270,41 +262,30 @@ brw_create_texture_surface( struct brw_context *brw, } static void -brw_update_texture_surface( GLcontext *ctx, GLuint unit ) +brw_update_texture_surface( struct brw_context *brw, GLuint unit ) { - 
struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; + struct pipe_texture *tex = brw->texture[unit]; struct brw_surface_key key; const GLuint surf = SURF_INDEX_TEXTURE(unit); memset(&key, 0, sizeof(key)); - if (intelObj->imageOverride) { - key.pitch = intelObj->pitchOverride / intelObj->mt->cpp; - key.depth = intelObj->depthOverride; - key.bo = NULL; - key.offset = intelObj->textureOffset; - } else { - key.format = firstImage->TexFormat->MesaFormat; - key.internal_format = firstImage->InternalFormat; - key.pitch = intelObj->mt->pitch; - key.depth = firstImage->Depth; - key.bo = intelObj->mt->region->buffer; - key.offset = 0; - } - - key.target = tObj->Target; - key.depthmode = tObj->DepthMode; - key.first_level = intelObj->firstLevel; - key.last_level = intelObj->lastLevel; - key.width = firstImage->Width; - key.height = firstImage->Height; - key.cpp = intelObj->mt->cpp; - key.tiling = intelObj->mt->region->tiling; - - dri_bo_unreference(brw->wm.surf_bo[surf]); + key.format = tex->base.format; + key.pitch = tex->pitch; + key.depth = tex->base.depth[0]; + key.bo = tex->buffer; + key.offset = 0; + + key.target = tObj->target; /* translated to BRW enum */ + /* key.depthmode = tObj->DepthMode; */ /* XXX: add this to gallium? or the state tracker? */ + key.first_level = 0; + key.last_level = tex->base.last_level; + key.width = tex->base.depth[0]; + key.height = tex->base.height[0]; + key.cpp = tex->cpp; + key.tiling = tex->tiling; + + brw->sws->bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), @@ -321,13 +302,13 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) * Create the constant buffer surface. 
Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -dri_bo * +struct brw_winsys_buffer * brw_create_constant_surface( struct brw_context *brw, struct brw_surface_key *key ) { const GLint w = key->width - 1; struct brw_surface_state surf; - dri_bo *bo; + struct brw_winsys_buffer *bo; memset(&surf, 0, sizeof(surf)); @@ -374,7 +355,6 @@ brw_create_constant_surface( struct brw_context *brw, static drm_intel_bo * brw_wm_update_constant_buffer(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; const struct gl_program_parameter_list *params = fp->program.Base.Parameters; @@ -399,7 +379,7 @@ brw_wm_update_constant_buffer(struct brw_context *brw) * The constant buffer will be (re)allocated here if needed. */ static void -brw_update_wm_constant_surface( GLcontext *ctx, +brw_update_wm_constant_surface( struct brw_context *brw, GLuint surf) { struct brw_context *brw = brw_context(ctx); @@ -412,7 +392,7 @@ brw_update_wm_constant_surface( GLcontext *ctx, /* If we're in this state update atom, we need to update WM constants, so * free the old buffer and create a new one for the new contents. 
*/ - dri_bo_unreference(fp->const_buffer); + brw->sws->bo_unreference(fp->const_buffer); fp->const_buffer = brw_wm_update_constant_buffer(brw); /* If there's no constant buffer, then no surface BO is needed to point at @@ -426,7 +406,7 @@ brw_update_wm_constant_surface( GLcontext *ctx, memset(&key, 0, sizeof(key)); - key.format = MESA_FORMAT_RGBA_FLOAT32; + key.format = PIPE_FORMAT_RGBA_FLOAT32; key.internal_format = GL_RGBA; key.bo = fp->const_buffer; key.depthmode = GL_NONE; @@ -442,7 +422,7 @@ brw_update_wm_constant_surface( GLcontext *ctx, key.width, key.height, key.depth, key.cpp, key.pitch); */ - dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->sws->bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), @@ -464,7 +444,6 @@ brw_update_wm_constant_surface( GLcontext *ctx, */ static void prepare_wm_constant_surface(struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; @@ -507,8 +486,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, unsigned int unit) { - GLcontext *ctx = &brw->intel.ctx; - dri_bo *region_bo = NULL; + struct brw_winsys_buffer *region_bo = NULL; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_region *region = irb ? 
irb->region : NULL; struct { @@ -528,16 +506,16 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.surface_type = BRW_SURFACE_2D; switch (irb->texformat->MesaFormat) { - case MESA_FORMAT_ARGB8888: + case PIPE_FORMAT_ARGB8888: key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; break; - case MESA_FORMAT_RGB565: + case PIPE_FORMAT_RGB565: key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; break; - case MESA_FORMAT_ARGB1555: + case PIPE_FORMAT_ARGB1555: key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; break; - case MESA_FORMAT_ARGB4444: + case PIPE_FORMAT_ARGB4444: key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; break; default: @@ -569,7 +547,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.color_blend = (!ctx->Color._LogicOpEnabled && ctx->Color.BlendEnabled); - dri_bo_unreference(brw->wm.surf_bo[unit]); + brw->sws->bo_unreference(brw->wm.surf_bo[unit]); brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &key, sizeof(key), @@ -646,10 +624,10 @@ brw_update_renderbuffer_surface(struct brw_context *brw, * Constructs the binding table for the WM surface state, which maps unit * numbers to surface state objects. 
*/ -static dri_bo * +static struct brw_winsys_buffer * brw_wm_get_binding_table(struct brw_context *brw) { - dri_bo *bind_bo; + struct brw_winsys_buffer *bind_bo; assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); @@ -692,7 +670,6 @@ brw_wm_get_binding_table(struct brw_context *brw) static void prepare_wm_surfaces(struct brw_context *brw ) { - GLcontext *ctx = &brw->intel.ctx; GLuint i; int old_nr_surfaces; @@ -724,12 +701,12 @@ static void prepare_wm_surfaces(struct brw_context *brw ) brw_update_texture_surface(ctx, i); brw->wm.nr_surfaces = surf + 1; } else { - dri_bo_unreference(brw->wm.surf_bo[surf]); + brw->sws->bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = NULL; } } - dri_bo_unreference(brw->wm.bind_bo); + brw->sws->bo_unreference(brw->wm.bind_bo); brw->wm.bind_bo = brw_wm_get_binding_table(brw); if (brw->wm.nr_surfaces != old_nr_surfaces) diff --git a/src/gallium/drivers/i965/intel_batchbuffer.h b/src/gallium/drivers/i965/intel_batchbuffer.h index a595d2e0c5..be04656aec 100644 --- a/src/gallium/drivers/i965/intel_batchbuffer.h +++ b/src/gallium/drivers/i965/intel_batchbuffer.h @@ -1,9 +1,6 @@ #ifndef INTEL_BATCHBUFFER_H #define INTEL_BATCHBUFFER_H -#include "main/mtypes.h" - -#include "intel_context.h" #include "intel_bufmgr.h" #include "intel_reg.h" @@ -44,7 +41,7 @@ struct intel_batchbuffer { struct intel_context *intel; - dri_bo *buf; + struct brw_winsys_buffer *buf; GLubyte *buffer; @@ -89,7 +86,7 @@ void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, GLuint bytes); GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, - dri_bo *buffer, + struct brw_winsys_buffer *buffer, uint32_t read_domains, uint32_t write_domain, uint32_t offset); diff --git a/src/gallium/drivers/i965/intel_tex_format.c b/src/gallium/drivers/i965/intel_tex_format.c index 3322a71130..c62ecdadf0 100644 --- a/src/gallium/drivers/i965/intel_tex_format.c +++ b/src/gallium/drivers/i965/intel_tex_format.c @@ -1,206 +1,9 @@ #include 
"intel_context.h" #include "intel_tex.h" #include "intel_chipset.h" -#include "main/texformat.h" -#include "main/enums.h" -/** - * Choose hardware texture format given the user's glTexImage parameters. - * - * It works out that this function is fine for all the supported - * hardware. However, there is still a need to map the formats onto - * hardware descriptors. - * - * Note that the i915 can actually support many more formats than - * these if we take the step of simply swizzling the colors - * immediately after sampling... - */ -const struct gl_texture_format * -intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, - GLenum format, GLenum type) -{ - struct intel_context *intel = intel_context(ctx); - const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24); - -#if 0 - printf("%s intFmt=0x%x format=0x%x type=0x%x\n", - __FUNCTION__, internalFormat, format, type); -#endif - - switch (internalFormat) { - case 4: - case GL_RGBA: - case GL_COMPRESSED_RGBA: - if (format == GL_BGRA) { - if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) { - return &_mesa_texformat_argb8888; - } - else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) { - return &_mesa_texformat_argb4444; - } - else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) { - return &_mesa_texformat_argb1555; - } - } - return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444; - - case 3: - case GL_RGB: - case GL_COMPRESSED_RGB: - if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { - return &_mesa_texformat_rgb565; - } - return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565; - - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - return do32bpt ? 
&_mesa_texformat_argb8888 : &_mesa_texformat_argb4444; - - case GL_RGBA4: - case GL_RGBA2: - return &_mesa_texformat_argb4444; - - case GL_RGB5_A1: - return &_mesa_texformat_argb1555; - - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - return &_mesa_texformat_argb8888; - - case GL_RGB5: - case GL_RGB4: - case GL_R3_G3_B2: - return &_mesa_texformat_rgb565; - - case GL_ALPHA: - case GL_ALPHA4: - case GL_ALPHA8: - case GL_ALPHA12: - case GL_ALPHA16: - case GL_COMPRESSED_ALPHA: - return &_mesa_texformat_a8; - - case 1: - case GL_LUMINANCE: - case GL_LUMINANCE4: - case GL_LUMINANCE8: - case GL_LUMINANCE12: - case GL_LUMINANCE16: - case GL_COMPRESSED_LUMINANCE: - return &_mesa_texformat_l8; - - case 2: - case GL_LUMINANCE_ALPHA: - case GL_LUMINANCE4_ALPHA4: - case GL_LUMINANCE6_ALPHA2: - case GL_LUMINANCE8_ALPHA8: - case GL_LUMINANCE12_ALPHA4: - case GL_LUMINANCE12_ALPHA12: - case GL_LUMINANCE16_ALPHA16: - case GL_COMPRESSED_LUMINANCE_ALPHA: - return &_mesa_texformat_al88; - - case GL_INTENSITY: - case GL_INTENSITY4: - case GL_INTENSITY8: - case GL_INTENSITY12: - case GL_INTENSITY16: - case GL_COMPRESSED_INTENSITY: - return &_mesa_texformat_i8; - case GL_YCBCR_MESA: - if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE) - return &_mesa_texformat_ycbcr; - else - return &_mesa_texformat_ycbcr_rev; - - case GL_COMPRESSED_RGB_FXT1_3DFX: - return &_mesa_texformat_rgb_fxt1; - case GL_COMPRESSED_RGBA_FXT1_3DFX: - return &_mesa_texformat_rgba_fxt1; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return &_mesa_texformat_rgb_dxt1; - - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return &_mesa_texformat_rgba_dxt1; - - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - return &_mesa_texformat_rgba_dxt3; - - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return &_mesa_texformat_rgba_dxt5; - - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH_COMPONENT24: - case 
GL_DEPTH_COMPONENT32: -#if 0 - return &_mesa_texformat_z16; -#else - /* fall-through. - * 16bpp depth texture can't be paired with a stencil buffer so - * always used combined depth/stencil format. - */ -#endif - case GL_DEPTH_STENCIL_EXT: - case GL_DEPTH24_STENCIL8_EXT: - return &_mesa_texformat_s8_z24; - -#ifndef I915 - case GL_SRGB_EXT: - case GL_SRGB8_EXT: - case GL_SRGB_ALPHA_EXT: - case GL_SRGB8_ALPHA8_EXT: - case GL_COMPRESSED_SRGB_EXT: - case GL_COMPRESSED_SRGB_ALPHA_EXT: - case GL_COMPRESSED_SLUMINANCE_EXT: - case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: - return &_mesa_texformat_sargb8; - case GL_SLUMINANCE_EXT: - case GL_SLUMINANCE8_EXT: - if (IS_G4X(intel->intelScreen->deviceID)) - return &_mesa_texformat_sl8; - else - return &_mesa_texformat_sargb8; - case GL_SLUMINANCE_ALPHA_EXT: - case GL_SLUMINANCE8_ALPHA8_EXT: - if (IS_G4X(intel->intelScreen->deviceID)) - return &_mesa_texformat_sla8; - else - return &_mesa_texformat_sargb8; - case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: - return &_mesa_texformat_srgb_dxt1; - - /* i915 could also do this */ - case GL_DUDV_ATI: - case GL_DU8DV8_ATI: - return &_mesa_texformat_dudv8; - case GL_RGBA_SNORM: - case GL_RGBA8_SNORM: - return &_mesa_texformat_signed_rgba8888_rev; -#endif - - default: - fprintf(stderr, "unexpected texture format %s in %s\n", - _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__); - return NULL; - } - - return NULL; /* never get here */ -} int intel_compressed_num_bytes(GLuint mesaFormat) { diff --git a/src/gallium/drivers/i965/intel_tex_layout.c b/src/gallium/drivers/i965/intel_tex_layout.c index 7d69ea4484..1cdab49e5e 100644 --- a/src/gallium/drivers/i965/intel_tex_layout.c +++ b/src/gallium/drivers/i965/intel_tex_layout.c @@ -33,7 +33,6 @@ #include "intel_mipmap_tree.h" #include "intel_tex_layout.h" #include "intel_context.h" -#include "main/macros.h" void 
intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h) { @@ -86,7 +85,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, * constraints of mipmap placement push the right edge of the * 2nd mipmap out past the width of its parent. */ - if (mt->first_level != mt->last_level) { + if (mt->last_level) { GLuint mip1_width; if (mt->compressed) { @@ -108,7 +107,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch); mt->total_height = 0; - for ( level = mt->first_level ; level <= mt->last_level ; level++ ) { + for ( level = 0 ; level <= mt->last_level ; level++ ) { GLuint img_height; intel_miptree_set_level_info(mt, level, 1, x, y, width, @@ -127,7 +126,7 @@ void i945_miptree_layout_2d( struct intel_context *intel, /* Layout_below: step right after second mipmap. */ - if (level == mt->first_level + 1) { + if (level == 1) { x += ALIGN(width, align_w); } else { -- cgit v1.2.3 From d71af266dfe01953f2545708e16a8eb799113abb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 24 Oct 2009 11:53:43 +0100 Subject: i965g: first compiling file --- src/gallium/drivers/i965/brw_cc.c | 96 ++++++++++++++++++++++--------- src/gallium/drivers/i965/brw_context.h | 30 ++++++++++ src/gallium/drivers/i965/brw_pipe_blend.c | 4 ++ src/gallium/drivers/i965/brw_state.h | 3 +- src/gallium/drivers/i965/brw_structs.h | 16 +++--- 5 files changed, 114 insertions(+), 35 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index af432b1f52..bf2743ebbe 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -34,15 +34,41 @@ #include "brw_state.h" #include "brw_defines.h" + +struct sane_viewport { + float top; + float left; + float width; + float height; + float near; + float far; +}; + +static void calc_sane_viewport( const struct pipe_viewport_state *vp, + struct 
sane_viewport *svp ) +{ + /* XXX fix me, obviously. + */ + svp->top = 0; + svp->left = 0; + svp->width = 250; + svp->height = 250; + svp->near = 0; + svp->far = 1; +} + static void prepare_cc_vp( struct brw_context *brw ) { struct brw_cc_viewport ccv; + struct sane_viewport svp; memset(&ccv, 0, sizeof(ccv)); - /* _NEW_VIEWPORT */ - ccv.min_depth = ctx->Viewport.Near; - ccv.max_depth = ctx->Viewport.Far; + /* PIPE_NEW_VIEWPORT */ + calc_sane_viewport( &brw->vp, &svp ); + + ccv.min_depth = svp.near; + ccv.max_depth = svp.far; brw->sws->bo_unreference(brw->cc.vp_bo); brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); @@ -58,21 +84,38 @@ const struct brw_tracked_state brw_cc_vp = { }; struct brw_cc_unit_key { - struct pipe_depth_stencil_alpha_state dsa; - struct pipe_blend_state blend; /* no color mask */ + struct brw_cc0 cc0; + struct brw_cc1 cc1; + struct brw_cc2 cc2; + struct brw_cc3 cc3; + struct brw_cc5 cc5; + struct brw_cc6 cc6; + struct brw_cc7 cc7; }; -static void -cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key) +/* A long-winded way to OR two unsigned integers together: + */ +static INLINE struct brw_cc3 +combine_cc3( struct brw_cc3 a, struct brw_cc3 b ) { - memset(key, 0, sizeof(*key)); - - key->dsa = brw->dsa; - key->blend = brw->blend; + union { struct brw_cc3 cc3; unsigned i; } ca, cb; + ca.cc3 = a; + cb.cc3 = b; + ca.i |= cb.i; + return ca.cc3; +} - /* Clear non-respected values: - */ - key->blend.colormask = 0xf; +static void +cc_unit_populate_key(const struct brw_context *brw, + struct brw_cc_unit_key *key) +{ + key->cc0 = brw->dsa->cc0; + key->cc1 = brw->dsa->cc1; + key->cc2 = brw->dsa->cc2; + key->cc3 = combine_cc3( brw->dsa->cc3, brw->blend->cc3 ); + key->cc5 = brw->blend->cc5; + key->cc6 = brw->blend->cc6; + key->cc7 = brw->blend->cc7; } /** @@ -86,16 +129,17 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) memset(&cc, 0, sizeof(cc)); - cc.cc0 = brw->dsa.cc0; - cc.cc1 = 
brw->dsa.cc1; - cc.cc2 = brw->dsa.cc2; - cc.cc3 = brw->dsa.cc3 | brw->blend.cc3; + cc.cc0 = key->cc0; + cc.cc1 = key->cc1; + cc.cc2 = key->cc2; + cc.cc3 = key->cc3; /* CACHE_NEW_CC_VP */ cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ - cc.cc5 = brw->blend.cc5 | brw->debug.cc5; - + cc.cc5 = key->cc5; + cc.cc6 = key->cc6; + cc.cc7 = key->cc7; bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, key, sizeof(*key), @@ -104,12 +148,12 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) NULL, NULL); /* Emit CC viewport relocation */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, - 0, - 0, - offsetof(struct brw_cc_unit_state, cc4), - brw->cc.vp_bo); + brw->sws->bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, + 0, + offsetof(struct brw_cc_unit_state, cc4), + brw->cc.vp_bo); return bo; } diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 0fcb75a440..6699d3bdb6 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -119,6 +119,33 @@ struct brw_context; +struct brw_depth_stencil_alpha_state { + struct pipe_depth_stencil_alpha_state templ; /* for draw module */ + + /* Precalculated hardware state: + */ + struct brw_cc0 cc0; + struct brw_cc1 cc1; + struct brw_cc2 cc2; + struct brw_cc3 cc3; +}; + + +struct brw_blend_state { + struct pipe_depth_stencil_alpha_state templ; /* for draw module */ + + /* Precalculated hardware state: + */ + struct brw_cc3 cc3; + struct brw_cc5 cc5; + struct brw_cc6 cc6; + struct brw_cc7 cc7; +}; + + + + + #define PIPE_NEW_DEPTH_STENCIL_ALPHA 0x1 #define PIPE_NEW_RAST 0x2 #define PIPE_NEW_BLEND 0x2 @@ -440,6 +467,9 @@ struct brw_context const struct gl_vertex_program *vertex_program; const struct gl_fragment_program *fragment_program; struct pipe_framebuffer_state fb; + struct brw_depth_stencil_alpha_state *dsa; + struct brw_blend_state *blend; + struct pipe_viewport_state vp; struct { struct 
brw_state_flags dirty; diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c index b351794dce..17895d2782 100644 --- a/src/gallium/drivers/i965/brw_pipe_blend.c +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -1,4 +1,5 @@ + /* _NEW_COLOR */ if (key->logic_op != GL_COPY) { cc.cc2.logicop_enable = 1; @@ -39,3 +40,6 @@ cc.cc6.x_dither_offset = 0; } + if (INTEL_DEBUG & DEBUG_STATS) + cc.cc5.statistics_enable = 1; +} diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index a007d542d0..b716097bfc 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -34,11 +34,12 @@ #define BRW_STATE_H #include "brw_context.h" +#include "util/u_memory.h" static inline void brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo) { - assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos)); + assert(brw->state.validated_bo_count < Elements(brw->state.validated_bos)); if (bo != NULL) { brw->sws->bo_reference(bo); diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h index 27d264c3de..11372697f9 100644 --- a/src/gallium/drivers/i965/brw_structs.h +++ b/src/gallium/drivers/i965/brw_structs.h @@ -663,7 +663,7 @@ struct brw_clip_unit_state struct brw_cc_unit_state { - struct + struct brw_cc0 { GLuint pad0:3; GLuint bf_stencil_pass_depth_pass_op:3; @@ -681,7 +681,7 @@ struct brw_cc_unit_state } cc0; - struct + struct brw_cc1 { GLuint bf_stencil_ref:8; GLuint stencil_write_mask:8; @@ -690,7 +690,7 @@ struct brw_cc_unit_state } cc1; - struct + struct brw_cc2 { GLuint logicop_enable:1; GLuint pad0:10; @@ -702,7 +702,7 @@ struct brw_cc_unit_state } cc2; - struct + struct brw_cc3 { GLuint pad0:8; GLuint alpha_test_func:3; @@ -714,13 +714,13 @@ struct brw_cc_unit_state GLuint pad2:16; } cc3; - struct + struct brw_cc4 { GLuint pad0:5; GLuint cc_viewport_state_offset:27; /* Offset from 
GENERAL_STATE_BASE */ } cc4; - struct + struct brw_cc5 { GLuint pad0:2; GLuint ia_dest_blend_factor:5; @@ -732,7 +732,7 @@ struct brw_cc_unit_state GLuint dither_enable:1; } cc5; - struct + struct brw_cc6 { GLuint clamp_post_alpha_blend:1; GLuint clamp_pre_alpha_blend:1; @@ -745,7 +745,7 @@ struct brw_cc_unit_state GLuint blend_function:3; } cc6; - struct { + struct brw_cc7 { union { GLfloat f; GLubyte ub[4]; -- cgit v1.2.3 From 074606a806df755ecbb84e0a1182c66fd0b2a8dd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 24 Oct 2009 13:18:34 +0100 Subject: i965g: more files compiling --- src/gallium/drivers/i965/brw_batchbuffer.h | 124 ++++++++++++ src/gallium/drivers/i965/brw_cc.c | 16 +- src/gallium/drivers/i965/brw_clip.c | 80 +++----- src/gallium/drivers/i965/brw_clip.h | 7 +- src/gallium/drivers/i965/brw_clip_unfilled.c | 2 +- src/gallium/drivers/i965/brw_clip_util.c | 2 +- src/gallium/drivers/i965/brw_context.c | 2 +- src/gallium/drivers/i965/brw_context.h | 89 ++++----- src/gallium/drivers/i965/brw_curbe.c | 10 +- src/gallium/drivers/i965/brw_defines.h | 4 +- src/gallium/drivers/i965/brw_draw.c | 12 +- src/gallium/drivers/i965/brw_draw_upload.c | 2 +- src/gallium/drivers/i965/brw_eu.h | 32 +++- src/gallium/drivers/i965/brw_eu_emit.c | 4 +- src/gallium/drivers/i965/brw_gs.c | 2 +- src/gallium/drivers/i965/brw_gs_emit.c | 2 +- src/gallium/drivers/i965/brw_misc_state.c | 2 +- src/gallium/drivers/i965/brw_pipe_flush.c | 2 +- src/gallium/drivers/i965/brw_pipe_query.c | 4 +- src/gallium/drivers/i965/brw_pipe_rast.c | 46 +++++ src/gallium/drivers/i965/brw_pipe_rast.h | 14 ++ src/gallium/drivers/i965/brw_pipe_shader.c | 159 ++++++++++++++++ src/gallium/drivers/i965/brw_reg.h | 79 ++++++++ src/gallium/drivers/i965/brw_screen.h | 78 ++++++++ src/gallium/drivers/i965/brw_screen_surface.c | 4 +- src/gallium/drivers/i965/brw_sf.c | 2 +- src/gallium/drivers/i965/brw_sf.h | 1 - src/gallium/drivers/i965/brw_sf_emit.c | 2 +- src/gallium/drivers/i965/brw_state.h | 2 +- 
src/gallium/drivers/i965/brw_state_batch.c | 6 +- src/gallium/drivers/i965/brw_state_cache.c | 2 +- src/gallium/drivers/i965/brw_state_upload.c | 2 +- src/gallium/drivers/i965/brw_tex_layout.c | 2 +- src/gallium/drivers/i965/brw_urb.c | 2 +- src/gallium/drivers/i965/brw_util.h | 5 +- src/gallium/drivers/i965/brw_vs.c | 3 +- src/gallium/drivers/i965/brw_vs.h | 1 - src/gallium/drivers/i965/brw_vs_emit.c | 82 ++++---- src/gallium/drivers/i965/brw_winsys.h | 243 ++++++++++++++++++++++++ src/gallium/drivers/i965/brw_wm.h | 1 - src/gallium/drivers/i965/brw_wm_debug.c | 2 +- src/gallium/drivers/i965/brw_wm_emit.c | 84 ++++---- src/gallium/drivers/i965/brw_wm_fp.c | 60 +++--- src/gallium/drivers/i965/brw_wm_pass0.c | 1 - src/gallium/drivers/i965/brw_wm_pass1.c | 68 +++---- src/gallium/drivers/i965/brw_wm_surface_state.c | 2 +- src/gallium/drivers/i965/intel_batchbuffer.h | 168 ---------------- 47 files changed, 1027 insertions(+), 492 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_batchbuffer.h create mode 100644 src/gallium/drivers/i965/brw_pipe_rast.c create mode 100644 src/gallium/drivers/i965/brw_pipe_rast.h create mode 100644 src/gallium/drivers/i965/brw_pipe_shader.c create mode 100644 src/gallium/drivers/i965/brw_reg.h create mode 100644 src/gallium/drivers/i965/brw_screen.h create mode 100644 src/gallium/drivers/i965/brw_winsys.h delete mode 100644 src/gallium/drivers/i965/intel_batchbuffer.h (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h new file mode 100644 index 0000000000..76b3c1bf69 --- /dev/null +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -0,0 +1,124 @@ +#ifndef BRW_BATCHBUFFER_H +#define BRW_BATCHBUFFER_H + +#include "brw_types.h" +#include "brw_winsys.h" +#include "brw_reg.h" + +#define BATCH_SZ 16384 +#define BATCH_RESERVED 16 + +/* All ignored: + */ +enum cliprect_mode { + IGNORE_CLIPRECTS, + LOOP_CLIPRECTS, + NO_LOOP_CLIPRECTS, + 
REFERENCES_CLIPRECTS +}; + +void brw_batchbuffer_free(struct brw_batchbuffer *batch); + +void _brw_batchbuffer_flush(struct brw_batchbuffer *batch, + const char *file, int line); + +#define brw_batchbuffer_flush(batch) \ + _brw_batchbuffer_flush(batch, __FILE__, __LINE__) + +void brw_batchbuffer_reset(struct brw_batchbuffer *batch); + + +/* Unlike bmBufferData, this currently requires the buffer be mapped. + * Consider it a convenience function wrapping multple + * intel_buffer_dword() calls. + */ +void brw_batchbuffer_data(struct brw_batchbuffer *batch, + const void *data, GLuint bytes, + enum cliprect_mode cliprect_mode); + +void brw_batchbuffer_release_space(struct brw_batchbuffer *batch, + GLuint bytes); + +GLboolean brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, + struct brw_winsys_buffer *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); + +/* Inline functions - might actually be better off with these + * non-inlined. Certainly better off switching all command packets to + * be passed as structs rather than dwords, but that's a little bit of + * work... + */ +static INLINE GLint +brw_batchbuffer_space(struct brw_batchbuffer *batch) +{ + return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); +} + + +static INLINE void +brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword) +{ + assert(batch->map); + assert(brw_batchbuffer_space(batch) >= 4); + *(GLuint *) (batch->ptr) = dword; + batch->ptr += 4; +} + +static INLINE boolean +brw_batchbuffer_require_space(struct brw_batchbuffer *batch, + GLuint sz, + enum cliprect_mode cliprect_mode) +{ + assert(sz < batch->size - 8); + if (brw_batchbuffer_space(batch) < sz) { + assert(0); + return FALSE; + } + + /* All commands should be executed once regardless of cliprect + * mode. 
+ */ + (void)cliprect_mode; +} + +/* Here are the crusty old macros, to be removed: + */ +#define BATCH_LOCALS + +#define BEGIN_BATCH(n, cliprect_mode) do { \ + brw_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \ + assert(intel->batch->emit.start_ptr == NULL); \ + intel->batch->emit.total = (n) * 4; \ + intel->batch->emit.start_ptr = intel->batch->ptr; \ +} while (0) + +#define OUT_BATCH(d) brw_batchbuffer_emit_dword(intel->batch, d) + +#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ + assert((unsigned) (delta) < buf->size); \ + brw_batchbuffer_emit_reloc(intel->batch, buf, \ + read_domains, write_domain, delta); \ +} while (0) + +#define ADVANCE_BATCH() do { \ + unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ + assert(intel->batch->emit.start_ptr != NULL); \ + if (_n != intel->batch->emit.total) { \ + fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \ + _n, intel->batch->emit.total); \ + abort(); \ + } \ + intel->batch->emit.start_ptr = NULL; \ +} while(0) + + +static INLINE void +brw_batchbuffer_emit_mi_flush(struct brw_batchbuffer *batch) +{ + brw_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS); + brw_batchbuffer_emit_dword(batch, MI_FLUSH); +} + +#endif diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index bf2743ebbe..c8e7851d75 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -65,7 +65,7 @@ static void prepare_cc_vp( struct brw_context *brw ) memset(&ccv, 0, sizeof(ccv)); /* PIPE_NEW_VIEWPORT */ - calc_sane_viewport( &brw->vp, &svp ); + calc_sane_viewport( &brw->curr.vp, &svp ); ccv.min_depth = svp.near; ccv.max_depth = svp.far; @@ -109,13 +109,13 @@ static void cc_unit_populate_key(const struct brw_context *brw, struct brw_cc_unit_key *key) { - key->cc0 = brw->dsa->cc0; - key->cc1 = brw->dsa->cc1; - key->cc2 = brw->dsa->cc2; - key->cc3 = combine_cc3( brw->dsa->cc3, brw->blend->cc3 ); - key->cc5 = brw->blend->cc5; 
- key->cc6 = brw->blend->cc6; - key->cc7 = brw->blend->cc7; + key->cc0 = brw->curr.dsa->cc0; + key->cc1 = brw->curr.dsa->cc1; + key->cc2 = brw->curr.dsa->cc2; + key->cc3 = combine_cc3( brw->curr.dsa->cc3, brw->curr.blend->cc3 ); + key->cc5 = brw->curr.blend->cc5; + key->cc6 = brw->curr.blend->cc6; + key->cc7 = brw->curr.blend->cc7; } /** diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index d82ebeb9a9..591e904705 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -33,13 +33,14 @@ #include "util/u_math.h" -#include "intel_batchbuffer.h" - +#include "brw_screen.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include "brw_context.h" #include "brw_eu.h" #include "brw_util.h" #include "brw_state.h" +#include "brw_pipe_rast.h" #include "brw_clip.h" @@ -77,13 +78,16 @@ static void compile_clip_prog( struct brw_context *brw, else delta = REG_SIZE; - for (i = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<curr.rast->clip_key, sizeof key); + /* BRW_NEW_REDUCED_PRIMITIVE */ key.primitive = brw->reduced_primitive; - /* CACHE_NEW_VS_PROG */ - key.attrs = brw->vs.prog_data->outputs_written; - /* PIPE_NEW_RAST */ - key.do_flat_shading = brw->rast.base.flatshade; - /* PIPE_NEW_UCP */ - key.nr_userclip = brw->nr_ucp; - if (BRW_IS_IGDNG(brw)) - key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; - else - key.clip_mode = BRW_CLIPMODE_NORMAL; + /* PIPE_NEW_VS */ + key.nr_attrs = brw->curr.vs->info.file_max[TGSI_FILE_OUTPUT] + 1; - /* PIPE_NEW_RAST */ - if (key.primitive == PIPE_PRIM_TRIANGLES) { - if (brw->rast->cull_mode = PIPE_WINDING_BOTH) - key.clip_mode = BRW_CLIPMODE_REJECT_ALL; - else { - key.fill_ccw = CLIP_CULL; - key.fill_cw = CLIP_CULL; - - if (!(brw->rast->cull_mode & PIPE_WINDING_CCW)) { - key.fill_ccw = translate_fill(brw->rast.fill_ccw); - } - - if (!(brw->rast->cull_mode & PIPE_WINDING_CW)) { - key.fill_cw = translate_fill(brw->rast.fill_cw); - } - - if (key.fill_cw != CLIP_FILL || 
- key.fill_ccw != CLIP_FILL) { - key.do_unfilled = 1; - key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; - } - - key.offset_ccw = brw->rast.offset_ccw; - key.offset_cw = brw->rast.offset_cw; - - if (brw->rast.light_twoside && - key.fill_cw != CLIP_CULL) - key.copy_bfc_cw = 1; - - if (brw->rast.light_twoside && - key.fill_ccw != CLIP_CULL) - key.copy_bfc_ccw = 1; - } - } - } + /* PIPE_NEW_CLIP */ + key.nr_userclip = brw->curr.ucp.nr; brw->sws->bo_unreference(brw->clip.prog_bo); brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, @@ -212,7 +178,7 @@ static void upload_clip_prog(struct brw_context *brw) const struct brw_tracked_state brw_clip_prog = { .dirty = { .mesa = (PIPE_NEW_RAST | - PIPE_NEW_UCP), + PIPE_NEW_CLIP), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index d80ec819b9..cfe51bf292 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -42,8 +42,7 @@ * up polygon offset and flatshading at this point: */ struct brw_clip_prog_key { - GLuint attrs:32; - + GLuint nr_attrs:5; GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; @@ -55,7 +54,7 @@ struct brw_clip_prog_key { GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; GLuint clip_mode:3; - GLuint pad1:12; + GLuint pad1:7; GLfloat offset_factor; GLfloat offset_units; @@ -117,7 +116,7 @@ struct brw_clip_compile { GLuint last_mrf; GLuint header_position_offset; - GLuint offset[VERT_ATTRIB_MAX]; + GLuint offset[PIPE_MAX_SHADER_OUTPUTS]; GLboolean need_ff_sync; }; diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c index 4baff55806..8501599aef 100644 --- a/src/gallium/drivers/i965/brw_clip_unfilled.c +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -29,7 +29,7 @@ * Keith Whitwell */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include 
"brw_context.h" diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 7a6c46ce07..60bfd3538e 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -93,7 +93,7 @@ void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) /* value.xyz *= value.rhw */ brw_set_access_mode(p, BRW_ALIGN_16); - brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); + brw_MUL(p, brw_writemask(pos, BRW_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); brw_set_access_mode(p, BRW_ALIGN_1); } diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index 063ada5772..07a5420d6e 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -38,7 +38,7 @@ #include "brw_state.h" #include "brw_vs.h" #include "brw_screen_tex.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 6699d3bdb6..3a2fece45c 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -36,6 +36,8 @@ #include "brw_structs.h" #include "brw_winsys.h" #include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "tgsi/tgsi_scan.h" /* Glossary: @@ -143,6 +145,27 @@ struct brw_blend_state { }; +struct brw_rasterizer_state; + + +struct brw_vertex_shader { + const struct tgsi_token *tokens; + struct tgsi_shader_info info; + + struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; +}; + + +struct brw_fragment_shader { + const struct tgsi_token *tokens; + struct tgsi_shader_info info; + + GLboolean isGLSL; + + struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ + GLboolean use_const_buffer; +}; @@ -157,6 +180,7 @@ struct brw_blend_state { #define PIPE_NEW_VERTEX_SHADER 0x2 #define 
PIPE_NEW_FRAGMENT_CONSTS 0x2 #define PIPE_NEW_VERTEX_CONSTS 0x2 +#define PIPE_NEW_CLIP 0x2 #define BRW_NEW_URB_FENCE 0x1 @@ -196,25 +220,6 @@ struct brw_state_flags { }; -struct brw_vertex_program { - const struct tgsi_token *tokens; - GLuint id; - struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ - GLboolean use_const_buffer; -}; - - -/** Subclass of Mesa fragment program */ -struct brw_fragment_program { - const struct tgsi_token *tokens; - - GLuint id; /**< serial no. to identify frag progs, never re-used */ - GLboolean isGLSL; /**< any IF/LOOP/CONT/BREAK instructions */ - - struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ - GLboolean use_const_buffer; -}; - /* Data about a particular attempt to compile a program. Note that * there can be many of these, each in a different GL state @@ -452,24 +457,29 @@ struct brw_query_object { */ struct brw_context { - struct pipe_context *pipe; - struct pipe_screen *screen; - + struct pipe_context pipe; + + struct brw_screen *brw_screen; struct brw_winsys_screen *sws; GLuint primitive; + GLuint reduced_primitive; GLboolean emit_state_always; GLboolean no_batch_wrap; /* Active vertex program: */ - const struct gl_vertex_program *vertex_program; - const struct gl_fragment_program *fragment_program; - struct pipe_framebuffer_state fb; - struct brw_depth_stencil_alpha_state *dsa; - struct brw_blend_state *blend; - struct pipe_viewport_state vp; + struct { + const struct brw_vertex_shader *vs; + const struct brw_fragment_shader *fs; + const struct brw_blend_state *blend; + const struct brw_rasterizer_state *rast; + const struct brw_depth_stencil_alpha_state *dsa; + struct pipe_framebuffer_state fb; + struct pipe_viewport_state vp; + struct pipe_clip_state ucp; + } curr; struct { struct brw_state_flags dirty; @@ -719,29 +729,6 @@ brw_context( struct pipe_context *ctx ) return (struct brw_context *)ctx; } -static INLINE struct brw_vertex_program * -brw_vertex_program(struct 
gl_vertex_program *p) -{ - return (struct brw_vertex_program *) p; -} - -static INLINE const struct brw_vertex_program * -brw_vertex_program_const(const struct gl_vertex_program *p) -{ - return (const struct brw_vertex_program *) p; -} - -static INLINE struct brw_fragment_program * -brw_fragment_program(struct gl_fragment_program *p) -{ - return (struct brw_fragment_program *) p; -} - -static INLINE const struct brw_fragment_program * -brw_fragment_program_const(const struct gl_fragment_program *p) -{ - return (const struct brw_fragment_program *) p; -} diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 33ea9a00f7..f2524d75e2 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -30,7 +30,7 @@ */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_regions.h" #include "brw_context.h" #include "brw_defines.h" @@ -55,8 +55,8 @@ static void calculate_curbe_offsets( struct brw_context *brw ) GLuint nr_clip_regs = 0; GLuint total_regs; - /* PIPE_NEW_UCP */ - if (brw->nr_ucp) { + /* PIPE_NEW_CLIP */ + if (brw->curr.ucp.nr) { GLuint nr_planes = 6 + brw->nr_ucp; nr_clip_regs = (nr_planes * 4 + 15) / 16; } @@ -106,7 +106,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) const struct brw_tracked_state brw_curbe_offsets = { .dirty = { - .mesa = PIPE_NEW_UCP, + .mesa = PIPE_NEW_CLIP, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = CACHE_NEW_WM_PROG }, @@ -327,7 +327,7 @@ const struct brw_tracked_state brw_constant_buffer = { .dirty = { .mesa = (PIPE_NEW_FS_CONSTANTS | PIPE_NEW_VS_CONSTANTS | - PIPE_NEW_UCP), + PIPE_NEW_CLIP), .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index 282c5b18f4..1dc64ddc8f 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ 
b/src/gallium/drivers/i965/brw_defines.h @@ -840,8 +840,8 @@ #include "intel_chipset.h" -#define BRW_IS_G4X(brw) (IS_G4X((brw)->brw_screen->deviceID)) -#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->brw_screen->deviceID)) +#define BRW_IS_G4X(brw) (IS_G4X((brw)->brw_screen->pci_id)) +#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->brw_screen->pci_id)) #define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) #define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) #define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 856999f3ef..741537309a 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -31,7 +31,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_buffer_objects.h" #define FILE_DEBUG_FLAG DEBUG_BATCH @@ -133,7 +133,7 @@ static void brw_emit_prim(struct brw_context *brw, ADVANCE_BATCH(); } if (prim_packet.verts_per_instance) { - intel_batchbuffer_data( brw->intel.batch, &prim_packet, + brw_batchbuffer_data( brw->intel.batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); } if (intel->always_flush_cache) { @@ -224,7 +224,7 @@ static GLboolean brw_try_draw_prims( struct brw_context *brw, return ret; if (intel->always_flush_batch) - intel_batchbuffer_flush(intel->batch); + brw_batchbuffer_flush(intel->batch); return 0; } @@ -249,12 +249,10 @@ void brw_draw_prims( struct brw_context *brw, */ ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); - /* Otherwise, we really are out of memory. Pass the drawing - * command to the software tnl module and which will in turn call - * swrast to do the drawing. 
+ /* Otherwise, flush and retry: */ if (ret != 0) { - intel_batchbuffer_flush(intel->batch); + brw_batchbuffer_flush(intel->batch); ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); assert(ret == 0); } diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index dce015d79f..1ab65d60c4 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -35,7 +35,7 @@ #include "brw_state.h" #include "brw_fallback.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_buffer_objects.h" #include "intel_tex.h" diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h index 30603bdd0e..46d52a473b 100644 --- a/src/gallium/drivers/i965/brw_eu.h +++ b/src/gallium/drivers/i965/brw_eu.h @@ -35,7 +35,6 @@ #include "brw_structs.h" #include "brw_defines.h" -#include "shader/prog_instruction.h" #define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) #define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) @@ -45,6 +44,23 @@ #define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) #define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) +#define BRW_WRITEMASK_NONE 0x00 +#define BRW_WRITEMASK_X 0x01 +#define BRW_WRITEMASK_Y 0x02 +#define BRW_WRITEMASK_XY 0x03 +#define BRW_WRITEMASK_Z 0x04 +#define BRW_WRITEMASK_XZ 0x05 +#define BRW_WRITEMASK_YZ 0x06 +#define BRW_WRITEMASK_XYZ 0x07 +#define BRW_WRITEMASK_W 0x08 +#define BRW_WRITEMASK_XW 0x09 +#define BRW_WRITEMASK_YW 0x0A +#define BRW_WRITEMASK_XYW 0x0B +#define BRW_WRITEMASK_ZW 0x0C +#define BRW_WRITEMASK_XZW 0x0D +#define BRW_WRITEMASK_YZW 0x0E +#define BRW_WRITEMASK_XYZW 0x0F + #define REG_SIZE (8*4) @@ -157,7 +173,7 @@ static INLINE int type_sz( GLuint type ) * \param width one of BRW_WIDTH_x * \param hstride one of BRW_HORIZONTAL_STRIDE_x * \param swizzle one of BRW_SWIZZLE_x - * \param writemask WRITEMASK_X/Y/Z/W bitfield + * \param writemask 
BRW_WRITEMASK_X/Y/Z/W bitfield */ static INLINE struct brw_reg brw_reg( GLuint file, GLuint nr, @@ -215,7 +231,7 @@ static INLINE struct brw_reg brw_vec16_reg( GLuint file, BRW_WIDTH_16, BRW_HORIZONTAL_STRIDE_1, BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); + BRW_WRITEMASK_XYZW); } /** Construct float[8] register */ @@ -231,7 +247,7 @@ static INLINE struct brw_reg brw_vec8_reg( GLuint file, BRW_WIDTH_8, BRW_HORIZONTAL_STRIDE_1, BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); + BRW_WRITEMASK_XYZW); } /** Construct float[4] register */ @@ -247,7 +263,7 @@ static INLINE struct brw_reg brw_vec4_reg( GLuint file, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_1, BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); + BRW_WRITEMASK_XYZW); } /** Construct float[2] register */ @@ -263,7 +279,7 @@ static INLINE struct brw_reg brw_vec2_reg( GLuint file, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_1, BRW_SWIZZLE_XYXY, - WRITEMASK_XY); + BRW_WRITEMASK_XY); } /** Construct float[1] register */ @@ -279,7 +295,7 @@ static INLINE struct brw_reg brw_vec1_reg( GLuint file, BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0, BRW_SWIZZLE_XXXX, - WRITEMASK_X); + BRW_WRITEMASK_X); } @@ -510,7 +526,7 @@ static INLINE struct brw_reg brw_ip_reg( void ) BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0, BRW_SWIZZLE_XYZW, /* NOTE! */ - WRITEMASK_XYZW); /* NOTE! */ + BRW_WRITEMASK_XYZW); /* NOTE! */ } static INLINE struct brw_reg brw_acc_reg( void ) diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 241cdc33f8..f6b8843e01 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -1276,7 +1276,7 @@ void brw_SAMPLE(struct brw_compile *p, * instruction, so that is a guide for whether a workaround is * needed. 
*/ - if (writemask != WRITEMASK_XYZW) { + if (writemask != BRW_WRITEMASK_XYZW) { GLuint dst_offset = 0; GLuint i, newmask = 0, len = 0; @@ -1299,7 +1299,7 @@ void brw_SAMPLE(struct brw_compile *p, else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); - newmask = ~newmask & WRITEMASK_XYZW; + newmask = ~newmask & BRW_WRITEMASK_XYZW; brw_push_insn_state(p); diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 58930e7964..692ce46679 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -29,7 +29,7 @@ * Keith Whitwell */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c index 9ec206d7e8..fd8e2acced 100644 --- a/src/gallium/drivers/i965/brw_gs_emit.c +++ b/src/gallium/drivers/i965/brw_gs_emit.c @@ -30,7 +30,7 @@ */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index d33bf40a01..eb39be8545 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -31,7 +31,7 @@ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_regions.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index d5b7bd3b83..e85a1a9c1b 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -1,6 +1,6 @@ /** - * called from intel_batchbuffer_flush and children before sending a + * called from brw_batchbuffer_flush and children before sending a * batchbuffer off. 
*/ static void brw_finish_batch(struct intel_context *intel) diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c index 0b9ba0c0ed..55242ac6ad 100644 --- a/src/gallium/drivers/i965/brw_pipe_query.c +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -42,7 +42,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_reg.h" /** Waits on the query object's BO and totals the results for this query */ @@ -122,7 +122,7 @@ brw_end_query(struct pipe_context *pipe, struct pipe_query *q) */ if (query->bo) { brw_emit_query_end(brw); - intel_batchbuffer_flush(brw->batch); + brw_batchbuffer_flush(brw->batch); brw->sws->bo_unreference(brw->query.bo); brw->query.bo = NULL; diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c new file mode 100644 index 0000000000..ff64dbd48d --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -0,0 +1,46 @@ + +/* Fill the rasterizer-dependent clip key fields: clip mode, flat shading and, for triangles, per-winding fill mode, offset and copy_bfc flags. */ +static void +calculate_clip_key_rast() +{ + if (BRW_IS_IGDNG(brw)) + key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; + else + key.clip_mode = BRW_CLIPMODE_NORMAL; + + key.do_flat_shading = brw->rast->templ.flatshade; + + if (key.primitive == PIPE_PRIM_TRIANGLES) { + if (brw->rast->templ.cull_mode == PIPE_WINDING_BOTH) /* compare, do not assign: both windings culled -> reject all */ + key.clip_mode = BRW_CLIPMODE_REJECT_ALL; + else { + key.fill_ccw = CLIP_CULL; + key.fill_cw = CLIP_CULL; + + if (!(brw->rast->templ.cull_mode & PIPE_WINDING_CCW)) { + key.fill_ccw = translate_fill(brw->rast.fill_ccw); /* NOTE(review): rast is accessed with '.' here but with '->templ' above -- confirm the intended type */ + } + + if (!(brw->rast->templ.cull_mode & PIPE_WINDING_CW)) { + key.fill_cw = translate_fill(brw->rast.fill_cw); + } + + if (key.fill_cw != CLIP_FILL || + key.fill_ccw != CLIP_FILL) { + key.do_unfilled = 1; + key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + } + + key.offset_ccw = brw->rast.templ.offset_ccw; + key.offset_cw = brw->rast.templ.offset_cw; + + if (brw->rast.templ.light_twoside && + key.fill_cw != CLIP_CULL) + key.copy_bfc_cw = 1; + + 
if (brw->rast.templ.light_twoside && + key.fill_ccw != CLIP_CULL) + key.copy_bfc_ccw = 1; + } + } +} diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h new file mode 100644 index 0000000000..6ceaa1fb09 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_rast.h @@ -0,0 +1,14 @@ +#ifndef BRW_PIPE_RAST_H +#define BRW_PIPE_RAST_H + +#include "brw_clip.h" + +struct brw_rasterizer_state { + struct pipe_rasterizer_state templ; /* for draw module */ + + /* Precalculated hardware state: + */ + struct brw_clip_prog_key clip_key; +}; + +#endif diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c new file mode 100644 index 0000000000..fbb772d18c --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -0,0 +1,159 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "brw_context.h" +#include "brw_util.h" +#include "brw_wm.h" + +static void brwBindProgram( struct brw_context *brw, + GLenum target, + struct gl_program *prog ) +{ /* Flag BRW_NEW_VERTEX_PROGRAM or BRW_NEW_FRAGMENT_PROGRAM according to target. */ + struct brw_context *brw = brw_context(ctx); /* NOTE(review): redeclares the 'brw' parameter; 'ctx' is not declared in this function */ + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + break; + case GL_FRAGMENT_PROGRAM_ARB: + brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; + break; + } +} + +static struct gl_program *brwNewProgram( struct brw_context *brw, + GLenum target, + GLuint id ) +{ /* Allocate a brw vertex/fragment program, assign it the next program_id and pass it to the matching _mesa_init_*_program; other targets use _mesa_new_program. Returns NULL if the allocation fails. */ + struct brw_context *brw = brw_context(ctx); /* NOTE(review): redeclares the 'brw' parameter; 'ctx' is not declared in this function */ + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: { + struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program); + if (prog) { + prog->id = brw->program_id++; + + return _mesa_init_vertex_program( ctx, &prog->program, + target, id ); + } + else + return NULL; + } + + case GL_FRAGMENT_PROGRAM_ARB: { + struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program); + if (prog) { + prog->id = brw->program_id++; + + return _mesa_init_fragment_program( ctx, &prog->program, + target, id ); + } + else + return NULL; + } + + default: + return _mesa_new_program(ctx, target, id); + } +} + +static void brwDeleteProgram( struct brw_context *brw, + struct gl_program *prog ) +{ /* For fragment programs, unreference const_buffer before calling _mesa_delete_program. */ + if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; + struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); + brw->sws->bo_unreference(brw_fprog->const_buffer); + } + + _mesa_delete_program( ctx, prog ); +} + + +static GLboolean 
brwIsProgramNative( struct brw_context *brw, + GLenum target, + struct gl_program *prog ) +{ + return GL_TRUE; +} + +static void brwProgramStringNotify( struct brw_context *brw, + GLenum target, + struct gl_program *prog ) +{ + struct brw_context *brw = brw_context(ctx); + + if (target == GL_FRAGMENT_PROGRAM_ARB) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; + struct brw_fragment_program *newFP = brw_fragment_program(fprog); + const struct brw_fragment_program *curFP = + brw_fragment_program_const(brw->fragment_program); + + if (fprog->FogOption) { + _mesa_append_fog_code(ctx, fprog); + fprog->FogOption = GL_NONE; + } + + if (newFP == curFP) + brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; + newFP->id = brw->program_id++; + newFP->isGLSL = brw_wm_is_glsl(fprog); + } + else if (target == GL_VERTEX_PROGRAM_ARB) { + struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; + struct brw_vertex_program *newVP = brw_vertex_program(vprog); + const struct brw_vertex_program *curVP = + brw_vertex_program_const(brw->vertex_program); + + if (newVP == curVP) + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; + if (newVP->program.IsPositionInvariant) { + _mesa_insert_mvp_code(ctx, &newVP->program); + } + newVP->id = brw->program_id++; + + /* Also tell tnl about it: + */ + _tnl_program_string(ctx, target, prog); + } +} + +void brwInitFragProgFuncs( struct dd_function_table *functions ) +{ + assert(functions->ProgramStringNotify == _tnl_program_string); + + functions->BindProgram = brwBindProgram; + functions->NewProgram = brwNewProgram; + functions->DeleteProgram = brwDeleteProgram; + functions->IsProgramNative = brwIsProgramNative; + functions->ProgramStringNotify = brwProgramStringNotify; +} + diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h new file mode 100644 index 0000000000..a640104d71 --- /dev/null +++ b/src/gallium/drivers/i965/brw_reg.h @@ -0,0 +1,79 @@ 
+/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_REG_H +#define BRW_REG_H + +#define CMD_MI (0x0 << 29) +#define CMD_2D (0x2 << 29) +#define CMD_3D (0x3 << 29) + +#define MI_NOOP (CMD_MI | 0) +#define MI_BATCH_BUFFER_END (CMD_MI | 0xA << 23) +#define MI_FLUSH (CMD_MI | (4 << 23)) + +#define _3DSTATE_DRAWRECT_INFO_I965 (CMD_3D | (3 << 27) | (1 << 24) | 0x2) + +/** @{ + * + * PIPE_CONTROL operation, a combination MI_FLUSH and register write with + * additional flushing control. 
+ */ +#define _3DSTATE_PIPE_CONTROL (CMD_3D | (3 << 27) | (2 << 24) | 2) +#define PIPE_CONTROL_NO_WRITE (0 << 14) +#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14) +#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14) +#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14) +#define PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define PIPE_CONTROL_WRITE_FLUSH (1 << 12) +#define PIPE_CONTROL_INSTRUCTION_FLUSH (1 << 11) +#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) +#define PIPE_CONTROL_PPGTT_WRITE (0 << 2) +#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2) + +/** @} */ + +#define XY_SETUP_BLT_CMD (CMD_2D | (0x01 << 22) | 6) +#define XY_COLOR_BLT_CMD (CMD_2D | (0x50 << 22) | 4) +#define XY_SRC_COPY_BLT_CMD (CMD_2D | (0x53 << 22) | 6) + +/* BR00 */ +#define XY_BLT_WRITE_ALPHA (1 << 21) +#define XY_BLT_WRITE_RGB (1 << 20) +#define XY_SRC_TILED (1 << 15) +#define XY_DST_TILED (1 << 11) + +/* BR13 */ +#define BR13_565 (0x1 << 24) +#define BR13_8888 (0x3 << 24) + +#define FENCE_LINEAR 0 +#define FENCE_XMAJOR 1 +#define FENCE_YMAJOR 2 + + +#endif diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h new file mode 100644 index 0000000000..716b55c52b --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen.h @@ -0,0 +1,78 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef BRW_SCREEN_H +#define BRW_SCREEN_H + +#include "pipe/p_state.h" +#include "pipe/p_screen.h" + + +struct brw_winsys_screen; + + +/** + * Subclass of pipe_screen + */ +struct brw_screen +{ + struct pipe_screen base; + + struct brw_winsys_screen *sws; + + boolean is_i945; + uint pci_id; +}; + +/** + * Subclass of pipe_transfer + */ +struct brw_transfer +{ + struct pipe_transfer base; + + unsigned offset; +}; + + +/* + * Cast wrappers + */ +static INLINE struct brw_screen * +brw_screen(struct pipe_screen *pscreen) +{ + return (struct brw_screen *) pscreen; +} + +static INLINE struct brw_transfer * +brw_transfer(struct pipe_transfer *transfer) +{ + return (struct brw_transfer *)transfer; +} + + +#endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c index d199d0b81a..544be6a089 100644 --- a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -1,6 +1,6 @@ /* _NEW_BUFFERS */ - if (IS_965(intel->intelScreen->deviceID) && - !IS_G4X(intel->intelScreen->deviceID)) { + if (IS_965(brw->brw_screen->pci_id) && + !IS_G4X(brw->brw_screen->pci_id)) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; struct intel_renderbuffer *irb = intel_renderbuffer(rb); diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 0115f77c08..54202cbd12 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -30,7 +30,7 @@ */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h index 26c2e8891a..c99116b8b1 100644 --- a/src/gallium/drivers/i965/brw_sf.h +++ b/src/gallium/drivers/i965/brw_sf.h @@ -34,7 
+34,6 @@ #define BRW_SF_H -#include "shader/program.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index c98d7ec13a..4acb2b7d72 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -30,7 +30,7 @@ */ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_defines.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index b716097bfc..02657eaba7 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -157,7 +157,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) +#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) GLboolean brw_cached_batch_struct( struct brw_context *brw, diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c index 9568794625..b285837070 100644 --- a/src/gallium/drivers/i965/brw_state_batch.c +++ b/src/gallium/drivers/i965/brw_state_batch.c @@ -32,7 +32,7 @@ #include "brw_state.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" @@ -47,7 +47,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct header *newheader = (struct header *)data; if (brw->emit_state_always) { - intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } @@ -74,7 +74,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, emit: memcpy(item->header, 
newheader, sz); - intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index 91d0f80297..1b5f27cc16 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -57,7 +57,7 @@ */ #include "brw_state.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" /* XXX: Fixme - have to include these to get the sizes of the prog_key * structs: diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index b68b6cb21a..842380e38f 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -33,7 +33,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" /* This is used to initialize brw->state.atoms[]. 
We could use this * list directly except for a single atom, brw_constant_buffer, which diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c index 75cdc18912..813cd31f49 100644 --- a/src/gallium/drivers/i965/brw_tex_layout.c +++ b/src/gallium/drivers/i965/brw_tex_layout.c @@ -47,7 +47,7 @@ GLboolean brw_miptree_layout(struct brw_context *brw, switch (mt->target) { case GL_TEXTURE_CUBE_MAP: - if (IS_IGDNG(intel->intelScreen->deviceID)) { + if (IS_IGDNG(brw->brw_screen->pci_id)) { GLuint align_h = 2, align_w = 4; GLuint level; GLuint x = 0; diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c index 8c6f4355a6..18d79c5ebb 100644 --- a/src/gallium/drivers/i965/brw_urb.c +++ b/src/gallium/drivers/i965/brw_urb.c @@ -31,7 +31,7 @@ -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_util.h b/src/gallium/drivers/i965/brw_util.h index 37c3acbc11..b5f9a36e7b 100644 --- a/src/gallium/drivers/i965/brw_util.h +++ b/src/gallium/drivers/i965/brw_util.h @@ -36,9 +36,8 @@ #include "brw_types.h" extern GLuint brw_count_bits( GLuint val ); -extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); -extern GLuint brw_translate_blend_factor( GLenum factor ); -extern GLuint brw_translate_blend_equation( GLenum mode ); +extern GLuint brw_translate_blend_factor( unsigned factor ); +extern GLuint brw_translate_blend_equation( unsigned mode ); diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 97e523c3ee..dcd687ac34 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -34,7 +34,6 @@ #include "brw_vs.h" #include "brw_util.h" #include "brw_state.h" -#include "shader/prog_print.h" @@ -113,7 +112,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state 
brw_vs_prog = { .dirty = { - .mesa = PIPE_NEW_UCP | PIPE_NEW_RAST, + .mesa = PIPE_NEW_CLIP | PIPE_NEW_RAST, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index 4a591365c9..54f7d7d7c4 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -36,7 +36,6 @@ #include "brw_context.h" #include "brw_eu.h" -#include "shader/program.h" struct brw_vs_prog_key { diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 6adb743017..e946944295 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -192,7 +192,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) BRW_WIDTH_8, BRW_HORIZONTAL_STRIDE_1, BRW_SWIZZLE_XXXX, - WRITEMASK_X); + BRW_WRITEMASK_X); reg++; } @@ -487,7 +487,7 @@ static void emit_exp_noalias( struct brw_vs_compile *c, struct brw_compile *p = &c->func; - if (dst.dw1.bits.writemask & WRITEMASK_X) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_X) { struct brw_reg tmp = get_tmp(c); struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); @@ -499,23 +499,23 @@ static void emit_exp_noalias( struct brw_vs_compile *c, /* Adjust exponent for floating point: * exp += 127 */ - brw_ADD(p, brw_writemask(tmp_d, WRITEMASK_X), tmp_d, brw_imm_d(127)); + brw_ADD(p, brw_writemask(tmp_d, BRW_WRITEMASK_X), tmp_d, brw_imm_d(127)); /* Install exponent and sign. 
* Excess drops off the edge: */ - brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), WRITEMASK_X), + brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), BRW_WRITEMASK_X), tmp_d, brw_imm_d(23)); release_tmp(c, tmp); } - if (dst.dw1.bits.writemask & WRITEMASK_Y) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y) { /* result[1] = arg0.x - floor(arg0.x) */ - brw_FRC(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0, 0)); + brw_FRC(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0, 0)); } - if (dst.dw1.bits.writemask & WRITEMASK_Z) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) { /* As with the LOG instruction, we might be better off just * doing a taylor expansion here, seeing as we have to do all * the prep work. @@ -525,14 +525,14 @@ static void emit_exp_noalias( struct brw_vs_compile *c, */ emit_math1(c, BRW_MATH_FUNCTION_EXP, - brw_writemask(dst, WRITEMASK_Z), + brw_writemask(dst, BRW_WRITEMASK_Z), brw_swizzle1(arg0, 0), BRW_MATH_PRECISION_FULL); } - if (dst.dw1.bits.writemask & WRITEMASK_W) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) { /* result[3] = 1.0; */ - brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1)); + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), brw_imm_f(1)); } } @@ -562,36 +562,36 @@ static void emit_log_noalias( struct brw_vs_compile *c, * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127 * result[1].i = (x.i & ((1<<23)-1) + (127<<23) */ - if (dst.dw1.bits.writemask & WRITEMASK_XZ) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_XZ) { brw_AND(p, - brw_writemask(tmp_ud, WRITEMASK_X), + brw_writemask(tmp_ud, BRW_WRITEMASK_X), brw_swizzle1(arg0_ud, 0), brw_imm_ud((1U<<31)-1)); brw_SHR(p, - brw_writemask(tmp_ud, WRITEMASK_X), + brw_writemask(tmp_ud, BRW_WRITEMASK_X), tmp_ud, brw_imm_ud(23)); brw_ADD(p, - brw_writemask(tmp, WRITEMASK_X), + brw_writemask(tmp, BRW_WRITEMASK_X), retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? 
*/ brw_imm_d(-127)); } - if (dst.dw1.bits.writemask & WRITEMASK_YZ) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_YZ) { brw_AND(p, - brw_writemask(tmp_ud, WRITEMASK_Y), + brw_writemask(tmp_ud, BRW_WRITEMASK_Y), brw_swizzle1(arg0_ud, 0), brw_imm_ud((1<<23)-1)); brw_OR(p, - brw_writemask(tmp_ud, WRITEMASK_Y), + brw_writemask(tmp_ud, BRW_WRITEMASK_Y), tmp_ud, brw_imm_ud(127<<23)); } - if (dst.dw1.bits.writemask & WRITEMASK_Z) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) { /* result[2] = result[0] + LOG2(result[1]); */ /* Why bother? The above is just a hint how to do this with a @@ -606,19 +606,19 @@ static void emit_log_noalias( struct brw_vs_compile *c, */ emit_math1(c, BRW_MATH_FUNCTION_LOG, - brw_writemask(tmp, WRITEMASK_Z), + brw_writemask(tmp, BRW_WRITEMASK_Z), brw_swizzle1(tmp, 1), BRW_MATH_PRECISION_FULL); brw_ADD(p, - brw_writemask(tmp, WRITEMASK_Z), + brw_writemask(tmp, BRW_WRITEMASK_Z), brw_swizzle1(tmp, 2), brw_swizzle1(tmp, 0)); } - if (dst.dw1.bits.writemask & WRITEMASK_W) { + if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) { /* result[3] = 1.0; */ - brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1)); + brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_W), brw_imm_f(1)); } if (need_tmp) { @@ -639,14 +639,14 @@ static void emit_dst_noalias( struct brw_vs_compile *c, /* There must be a better way to do this: */ - if (dst.dw1.bits.writemask & WRITEMASK_X) - brw_MOV(p, brw_writemask(dst, WRITEMASK_X), brw_imm_f(1.0)); - if (dst.dw1.bits.writemask & WRITEMASK_Y) - brw_MUL(p, brw_writemask(dst, WRITEMASK_Y), arg0, arg1); - if (dst.dw1.bits.writemask & WRITEMASK_Z) - brw_MOV(p, brw_writemask(dst, WRITEMASK_Z), arg0); - if (dst.dw1.bits.writemask & WRITEMASK_W) - brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1); + if (dst.dw1.bits.writemask & BRW_WRITEMASK_X) + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_X), brw_imm_f(1.0)); + if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y) + brw_MUL(p, brw_writemask(dst, BRW_WRITEMASK_Y), arg0, arg1); + if 
(dst.dw1.bits.writemask & BRW_WRITEMASK_Z) + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Z), arg0); + if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), arg1); } @@ -672,8 +672,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c, if (need_tmp) tmp = get_tmp(c); - brw_MOV(p, brw_writemask(dst, WRITEMASK_YZ), brw_imm_f(0)); - brw_MOV(p, brw_writemask(dst, WRITEMASK_XW), brw_imm_f(1)); + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_YZ), brw_imm_f(0)); + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_XW), brw_imm_f(1)); /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order * to get all channels active inside the IF. In the clipping code @@ -683,15 +683,15 @@ static void emit_lit_noalias( struct brw_vs_compile *c, brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); if_insn = brw_IF(p, BRW_EXECUTE_8); { - brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0)); + brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0,0)); brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); - brw_MOV(p, brw_writemask(tmp, WRITEMASK_Z), brw_swizzle1(arg0,1)); + brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_Z), brw_swizzle1(arg0,1)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); emit_math2(c, BRW_MATH_FUNCTION_POW, - brw_writemask(dst, WRITEMASK_Z), + brw_writemask(dst, BRW_WRITEMASK_Z), brw_swizzle1(tmp, 2), brw_swizzle1(arg0, 3), BRW_MATH_PRECISION_PARTIAL); @@ -1045,7 +1045,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* ndc = 1.0 / pos.w */ emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); /* ndc.xyz = pos * ndc */ - brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + brw_MUL(p, brw_writemask(ndc, BRW_WRITEMASK_XYZ), pos, ndc); /* Update the header for point size, user clipping flags, and -ve rhw * workaround. 
@@ -1062,14 +1062,14 @@ static void emit_vertex_write( struct brw_vs_compile *c) if (c->prog_data.outputs_written & (1<regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; - brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); - brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); + brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); } for (i = 0; i < c->key.nr_userclip; i++) { brw_set_conditionalmod(p, BRW_CONDITIONAL_L); brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); - brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<first_overflow_output > 0) { /* Not all of the vertex outputs/results fit into the MRF. * Move the overflowed attributes from the GRF to the MRF and diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h new file mode 100644 index 0000000000..2142db5a4d --- /dev/null +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -0,0 +1,243 @@ +/************************************************************************** + * + * Copyright © 2009 Jakob Bornecrantz + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_WINSYS_H +#define BRW_WINSYS_H + +#include "pipe/p_compiler.h" + +struct brw_winsys; +struct pipe_fence_handle; + +/* This currently just wraps dri_bo: + */ +struct brw_winsys_buffer { + struct brw_winsys_screen *sws; + void *bo; + unsigned offset; +}; + +enum brw_buffer_usage { + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_SAMPLER, + I915_GEM_DOMAIN_VERTEX, + I915_GEM_DOMAIN_INSTRUCTION, + + + /* XXX: migrate from domains to explicit usage cases, eg below: + */ + + /* use on textures */ + BRW_USAGE_RENDER = 0x01, + BRW_USAGE_SAMPLER = 0x02, + BRW_USAGE_2D_TARGET = 0x04, + BRW_USAGE_2D_SOURCE = 0x08, + /* use on vertex */ + BRW_USAGE_VERTEX = 0x10, +}; + +enum brw_buffer_type +{ + BRW_BUFFER_TYPE_TEXTURE, + BRW_BUFFER_TYPE_SCANOUT, /**< a texture used for scanning out from */ + BRW_BUFFER_TYPE_VERTEX, +}; + + +/* AKA winsys context: + */ +struct brw_batchbuffer { + + struct brw_winsys *iws; + struct brw_winsys_buffer *buf; + + /** + * Values exported to speed up the writing the batchbuffer, + * instead of having to go trough a accesor function for + * each dword written. + */ + /*{@*/ + uint8_t *map; + uint8_t *ptr; + size_t size; + + size_t relocs; + size_t max_relocs; + /*@}*/ +}; + +struct brw_winsys_screen { + + /** + * Batchbuffer functions. + */ + /*@{*/ + /** + * Create a new batchbuffer. 
+ */ + struct brw_batchbuffer *(*batchbuffer_create)(struct brw_winsys_screen *iws); + + /** + * Emit a relocation to a buffer. + * Target position in batchbuffer is the same as ptr. + */ + int (*batchbuffer_reloc)(struct brw_batchbuffer *batch, + unsigned offset, + struct brw_winsys_buffer *reloc, + unsigned pre_add, + enum brw_buffer_usage usage); + + /** + * Flush a bufferbatch. + */ + void (*batchbuffer_flush)(struct brw_batchbuffer *batch, + struct pipe_fence_handle **fence); + + /** + * Destroy a batchbuffer. + */ + void (*batchbuffer_destroy)(struct brw_batchbuffer *batch); + /*@}*/ + + + /** + * Buffer functions. + */ + /*@{*/ + /** + * Create a buffer. + */ + struct brw_winsys_buffer *(*buffer_create)(struct brw_winsys *iws, + unsigned size, + unsigned alignment, + enum brw_buffer_type type); + + + /* Reference and unreference buffers: + */ + void (*bo_reference)( struct brw_winsys_buffer *buffer ); + void (*bo_unreference)( struct brw_winsys_buffer *buffer ); + void (*bo_emit_reloc)( struct brw_winsys_buffer *buffer, + unsigned domain, + unsigned a, + unsigned b, + unsigned offset, + struct brw_winsys_buffer *b2); + + /** + * Map a buffer. + */ + void *(*buffer_map)(struct brw_winsys *iws, + struct brw_winsys_buffer *buffer, + boolean write); + + /** + * Unmap a buffer. + */ + void (*buffer_unmap)(struct brw_winsys *iws, + struct brw_winsys_buffer *buffer); + + /** + * Write to a buffer. + * + * Arguments follows pipe_buffer_write. + */ + int (*buffer_write)(struct brw_winsys *iws, + struct brw_winsys_buffer *dst, + size_t offset, + size_t size, + const void *data); + + void (*buffer_destroy)(struct brw_winsys *iws, + struct brw_winsys_buffer *buffer); + /*@}*/ + + + /** + * Fence functions. + */ + /*@{*/ + /** + * Reference fence and set ptr to fence. + */ + void (*fence_reference)(struct brw_winsys *iws, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence); + + /** + * Check if a fence has finished. 
+ */ + int (*fence_signalled)(struct brw_winsys *iws, + struct pipe_fence_handle *fence); + + /** + * Wait on a fence to finish. + */ + int (*fence_finish)(struct brw_winsys *iws, + struct pipe_fence_handle *fence); + /*@}*/ + + + /** + * Destroy the winsys. + */ + void (*destroy)(struct brw_winsys *iws); +}; + + +/** + * Create i915 pipe_screen. + */ +struct pipe_screen *i915_create_screen(struct brw_winsys *iws, unsigned pci_id); + +/** + * Create a i915 pipe_context. + */ +struct pipe_context *i915_create_context(struct pipe_screen *screen); + +/** + * Get the brw_winsys buffer backing the texture. + * + * TODO UGLY + */ +struct pipe_texture; +boolean i915_get_texture_buffer_brw(struct pipe_texture *texture, + struct brw_winsys_buffer **buffer, + unsigned *stride); + +/** + * Wrap a brw_winsys buffer with a texture blanket. + * + * TODO UGLY + */ +struct pipe_texture * i915_texture_blanket_brw(struct pipe_screen *screen, + struct pipe_texture *tmplt, + unsigned pitch, + struct brw_winsys_buffer *buffer); + + + + +#endif diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 756a680150..18775830f9 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -34,7 +34,6 @@ #define BRW_WM_H -#include "shader/prog_instruction.h" #include "brw_context.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c index 220821087c..c6659646f2 100644 --- a/src/gallium/drivers/i965/brw_wm_debug.c +++ b/src/gallium/drivers/i965/brw_wm_debug.c @@ -98,7 +98,7 @@ void brw_wm_print_insn( struct brw_wm_compile *c, } _mesa_printf("]"); - if (inst->writemask != WRITEMASK_XYZW) + if (inst->writemask != BRW_WRITEMASK_XYZW) _mesa_printf(".%s%s%s%s", GET_BIT(inst->writemask, 0) ? "x" : "", GET_BIT(inst->writemask, 1) ? 
"y" : "", diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index fec33f74eb..7df9b79d7a 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -72,14 +72,14 @@ static void emit_pixel_xy(struct brw_compile *p, /* Calculate pixel centers by adding 1 or 0 to each of the * micro-tile coordinates passed in r1. */ - if (mask & WRITEMASK_X) { + if (mask & BRW_WRITEMASK_X) { brw_ADD(p, vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw, 4), 2, 4, 0), brw_imm_v(0x10101010)); } - if (mask & WRITEMASK_Y) { + if (mask & BRW_WRITEMASK_Y) { brw_ADD(p, vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)), stride(suboffset(r1_uw,5), 2, 4, 0), @@ -101,14 +101,14 @@ static void emit_delta_xy(struct brw_compile *p, /* Calc delta X,Y by subtracting origin in r1 from the pixel * centers. */ - if (mask & WRITEMASK_X) { + if (mask & BRW_WRITEMASK_X) { brw_ADD(p, dst[0], retype(arg0[0], BRW_REGISTER_TYPE_UW), negate(r1)); } - if (mask & WRITEMASK_Y) { + if (mask & BRW_WRITEMASK_Y) { brw_ADD(p, dst[1], retype(arg0[1], BRW_REGISTER_TYPE_UW), @@ -124,7 +124,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c, { struct brw_compile *p = &c->func; - if (mask & WRITEMASK_X) { + if (mask & BRW_WRITEMASK_X) { /* X' = X */ brw_MOV(p, dst[0], @@ -133,7 +133,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c, /* XXX: is this needed any more, or is this a NOOP? */ - if (mask & WRITEMASK_Y) { + if (mask & BRW_WRITEMASK_Y) { /* Y' = height - 1 - Y */ brw_ADD(p, dst[1], @@ -152,7 +152,7 @@ static void emit_pixel_w( struct brw_compile *p, /* Don't need this if all you are doing is interpolating color, for * instance. 
*/ - if (mask & WRITEMASK_W) { + if (mask & BRW_WRITEMASK_W) { struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4); /* Calc 1/w - just linterp wpos[3] optimized by putting the @@ -255,7 +255,7 @@ static void emit_frontfacing( struct brw_compile *p, struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); GLuint i; - if (!(mask & WRITEMASK_XYZW)) + if (!(mask & BRW_WRITEMASK_XYZW)) return; for (i = 0; i < 4; i++) { @@ -321,26 +321,26 @@ void emit_ddxy(struct brw_compile *p, BRW_VERTICAL_STRIDE_2, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); src1 = brw_reg(arg0[i].file, arg0[i].nr, 0, BRW_REGISTER_TYPE_F, BRW_VERTICAL_STRIDE_2, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); } else { src0 = brw_reg(arg0[i].file, arg0[i].nr, 0, BRW_REGISTER_TYPE_F, BRW_VERTICAL_STRIDE_4, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); src1 = brw_reg(arg0[i].file, arg0[i].nr, 2, BRW_REGISTER_TYPE_F, BRW_VERTICAL_STRIDE_4, BRW_WIDTH_4, BRW_HORIZONTAL_STRIDE_0, - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); + BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW); } brw_ADD(p, dst[i], src0, negate(src1)); } @@ -611,12 +611,12 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; - if (!(mask & WRITEMASK_XYZW)) + if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & WRITEMASK_XYZW)); + assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -633,12 +633,12 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - int dst_chan = _mesa_ffs(mask 
& WRITEMASK_XYZW) - 1; + int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; - if (!(mask & WRITEMASK_XYZW)) + if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & WRITEMASK_XYZW)); + assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -656,12 +656,12 @@ static void emit_dph( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + const int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; - if (!(mask & WRITEMASK_XYZW)) + if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & WRITEMASK_XYZW)); + assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -681,7 +681,7 @@ static void emit_xpd( struct brw_compile *p, { GLuint i; - assert(!(mask & WRITEMASK_W) == WRITEMASK_X); + assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_X); for (i = 0 ; i < 3; i++) { if (mask & (1<tex_idx) { case TEXTURE_1D_INDEX: - emit = WRITEMASK_X; + emit = BRW_WRITEMASK_X; nr = 1; break; case TEXTURE_2D_INDEX: case TEXTURE_RECT_INDEX: - emit = WRITEMASK_XY; + emit = BRW_WRITEMASK_XY; nr = 2; break; case TEXTURE_3D_INDEX: case TEXTURE_CUBE_INDEX: - emit = WRITEMASK_XYZ; + emit = BRW_WRITEMASK_XYZ; nr = 3; break; default: @@ -815,7 +815,7 @@ static void emit_tex( struct brw_wm_compile *c, if (inst->tex_shadow) { nr = 4; - emit |= WRITEMASK_W; + emit |= BRW_WRITEMASK_W; } msgLength = 1; @@ -922,18 +922,18 @@ static void emit_lit( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { - assert((mask & WRITEMASK_XW) == 0); + assert((mask & BRW_WRITEMASK_XW) == 0); - if (mask & WRITEMASK_Y) { + if (mask & BRW_WRITEMASK_Y) { brw_set_saturate(p, (mask & SATURATE) ? 
1 : 0); brw_MOV(p, dst[1], arg0[0]); brw_set_saturate(p, 0); } - if (mask & WRITEMASK_Z) { + if (mask & BRW_WRITEMASK_Z) { emit_math2(p, BRW_MATH_FUNCTION_POW, &dst[2], - WRITEMASK_X | (mask & SATURATE), + BRW_WRITEMASK_X | (mask & SATURATE), &arg0[1], &arg0[3]); } @@ -944,10 +944,10 @@ static void emit_lit( struct brw_compile *p, */ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0)); { - if (mask & WRITEMASK_Y) + if (mask & BRW_WRITEMASK_Y) brw_MOV(p, dst[1], brw_imm_f(0)); - if (mask & WRITEMASK_Z) + if (mask & BRW_WRITEMASK_Z) brw_MOV(p, dst[2], brw_imm_f(0)); } brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -1414,10 +1414,10 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* There is an scs math function, but it would need some * fixup for 16-element execution. */ - if (dst_flags & WRITEMASK_X) - emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); - if (dst_flags & WRITEMASK_Y) - emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]); + if (dst_flags & BRW_WRITEMASK_X) + emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]); + if (dst_flags & BRW_WRITEMASK_Y) + emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]); break; case OPCODE_POW: diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 5f47d86f71..be240031c7 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -115,7 +115,7 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx) struct prog_dst_register reg; reg.File = file; reg.Index = idx; - reg.WriteMask = WRITEMASK_XYZW; + reg.WriteMask = BRW_WRITEMASK_XYZW; reg.RelAddr = 0; reg.CondMask = COND_TR; reg.CondSwizzle = 0; @@ -249,7 +249,7 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) */ emit_op(c, WM_PIXELXY, - dst_mask(pixel_xy, WRITEMASK_XY), + dst_mask(pixel_xy, 
BRW_WRITEMASK_XY), 0, payload_r0_depth, src_undef(), @@ -272,7 +272,7 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) */ emit_op(c, WM_DELTAXY, - dst_mask(delta_xy, WRITEMASK_XY), + dst_mask(delta_xy, BRW_WRITEMASK_XY), 0, pixel_xy, payload_r0_depth, @@ -295,7 +295,7 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) */ emit_op(c, WM_PIXELW, - dst_mask(pixel_w, WRITEMASK_W), + dst_mask(pixel_w, BRW_WRITEMASK_W), 0, interp_wpos, deltas, @@ -327,13 +327,13 @@ static void emit_interp( struct brw_wm_compile *c, */ emit_op(c, WM_WPOSXY, - dst_mask(dst, WRITEMASK_XY), + dst_mask(dst, BRW_WRITEMASK_XY), 0, get_pixel_xy(c), src_undef(), src_undef()); - dst = dst_mask(dst, WRITEMASK_ZW); + dst = dst_mask(dst, BRW_WRITEMASK_ZW); /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw */ @@ -370,7 +370,7 @@ static void emit_interp( struct brw_wm_compile *c, /* Interpolate the fog coordinate */ emit_op(c, WM_PINTERP, - dst_mask(dst, WRITEMASK_X), + dst_mask(dst, BRW_WRITEMASK_X), 0, interp, deltas, @@ -378,7 +378,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_MOV, - dst_mask(dst, WRITEMASK_YZW), + dst_mask(dst, BRW_WRITEMASK_YZW), 0, src_swizzle(interp, SWIZZLE_ZERO, @@ -393,7 +393,7 @@ static void emit_interp( struct brw_wm_compile *c, /* XXX review/test this case */ emit_op(c, WM_FRONTFACING, - dst_mask(dst, WRITEMASK_X), + dst_mask(dst, BRW_WRITEMASK_X), 0, src_undef(), src_undef(), @@ -404,7 +404,7 @@ static void emit_interp( struct brw_wm_compile *c, /* XXX review/test this case */ emit_op(c, WM_PINTERP, - dst_mask(dst, WRITEMASK_XY), + dst_mask(dst, BRW_WRITEMASK_XY), 0, interp, deltas, @@ -412,7 +412,7 @@ static void emit_interp( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_MOV, - dst_mask(dst, WRITEMASK_ZW), + dst_mask(dst, BRW_WRITEMASK_ZW), 0, src_swizzle(interp, SWIZZLE_ZERO, @@ -518,19 +518,19 @@ static void precalc_dst( struct brw_wm_compile *c, struct 
prog_src_register src1 = inst->SrcReg[1]; struct prog_dst_register dst = inst->DstReg; - if (dst.WriteMask & WRITEMASK_Y) { + if (dst.WriteMask & BRW_WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ emit_op(c, TGSI_OPCODE_MUL, - dst_mask(dst, WRITEMASK_Y), + dst_mask(dst, BRW_WRITEMASK_Y), inst->SaturateMode, src0, src1, src_undef()); } - if (dst.WriteMask & WRITEMASK_XZ) { + if (dst.WriteMask & BRW_WRITEMASK_XZ) { struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); @@ -538,7 +538,7 @@ static void precalc_dst( struct brw_wm_compile *c, */ swz = emit_op(c, TGSI_OPCODE_MOV, - dst_mask(dst, WRITEMASK_XZ), + dst_mask(dst, BRW_WRITEMASK_XZ), inst->SaturateMode, src_swizzle(src0, SWIZZLE_ONE, z, z, z), src_undef(), @@ -546,12 +546,12 @@ static void precalc_dst( struct brw_wm_compile *c, /* Avoid letting negation flag of src0 affect our 1 constant. */ swz->SrcReg[0].Negate &= ~NEGATE_X; } - if (dst.WriteMask & WRITEMASK_W) { + if (dst.WriteMask & BRW_WRITEMASK_W) { /* dst.w = mov src1.w */ emit_op(c, TGSI_OPCODE_MOV, - dst_mask(dst, WRITEMASK_W), + dst_mask(dst, BRW_WRITEMASK_W), inst->SaturateMode, src1, src_undef(), @@ -566,14 +566,14 @@ static void precalc_lit( struct brw_wm_compile *c, struct prog_src_register src0 = inst->SrcReg[0]; struct prog_dst_register dst = inst->DstReg; - if (dst.WriteMask & WRITEMASK_XW) { + if (dst.WriteMask & BRW_WRITEMASK_XW) { struct prog_instruction *swz; /* dst.xw = swz src0.1111 */ swz = emit_op(c, TGSI_OPCODE_MOV, - dst_mask(dst, WRITEMASK_XW), + dst_mask(dst, BRW_WRITEMASK_XW), 0, src_swizzle1(src0, SWIZZLE_ONE), src_undef(), @@ -582,10 +582,10 @@ static void precalc_lit( struct brw_wm_compile *c, swz->SrcReg[0].Negate = NEGATE_NONE; } - if (dst.WriteMask & WRITEMASK_YZ) { + if (dst.WriteMask & BRW_WRITEMASK_YZ) { emit_op(c, TGSI_OPCODE_LIT, - dst_mask(dst, WRITEMASK_YZ), + dst_mask(dst, BRW_WRITEMASK_YZ), inst->SaturateMode, src0, src_undef(), @@ -649,7 +649,7 @@ static void precalc_tex( struct brw_wm_compile *c, /* 
tmp0 = 1 / tmp1 */ emit_op(c, TGSI_OPCODE_RCP, - dst_mask(tmp0, WRITEMASK_X), + dst_mask(tmp0, BRW_WRITEMASK_X), 0, tmp1src, src_undef(), @@ -740,7 +740,7 @@ static void precalc_tex( struct brw_wm_compile *c, */ emit_op(c, TGSI_OPCODE_ADD, - dst_mask(tmp, WRITEMASK_XYZ), + dst_mask(tmp, BRW_WRITEMASK_XYZ), 0, tmpsrc, C0, @@ -751,7 +751,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_MUL, - dst_mask(tmp, WRITEMASK_Y), + dst_mask(tmp, BRW_WRITEMASK_Y), 0, tmpsrc, src_swizzle1(C0, W), @@ -766,7 +766,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_MAD, - dst_mask(dst, WRITEMASK_XYZ), + dst_mask(dst, BRW_WRITEMASK_XYZ), 0, swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), C1, @@ -776,7 +776,7 @@ static void precalc_tex( struct brw_wm_compile *c, */ emit_op(c, TGSI_OPCODE_MAD, - dst_mask(dst, WRITEMASK_Y), + dst_mask(dst, BRW_WRITEMASK_Y), 0, src_swizzle1(tmpsrc, Z), src_swizzle1(C1, W), @@ -863,7 +863,7 @@ static void precalc_txp( struct brw_wm_compile *c, */ emit_op(c, TGSI_OPCODE_RCP, - dst_mask(tmp, WRITEMASK_W), + dst_mask(tmp, BRW_WRITEMASK_W), 0, src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), src_undef(), @@ -873,7 +873,7 @@ static void precalc_txp( struct brw_wm_compile *c, */ emit_op(c, TGSI_OPCODE_MUL, - dst_mask(tmp, WRITEMASK_XYZ), + dst_mask(tmp, BRW_WRITEMASK_XYZ), 0, src0, src_swizzle1(src_reg_from_dst(tmp), W), @@ -1053,7 +1053,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) out = emit_insn(c, inst); /* This should probably be done in the parser. */ - out->DstReg.WriteMask &= WRITEMASK_XY; + out->DstReg.WriteMask &= BRW_WRITEMASK_XY; break; case TGSI_OPCODE_DST: @@ -1082,7 +1082,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) out = emit_insn(c, inst); /* This should probably be done in the parser. 
*/ - out->DstReg.WriteMask &= WRITEMASK_XYZ; + out->DstReg.WriteMask &= BRW_WRITEMASK_XYZ; break; case TGSI_OPCODE_KIL: diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 0c411b57f5..de5f5fe821 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -32,7 +32,6 @@ #include "brw_context.h" #include "brw_wm.h" -#include "shader/prog_parameter.h" diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index d940ec09a9..f2ae3a958f 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -91,15 +91,15 @@ static GLuint get_texcoord_mask( GLuint tex_idx ) { switch (tex_idx) { case TEXTURE_1D_INDEX: - return WRITEMASK_X; + return BRW_WRITEMASK_X; case TEXTURE_2D_INDEX: - return WRITEMASK_XY; + return BRW_WRITEMASK_XY; case TEXTURE_3D_INDEX: - return WRITEMASK_XYZ; + return BRW_WRITEMASK_XYZ; case TEXTURE_CUBE_INDEX: - return WRITEMASK_XYZ; + return BRW_WRITEMASK_XYZ; case TEXTURE_RECT_INDEX: - return WRITEMASK_XY; + return BRW_WRITEMASK_XY; default: return 0; } } @@ -121,16 +121,16 @@ void brw_wm_pass1( struct brw_wm_compile *c ) GLuint read0, read1, read2; if (inst->opcode == TGSI_OPCODE_KIL) { - track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */ + track_arg(c, inst, 0, BRW_WRITEMASK_XYZW); /* All args contribute to final */ continue; } if (inst->opcode == WM_FB_WRITE) { - track_arg(c, inst, 0, WRITEMASK_XYZW); - track_arg(c, inst, 1, WRITEMASK_XYZW); + track_arg(c, inst, 0, BRW_WRITEMASK_XYZW); + track_arg(c, inst, 1, BRW_WRITEMASK_XYZW); if (c->key.source_depth_to_render_target && c->key.computes_depth) - track_arg(c, inst, 2, WRITEMASK_Z); + track_arg(c, inst, 2, BRW_WRITEMASK_Z); else track_arg(c, inst, 2, 0); continue; @@ -191,9 +191,9 @@ void brw_wm_pass1( struct brw_wm_compile *c ) break; case TGSI_OPCODE_XPD: - if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ; - 
if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ; - if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY; + if (writemask & BRW_WRITEMASK_X) read0 |= BRW_WRITEMASK_YZ; + if (writemask & BRW_WRITEMASK_Y) read0 |= BRW_WRITEMASK_XZ; + if (writemask & BRW_WRITEMASK_Z) read0 |= BRW_WRITEMASK_XY; read1 = read0; break; @@ -206,12 +206,12 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case TGSI_OPCODE_SCS: case WM_CINTERP: case WM_PIXELXY: - read0 = WRITEMASK_X; + read0 = BRW_WRITEMASK_X; break; case TGSI_OPCODE_POW: - read0 = WRITEMASK_X; - read1 = WRITEMASK_X; + read0 = BRW_WRITEMASK_X; + read1 = BRW_WRITEMASK_X; break; case TGSI_OPCODE_TEX: @@ -219,57 +219,57 @@ void brw_wm_pass1( struct brw_wm_compile *c ) read0 = get_texcoord_mask(inst->tex_idx); if (inst->tex_shadow) - read0 |= WRITEMASK_Z; + read0 |= BRW_WRITEMASK_Z; break; case TGSI_OPCODE_TXB: /* Shadow ignored for txb. */ - read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W; + read0 = get_texcoord_mask(inst->tex_idx) | BRW_WRITEMASK_W; break; case WM_WPOSXY: - read0 = writemask & WRITEMASK_XY; + read0 = writemask & BRW_WRITEMASK_XY; break; case WM_DELTAXY: - read0 = writemask & WRITEMASK_XY; - read1 = WRITEMASK_X; + read0 = writemask & BRW_WRITEMASK_XY; + read1 = BRW_WRITEMASK_X; break; case WM_PIXELW: - read0 = WRITEMASK_X; - read1 = WRITEMASK_XY; + read0 = BRW_WRITEMASK_X; + read1 = BRW_WRITEMASK_XY; break; case WM_LINTERP: - read0 = WRITEMASK_X; - read1 = WRITEMASK_XY; + read0 = BRW_WRITEMASK_X; + read1 = BRW_WRITEMASK_XY; break; case WM_PINTERP: - read0 = WRITEMASK_X; /* interpolant */ - read1 = WRITEMASK_XY; /* deltas */ - read2 = WRITEMASK_W; /* pixel w */ + read0 = BRW_WRITEMASK_X; /* interpolant */ + read1 = BRW_WRITEMASK_XY; /* deltas */ + read2 = BRW_WRITEMASK_W; /* pixel w */ break; case TGSI_OPCODE_DP3: - read0 = WRITEMASK_XYZ; - read1 = WRITEMASK_XYZ; + read0 = BRW_WRITEMASK_XYZ; + read1 = BRW_WRITEMASK_XYZ; break; case TGSI_OPCODE_DPH: - read0 = WRITEMASK_XYZ; - read1 = WRITEMASK_XYZW; + read0 
= BRW_WRITEMASK_XYZ; + read1 = BRW_WRITEMASK_XYZW; break; case TGSI_OPCODE_DP4: - read0 = WRITEMASK_XYZW; - read1 = WRITEMASK_XYZW; + read0 = BRW_WRITEMASK_XYZW; + read1 = BRW_WRITEMASK_XYZW; break; case TGSI_OPCODE_LIT: - read0 = WRITEMASK_XYW; + read0 = BRW_WRITEMASK_XYW; break; case TGSI_OPCODE_DST: diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index 86dcb74b5b..5045c9b4a6 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -31,7 +31,7 @@ #include "intel_mipmap_tree.h" -#include "intel_batchbuffer.h" +#include "brw_batchbuffer.h" #include "intel_tex.h" #include "intel_fbo.h" diff --git a/src/gallium/drivers/i965/intel_batchbuffer.h b/src/gallium/drivers/i965/intel_batchbuffer.h deleted file mode 100644 index be04656aec..0000000000 --- a/src/gallium/drivers/i965/intel_batchbuffer.h +++ /dev/null @@ -1,168 +0,0 @@ -#ifndef INTEL_BATCHBUFFER_H -#define INTEL_BATCHBUFFER_H - -#include "intel_bufmgr.h" -#include "intel_reg.h" - -#define BATCH_SZ 16384 -#define BATCH_RESERVED 16 - -enum cliprect_mode { - /** - * Batchbuffer contents may be looped over per cliprect, but do not - * require it. - */ - IGNORE_CLIPRECTS, - /** - * Batchbuffer contents require looping over per cliprect at batch submit - * time. - * - * This will be upgraded to NO_LOOP_CLIPRECTS when there's a single - * constant cliprect, as in DRI2 or FBO rendering. - */ - LOOP_CLIPRECTS, - /** - * Batchbuffer contents contain drawing that should not be executed multiple - * times. - */ - NO_LOOP_CLIPRECTS, - /** - * Batchbuffer contents contain drawing that already handles cliprects, such - * as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE. - * - * Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch - * outside of LOCK/UNLOCK. 
This is upgraded to just NO_LOOP_CLIPRECTS when - * there's a constant cliprect, as in DRI2 or FBO rendering. - */ - REFERENCES_CLIPRECTS -}; - -struct intel_batchbuffer -{ - struct intel_context *intel; - - struct brw_winsys_buffer *buf; - - GLubyte *buffer; - - GLubyte *map; - GLubyte *ptr; - - GLuint size; - - /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ - struct { - GLuint total; - GLubyte *start_ptr; - } emit; - - GLuint dirty_state; -}; - -struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context - *intel); - -void intel_batchbuffer_free(struct intel_batchbuffer *batch); - - -void _intel_batchbuffer_flush(struct intel_batchbuffer *batch, - const char *file, int line); - -#define intel_batchbuffer_flush(batch) \ - _intel_batchbuffer_flush(batch, __FILE__, __LINE__) - -void intel_batchbuffer_reset(struct intel_batchbuffer *batch); - - -/* Unlike bmBufferData, this currently requires the buffer be mapped. - * Consider it a convenience function wrapping multple - * intel_buffer_dword() calls. - */ -void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes, - enum cliprect_mode cliprect_mode); - -void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, - GLuint bytes); - -GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, - struct brw_winsys_buffer *buffer, - uint32_t read_domains, - uint32_t write_domain, - uint32_t offset); - -/* Inline functions - might actually be better off with these - * non-inlined. Certainly better off switching all command packets to - * be passed as structs rather than dwords, but that's a little bit of - * work... 
- */ -static INLINE GLint -intel_batchbuffer_space(struct intel_batchbuffer *batch) -{ - return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map); -} - - -static INLINE void -intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) -{ - assert(batch->map); - assert(intel_batchbuffer_space(batch) >= 4); - *(GLuint *) (batch->ptr) = dword; - batch->ptr += 4; -} - -static INLINE void -intel_batchbuffer_require_space(struct intel_batchbuffer *batch, - GLuint sz, - enum cliprect_mode cliprect_mode) -{ - assert(sz < batch->size - 8); - if (intel_batchbuffer_space(batch) < sz) - intel_batchbuffer_flush(batch); - - /* All commands should be executed once regardless of cliprect - * mode. - */ - (void)cliprect_mode; -} - -/* Here are the crusty old macros, to be removed: - */ -#define BATCH_LOCALS - -#define BEGIN_BATCH(n, cliprect_mode) do { \ - intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \ - assert(intel->batch->emit.start_ptr == NULL); \ - intel->batch->emit.total = (n) * 4; \ - intel->batch->emit.start_ptr = intel->batch->ptr; \ -} while (0) - -#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) - -#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ - assert((unsigned) (delta) < buf->size); \ - intel_batchbuffer_emit_reloc(intel->batch, buf, \ - read_domains, write_domain, delta); \ -} while (0) - -#define ADVANCE_BATCH() do { \ - unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ - assert(intel->batch->emit.start_ptr != NULL); \ - if (_n != intel->batch->emit.total) { \ - fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \ - _n, intel->batch->emit.total); \ - abort(); \ - } \ - intel->batch->emit.start_ptr = NULL; \ -} while(0) - - -static INLINE void -intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) -{ - intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS); - intel_batchbuffer_emit_dword(batch, MI_FLUSH); -} - -#endif -- cgit v1.2.3 From 
4f7931bb3554cb1839adc2044e3abe6d4af8b0b5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 24 Oct 2009 17:07:01 +0100 Subject: i965g: more work on compiling --- src/gallium/drivers/i965/Makefile | 2 - src/gallium/drivers/i965/brw_batchbuffer.h | 41 ++- src/gallium/drivers/i965/brw_cc.c | 8 +- src/gallium/drivers/i965/brw_clip.c | 7 +- src/gallium/drivers/i965/brw_clip.h | 19 +- src/gallium/drivers/i965/brw_clip_line.c | 21 +- src/gallium/drivers/i965/brw_clip_point.c | 1 - src/gallium/drivers/i965/brw_clip_state.c | 22 +- src/gallium/drivers/i965/brw_clip_tri.c | 5 +- src/gallium/drivers/i965/brw_clip_unfilled.c | 28 +- src/gallium/drivers/i965/brw_clip_util.c | 31 +- src/gallium/drivers/i965/brw_context.c | 87 +++--- src/gallium/drivers/i965/brw_context.h | 47 ++- src/gallium/drivers/i965/brw_curbe.c | 63 ++-- src/gallium/drivers/i965/brw_debug.h | 42 +++ src/gallium/drivers/i965/brw_defines.h | 7 - src/gallium/drivers/i965/brw_draw.c | 134 ++++----- src/gallium/drivers/i965/brw_draw.h | 15 +- src/gallium/drivers/i965/brw_draw_upload.c | 2 - src/gallium/drivers/i965/brw_eu.h | 2 + src/gallium/drivers/i965/brw_misc_state.c | 2 - src/gallium/drivers/i965/brw_pipe_debug.c | 2 - src/gallium/drivers/i965/brw_pipe_query.c | 4 +- src/gallium/drivers/i965/brw_pipe_vertex.c | 26 ++ src/gallium/drivers/i965/brw_reg.h | 36 +++ src/gallium/drivers/i965/brw_screen.c | 365 ++++++++++++++++++++++++ src/gallium/drivers/i965/brw_screen.h | 6 +- src/gallium/drivers/i965/brw_sf.c | 2 +- src/gallium/drivers/i965/brw_sf_state.c | 3 +- src/gallium/drivers/i965/brw_state_cache.c | 4 +- src/gallium/drivers/i965/brw_state_debug.c | 145 ++++++++++ src/gallium/drivers/i965/brw_state_upload.c | 113 +------- src/gallium/drivers/i965/brw_tex.c | 2 - src/gallium/drivers/i965/brw_tex_layout.c | 4 +- src/gallium/drivers/i965/brw_types.h | 3 + src/gallium/drivers/i965/brw_vs_surface_state.c | 3 +- src/gallium/drivers/i965/brw_winsys.h | 54 ++-- src/gallium/drivers/i965/brw_wm.c | 2 +- 
src/gallium/drivers/i965/brw_wm_state.c | 8 +- src/gallium/drivers/i965/brw_wm_surface_state.c | 9 +- src/gallium/drivers/i965/intel_chipset.h | 116 -------- src/gallium/drivers/i965/intel_tex_format.c | 28 -- src/gallium/drivers/i965/intel_tex_layout.c | 2 - 43 files changed, 920 insertions(+), 603 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_debug.h delete mode 100644 src/gallium/drivers/i965/brw_pipe_debug.c create mode 100644 src/gallium/drivers/i965/brw_pipe_vertex.c create mode 100644 src/gallium/drivers/i965/brw_screen.c create mode 100644 src/gallium/drivers/i965/brw_state_debug.c delete mode 100644 src/gallium/drivers/i965/intel_chipset.h delete mode 100644 src/gallium/drivers/i965/intel_tex_format.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 480d2efbc5..40c8364824 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -26,7 +26,6 @@ C_SOURCES = \ brw_gs_state.c \ brw_misc_state.c \ brw_pipe_blend.c \ - brw_pipe_debug.c \ brw_pipe_depth.c \ brw_pipe_fb.c \ brw_pipe_flush.c \ @@ -63,7 +62,6 @@ C_SOURCES = \ brw_wm_surface_state.c \ brw_bo.c \ intel_batchbuffer.c \ - intel_tex_format.c \ intel_tex_layout.c include ../../Makefile.template diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 76b3c1bf69..b8492882e1 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -4,6 +4,7 @@ #include "brw_types.h" #include "brw_winsys.h" #include "brw_reg.h" +#include "util/u_debug.h" #define BATCH_SZ 16384 #define BATCH_RESERVED 16 @@ -68,56 +69,50 @@ brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword) static INLINE boolean brw_batchbuffer_require_space(struct brw_batchbuffer *batch, - GLuint sz, - enum cliprect_mode cliprect_mode) + GLuint sz) { assert(sz < batch->size - 8); if (brw_batchbuffer_space(batch) < 
sz) { assert(0); return FALSE; } - - /* All commands should be executed once regardless of cliprect - * mode. - */ - (void)cliprect_mode; +#ifdef DEBUG + batch->emit.end_ptr = batch->ptr + sz; +#endif + return TRUE; } /* Here are the crusty old macros, to be removed: */ -#define BATCH_LOCALS - #define BEGIN_BATCH(n, cliprect_mode) do { \ - brw_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \ - assert(intel->batch->emit.start_ptr == NULL); \ - intel->batch->emit.total = (n) * 4; \ - intel->batch->emit.start_ptr = intel->batch->ptr; \ + brw_batchbuffer_require_space(brw->batch, (n)*4); \ } while (0) -#define OUT_BATCH(d) brw_batchbuffer_emit_dword(intel->batch, d) +#define OUT_BATCH(d) brw_batchbuffer_emit_dword(brw->batch, d) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ assert((unsigned) (delta) < buf->size); \ - brw_batchbuffer_emit_reloc(intel->batch, buf, \ + brw_batchbuffer_emit_reloc(brw->batch, buf, \ read_domains, write_domain, delta); \ } while (0) +#ifdef DEBUG #define ADVANCE_BATCH() do { \ - unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr; \ - assert(intel->batch->emit.start_ptr != NULL); \ - if (_n != intel->batch->emit.total) { \ - fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", \ - _n, intel->batch->emit.total); \ + unsigned int _n = brw->batch->ptr - brw->batch->emit.end_ptr; \ + if (_n != 0) { \ + debug_printf("%s: %d too many bytes emitted to batch\n", __FUNCTION__, _n); \ abort(); \ } \ - intel->batch->emit.start_ptr = NULL; \ + brw->batch->emit.end_ptr = NULL; \ } while(0) - +#else +#define ADVANCE_BATCH() +#endif static INLINE void brw_batchbuffer_emit_mi_flush(struct brw_batchbuffer *batch) { - brw_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS); + brw_batchbuffer_require_space(batch, 4); brw_batchbuffer_emit_dword(batch, MI_FLUSH); } diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index c8e7851d75..76759304eb 100644 --- 
a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -109,10 +109,10 @@ static void cc_unit_populate_key(const struct brw_context *brw, struct brw_cc_unit_key *key) { - key->cc0 = brw->curr.dsa->cc0; - key->cc1 = brw->curr.dsa->cc1; - key->cc2 = brw->curr.dsa->cc2; - key->cc3 = combine_cc3( brw->curr.dsa->cc3, brw->curr.blend->cc3 ); + key->cc0 = brw->curr.zstencil->cc0; + key->cc1 = brw->curr.zstencil->cc1; + key->cc2 = brw->curr.zstencil->cc2; + key->cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 ); key->cc5 = brw->curr.blend->cc5; key->cc6 = brw->curr.blend->cc6; key->cc7 = brw->curr.blend->cc7; diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 591e904705..622d9dba96 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -65,15 +65,16 @@ static void compile_clip_prog( struct brw_context *brw, c.func.single_program_flow = 1; + c.chipset = brw->chipset; c.key = *key; - c.need_ff_sync = BRW_IS_IGDNG(brw); + c.need_ff_sync = c.chipset.is_igdng; /* Need to locate the two positions present in vertex + header. 
* These are currently hardcoded: */ c.header_position_offset = ATTR_SIZE; - if (BRW_IS_IGDNG(brw)) + if (c.chipset.is_igdng) delta = 3 * REG_SIZE; else delta = REG_SIZE; @@ -160,7 +161,7 @@ static void upload_clip_prog(struct brw_context *brw) key.primitive = brw->reduced_primitive; /* PIPE_NEW_VS */ - key.nr_attrs = brw->curr.vs->info.file_max[TGSI_FILE_OUTPUT] + 1; + key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_OUTPUT] + 1; /* PIPE_NEW_CLIP */ key.nr_userclip = brw->curr.ucp.nr; diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index cfe51bf292..772c34be88 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -32,8 +32,8 @@ #ifndef BRW_CLIP_H #define BRW_CLIP_H - -#include "brw_context.h" +#include "pipe/p_state.h" +#include "brw_reg.h" #include "brw_eu.h" #define MAX_VERTS (3+6+6) @@ -60,6 +60,12 @@ struct brw_clip_prog_key { GLfloat offset_units; }; +struct brw_clip_prog_data { + GLuint curb_read_length; /* user planes? 
*/ + GLuint clip_mode; + GLuint urb_read_length; + GLuint total_grf; +}; #define CLIP_LINE 0 #define CLIP_POINT 1 @@ -112,12 +118,21 @@ struct brw_clip_compile { GLuint last_tmp; GLboolean need_direction; + struct brw_chipset chipset; GLuint last_mrf; GLuint header_position_offset; GLuint offset[PIPE_MAX_SHADER_OUTPUTS]; GLboolean need_ff_sync; + + GLuint nr_color_attrs; + GLuint offset_color0; + GLuint offset_color1; + GLuint offset_bfc0; + GLuint offset_bfc1; + + GLuint offset_edge; }; #define ATTR_SIZE (4*4) diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index 6b4da25644..a4790bda95 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -29,14 +29,16 @@ * Keith Whitwell */ +#include "util/u_debug.h" + #include "brw_defines.h" -#include "brw_context.h" #include "brw_eu.h" #include "brw_util.h" #include "brw_clip.h" + static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) { GLuint i = 0,j; @@ -130,6 +132,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_instruction *is_neg2 = NULL; struct brw_instruction *not_culled; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); + const int hpos = 0; /* XXX: position not always first element */ brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); @@ -145,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_clip_init_clipmask(c); /* -ve rhw workaround */ - if (BRW_IS_965(p->brw)) { + if (c->chipset.is_965) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); @@ -170,19 +173,19 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* dp = DP4(vtx->position, plane) */ - brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + brw_DP4(p, 
vec4(c->reg.dp0), deref_4f(vtx0, c->offset[hpos]), c->reg.plane_equation); /* if (IS_NEGATIVE(dp1)) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[hpos]), c->reg.plane_equation); is_negative = brw_IF(p, BRW_EXECUTE_1); { /* * Both can be negative on GM965/G965 due to RHW workaround * if so, this object should be rejected. */ - if (BRW_IS_965(p->brw)) { + if (c->chipset.is_965) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); { @@ -207,7 +210,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* If both are positive, do nothing */ /* Only on GM965/G965 */ - if (BRW_IS_965(p->brw)) { + if (c->chipset.is_965) { brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); is_neg2 = brw_IF(p, BRW_EXECUTE_1); } @@ -222,7 +225,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_set_predicate_control(p, BRW_PREDICATE_NONE); } - if (BRW_IS_965(p->brw)) { + if (c->chipset.is_965) { brw_ENDIF(p, is_neg2); } } @@ -245,8 +248,8 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); not_culled = brw_IF(p, BRW_EXECUTE_1); { - brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE); - brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, GL_FALSE); + brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE); + brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE); brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c index b2cf7b2011..e0a5330556 100644 --- 
a/src/gallium/drivers/i965/brw_clip_point.c +++ b/src/gallium/drivers/i965/brw_clip_point.c @@ -30,7 +30,6 @@ */ #include "brw_defines.h" -#include "brw_context.h" #include "brw_eu.h" #include "brw_util.h" #include "brw_clip.h" diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 0ea7ce5734..25b8c6372f 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -29,9 +29,13 @@ * Keith Whitwell */ +#include "util/u_math.h" + #include "brw_context.h" +#include "brw_clip.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_debug.h" struct brw_clip_unit_key { unsigned int total_grf; @@ -77,7 +81,7 @@ clip_unit_create_from_key(struct brw_context *brw, memset(&clip, 0, sizeof(clip)); - clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; /* reloc */ clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; @@ -112,10 +116,10 @@ clip_unit_create_from_key(struct brw_context *brw, clip.thread4.max_threads = 1 - 1; } - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) clip.thread4.max_threads = 0; - if (INTEL_DEBUG & DEBUG_STATS) + if (BRW_DEBUG & DEBUG_STATS) clip.thread4.stats_enable = 1; clip.clip5.userclip_enable_flags = 0x7f; @@ -145,12 +149,12 @@ clip_unit_create_from_key(struct brw_context *brw, /* Emit clip program relocation */ assert(brw->clip.prog_bo); - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, - 0, - clip.thread0.grf_reg_count << 1, - offsetof(struct brw_clip_unit_state, thread0), - brw->clip.prog_bo); + brw->sws->bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, + clip.thread0.grf_reg_count << 1, + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_bo); return bo; } diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index d8feca6a87..5486f4fa89 100644 --- 
a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -30,7 +30,6 @@ */ #include "brw_defines.h" -#include "brw_context.h" #include "brw_eu.h" #include "brw_util.h" #include "brw_clip.h" @@ -71,7 +70,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, for (j = 0; j < 3; j++) { GLuint delta = c->nr_attrs*16 + 32; - if (BRW_IS_IGDNG(c->func.brw)) + if (c->chipset.is_igdng) delta = c->nr_attrs * 16 + 32 * 3; brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); @@ -565,7 +564,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c ) /* if -ve rhw workaround bit is set, do cliptest */ - if (BRW_IS_965(p->brw)) { + if (c->chipset.is_965) { brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<20)); diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c index 8501599aef..1cb86dd25b 100644 --- a/src/gallium/drivers/i965/brw_clip_unfilled.c +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -29,10 +29,7 @@ * Keith Whitwell */ -#include "brw_batchbuffer.h" - #include "brw_defines.h" -#include "brw_context.h" #include "brw_eu.h" #include "brw_util.h" #include "brw_clip.h" @@ -126,8 +123,7 @@ static void copy_bfc( struct brw_clip_compile *c ) /* Do we have any colors to copy? 
*/ - if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) && - !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])) + if (c->nr_color_attrs == 0) return; /* In some wierd degnerate cases we can end up testing the @@ -150,15 +146,15 @@ static void copy_bfc( struct brw_clip_compile *c ) GLuint i; for (i = 0; i < 3; i++) { - if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) + if (c->offset_color0 && c->offset_bfc0) brw_MOV(p, - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]), - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0])); + byte_offset(c->reg.vertex[i], c->offset_color0), + byte_offset(c->reg.vertex[i], c->offset_bfc0)); - if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]) + if (c->offset_color1 && c->offset_bfc1) brw_MOV(p, - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]), - byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1])); + byte_offset(c->reg.vertex[i], c->offset_color1), + byte_offset(c->reg.vertex[i], c->offset_bfc1)); } } brw_ENDIF(p, ccw); @@ -218,12 +214,12 @@ static void merge_edgeflags( struct brw_clip_compile *c ) { brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); - brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset_edge), brw_imm_f(0)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); - brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset_edge), brw_imm_f(0)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); } brw_ENDIF(p, is_poly); @@ -294,7 +290,7 @@ static void emit_lines(struct brw_clip_compile *c, /* draw edge if edgeflag != 0 */ brw_CMP(p, vec1(brw_null_reg()), 
BRW_CONDITIONAL_NZ, - deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + deref_1f(v0, c->offset_edge), brw_imm_f(0)); draw_edge = brw_IF(p, BRW_EXECUTE_1); { @@ -333,7 +329,7 @@ static void emit_points(struct brw_clip_compile *c, */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, - deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + deref_1f(v0, c->offset_edge), brw_imm_f(0)); draw_point = brw_IF(p, BRW_EXECUTE_1); { @@ -450,7 +446,7 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c ) brw_clip_tri_init_vertices(c); brw_clip_init_ff_sync(c); - assert(c->offset[VERT_RESULT_EDGE]); + assert(c->offset_edge); if (c->key.fill_ccw == CLIP_CULL && c->key.fill_cw == CLIP_CULL) { diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 60bfd3538e..f8f98c8037 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -31,7 +31,6 @@ #include "brw_defines.h" -#include "brw_context.h" #include "brw_eu.h" #include "brw_util.h" #include "brw_clip.h" @@ -144,10 +143,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, for (i = 0; i < c->nr_attrs; i++) { GLuint delta = i*16 + 32; - if (BRW_IS_IGDNG(p->brw)) + if (c->chipset.is_igdng) delta = i * 16 + 32 * 3; - if (delta == c->offset[VERT_RESULT_EDGE]) { + if (delta == c->offset_edge) { if (force_edgeflag) brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); else @@ -178,7 +177,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, if (i & 1) { GLuint delta = i*16 + 32; - if (BRW_IS_IGDNG(p->brw)) + if (c->chipset.is_igdng) delta = i * 16 + 32 * 3; brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); @@ -304,25 +303,25 @@ void brw_clip_copy_colors( struct brw_clip_compile *c, { struct brw_compile *p = &c->func; - if (c->offset[VERT_RESULT_COL0]) + if (c->offset_color0) brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0])); + 
byte_offset(c->reg.vertex[to], c->offset_color0), + byte_offset(c->reg.vertex[from], c->offset_color0)); - if (c->offset[VERT_RESULT_COL1]) + if (c->offset_color1) brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1])); + byte_offset(c->reg.vertex[to], c->offset_color1), + byte_offset(c->reg.vertex[from], c->offset_color1)); - if (c->offset[VERT_RESULT_BFC0]) + if (c->offset_bfc0) brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0])); + byte_offset(c->reg.vertex[to], c->offset_bfc0), + byte_offset(c->reg.vertex[from], c->offset_bfc0)); - if (c->offset[VERT_RESULT_BFC1]) + if (c->offset_bfc1) brw_MOV(p, - byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]), - byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1])); + byte_offset(c->reg.vertex[to], c->offset_bfc1), + byte_offset(c->reg.vertex[from], c->offset_bfc1)); } diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index 07a5420d6e..e9605bafe6 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -31,64 +31,31 @@ #include "pipe/p_context.h" +#include "util/u_simple_list.h" #include "brw_context.h" #include "brw_defines.h" #include "brw_draw.h" #include "brw_state.h" -#include "brw_vs.h" -#include "brw_screen_tex.h" #include "brw_batchbuffer.h" +#include "brw_winsys.h" - - -struct pipe_context *brw_create_context( struct pipe_screen *screen, - void *priv ) -{ - struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); - - if (!brw) { - debug_printf("%s: failed to alloc context\n", __FUNCTION__); - return GL_FALSE; - } - - /* We want the GLSL compiler to emit code that uses condition codes */ - ctx->Shader.EmitCondCodes = GL_TRUE; - ctx->Shader.EmitNVTempInitialization = GL_TRUE; - - - brw_init_query( brw ); - brw_init_state( brw 
); - brw_draw_init( brw ); - - brw->state.dirty.mesa = ~0; - brw->state.dirty.brw = ~0; - - brw->emit_state_always = 0; - - make_empty_list(&brw->query.active_head); - - - return GL_TRUE; -} - -/** - * called from intelDestroyContext() - */ -static void brw_destroy_context( struct brw_context *brw ) +static void brw_destroy_context( struct pipe_context *pipe ) { + struct brw_context *brw = brw_context(pipe); int i; brw_destroy_state(brw); - brw_draw_destroy( brw ); - _mesa_free(brw->wm.compile_data); + brw_draw_cleanup( brw ); + + FREE(brw->wm.compile_data); - for (i = 0; i < brw->state.nr_color_regions; i++) - intel_region_release(&brw->state.color_regions[i]); - brw->state.nr_color_regions = 0; - intel_region_release(&brw->state.depth_region); + for (i = 0; i < brw->curr.fb.nr_cbufs; i++) + pipe_surface_reference(&brw->curr.fb.cbufs[i], NULL); + brw->curr.fb.nr_cbufs = 0; + pipe_surface_reference(&brw->curr.fb.zsbuf, NULL); brw->sws->bo_unreference(brw->curbe.curbe_bo); brw->sws->bo_unreference(brw->vs.prog_bo); @@ -114,3 +81,35 @@ static void brw_destroy_context( struct brw_context *brw ) brw->sws->bo_unreference(brw->cc.state_bo); brw->sws->bo_unreference(brw->cc.vp_bo); } + + +struct pipe_context *brw_create_context(struct pipe_screen *screen) +{ + struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context); + + if (!brw) { + debug_printf("%s: failed to alloc context\n", __FUNCTION__); + return NULL; + } + + /* We want the GLSL compiler to emit code that uses condition codes */ + //ctx->Shader.EmitCondCodes = GL_TRUE; + //ctx->Shader.EmitNVTempInitialization = GL_TRUE; + + brw->base.destroy = brw_destroy_context; + + brw_init_query( brw ); + brw_init_state( brw ); + brw_draw_init( brw ); + + brw->state.dirty.mesa = ~0; + brw->state.dirty.brw = ~0; + + brw->emit_state_always = 0; + + make_empty_list(&brw->query.active_head); + + + return &brw->base; +} + diff --git a/src/gallium/drivers/i965/brw_context.h 
b/src/gallium/drivers/i965/brw_context.h index 3a2fece45c..dd782fdba9 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -35,6 +35,7 @@ #include "brw_structs.h" #include "brw_winsys.h" +#include "brw_reg.h" #include "pipe/p_state.h" #include "pipe/p_context.h" #include "tgsi/tgsi_scan.h" @@ -178,8 +179,8 @@ struct brw_fragment_shader { #define PIPE_NEW_VERTEX_ELEMENT 0x2 #define PIPE_NEW_FRAGMENT_SHADER 0x2 #define PIPE_NEW_VERTEX_SHADER 0x2 -#define PIPE_NEW_FRAGMENT_CONSTS 0x2 -#define PIPE_NEW_VERTEX_CONSTS 0x2 +#define PIPE_NEW_FRAGMENT_CONSTANTS 0x2 +#define PIPE_NEW_VERTEX_CONSTANTS 0x2 #define PIPE_NEW_CLIP 0x2 @@ -256,12 +257,8 @@ struct brw_sf_prog_data { GLuint urb_entry_size; }; -struct brw_clip_prog_data { - GLuint curb_read_length; /* user planes? */ - GLuint clip_mode; - GLuint urb_read_length; - GLuint total_grf; -}; + +struct brw_clip_prog_data; struct brw_gs_prog_data { GLuint urb_read_length; @@ -298,15 +295,15 @@ struct brw_vs_ouput_sizes { * This contains pointers to the drawing surfaces and current texture * objects and shader constant buffers (+2). */ -#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) +#define BRW_WM_MAX_SURF (PIPE_MAX_COLOR_BUFS + BRW_MAX_TEX_UNIT + 1) /** * Helpers to convert drawing buffers, textures and constant buffers * to surface binding table indexes, for WM. */ #define SURF_INDEX_DRAW(d) (d) -#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) -#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) +#define SURF_INDEX_FRAG_CONST_BUFFER (PIPE_MAX_COLOR_BUFS) +#define SURF_INDEX_TEXTURE(t) (PIPE_MAX_COLOR_BUFS + 1 + (t)) /** * Size of surface binding table for the VS. 
@@ -457,28 +454,32 @@ struct brw_query_object { */ struct brw_context { - struct pipe_context pipe; + struct pipe_context base; + struct brw_chipset chipset; struct brw_screen *brw_screen; struct brw_winsys_screen *sws; + struct brw_batchbuffer *batch; + GLuint primitive; GLuint reduced_primitive; GLboolean emit_state_always; - GLboolean no_batch_wrap; /* Active vertex program: */ struct { - const struct brw_vertex_shader *vs; - const struct brw_fragment_shader *fs; + const struct brw_vertex_shader *vertex_shader; + const struct brw_fragment_shader *fragment_shader; const struct brw_blend_state *blend; const struct brw_rasterizer_state *rast; - const struct brw_depth_stencil_alpha_state *dsa; + const struct brw_depth_stencil_alpha_state *zstencil; struct pipe_framebuffer_state fb; struct pipe_viewport_state vp; struct pipe_clip_state ucp; + struct pipe_buffer *vertex_constants; + struct pipe_buffer *fragment_constants; } curr; struct { @@ -673,15 +674,6 @@ struct brw_context }; -#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) - - - -/*====================================================================== - * brw_vtbl.c - */ -void brwInitVtbl( struct brw_context *brw ); - /*====================================================================== * brw_queryobj.c @@ -730,9 +722,10 @@ brw_context( struct pipe_context *ctx ) } +#define BRW_IS_965(brw) ((brw)->chipset.is_965) +#define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng) +#define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x) -#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1) - #endif diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index f2524d75e2..edc39ff223 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -29,13 +29,16 @@ * Keith Whitwell */ +#include "util/u_memory.h" +#include "util/u_math.h" #include "brw_batchbuffer.h" -#include "intel_regions.h" #include "brw_context.h" #include "brw_defines.h" #include 
"brw_state.h" #include "brw_util.h" +#include "brw_debug.h" +#include "brw_screen.h" /** @@ -57,7 +60,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) /* PIPE_NEW_CLIP */ if (brw->curr.ucp.nr) { - GLuint nr_planes = 6 + brw->nr_ucp; + GLuint nr_planes = 6 + brw->curr.ucp.nr; nr_clip_regs = (nr_planes * 4 + 15) / 16; } @@ -156,10 +159,6 @@ static GLfloat fixed_plane[6][4] = { */ static void prepare_constant_buffer(struct brw_context *brw) { - const struct brw_vertex_program *vp = - brw_vertex_program_const(brw->vertex_program); - const struct brw_fragment_program *fp = - brw_fragment_program_const(brw->fragment_program); const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; @@ -174,7 +173,7 @@ static void prepare_constant_buffer(struct brw_context *brw) return; } - buf = (GLfloat *) _mesa_calloc(bufsz); + buf = (GLfloat *) CALLOC(bufsz, 1); /* fragment shader constants */ if (brw->curbe.wm_size) { @@ -208,12 +207,12 @@ static void prepare_constant_buffer(struct brw_context *brw) /* Clip planes: */ - assert(brw->nr_ucp <= 6); - for (j = 0; j < brw->nr_ucp; j++) { - buf[offset + i * 4 + 0] = brw->ucp[j][0]; - buf[offset + i * 4 + 1] = brw->ucp[j][1]; - buf[offset + i * 4 + 2] = brw->ucp[j][2]; - buf[offset + i * 4 + 3] = brw->ucp[j][3]; + assert(brw->curr.ucp.nr <= 6); + for (j = 0; j < brw->curr.ucp.nr; j++) { + buf[offset + i * 4 + 0] = brw->curr.ucp.ucp[j][0]; + buf[offset + i * 4 + 1] = brw->curr.ucp.ucp[j][1]; + buf[offset + i * 4 + 2] = brw->curr.ucp.ucp[j][2]; + buf[offset + i * 4 + 3] = brw->curr.ucp.ucp[j][3]; i++; } } @@ -221,23 +220,21 @@ static void prepare_constant_buffer(struct brw_context *brw) /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - GLuint nr = brw->vs.prog_data->nr_params / 4; + GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT]; + struct pipe_screen *screen = &brw->brw_screen->base; - /* map vs constant 
buffer */ + const GLfloat *value = screen->buffer_map( screen, + brw->curr.vertex_constants, + PIPE_BUFFER_USAGE_CPU_READ); - /* XXX just use a memcpy here */ - for (i = 0; i < nr; i++) { - const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; - buf[offset + i * 4 + 0] = value[0]; - buf[offset + i * 4 + 1] = value[1]; - buf[offset + i * 4 + 2] = value[2]; - buf[offset + i * 4 + 3] = value[3]; - } + /* XXX: what if user's constant buffer is too small? + */ + memcpy(&buf[offset], value, nr * 4 * sizeof(float)); - /* unmap vs constant buffer */ + screen->buffer_unmap( screen, brw->curr.vertex_constants ); } - if (0) { + if (BRW_DEBUG & DEBUG_CURBE) { for (i = 0; i < sz*16; i+=4) debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); @@ -275,18 +272,22 @@ static void prepare_constant_buffer(struct brw_context *brw) /* Allocate a single page for CURBE entries for this batchbuffer. * They're generally around 64b. */ - brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", - 4096, 1 << 6); + brw->curbe.curbe_bo = brw->sws->bo_alloc(brw->sws, + BRW_BUFFER_TYPE_CURBE, + 4096, 1 << 6); brw->curbe.curbe_next_offset = 0; } brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; brw->curbe.curbe_next_offset += bufsz; - brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64); + brw->curbe.curbe_next_offset = align(brw->curbe.curbe_next_offset, 64); /* Copy data to the buffer: */ - dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf); + brw->sws->bo_subdata(brw->curbe.curbe_bo, + brw->curbe.curbe_offset, + bufsz, + buf); } brw_add_validated_bo(brw, brw->curbe.curbe_bo); @@ -325,8 +326,8 @@ static void emit_constant_buffer(struct brw_context *brw) const struct brw_tracked_state brw_constant_buffer = { .dirty = { - .mesa = (PIPE_NEW_FS_CONSTANTS | - PIPE_NEW_VS_CONSTANTS | + .mesa = (PIPE_NEW_FRAGMENT_CONSTANTS | + PIPE_NEW_VERTEX_CONSTANTS | PIPE_NEW_CLIP), .brw = 
(BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_VERTEX_PROGRAM | diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h new file mode 100644 index 0000000000..aee62f7a5b --- /dev/null +++ b/src/gallium/drivers/i965/brw_debug.h @@ -0,0 +1,42 @@ +#ifndef BRW_DEBUG_H +#define BRW_DEBUG_H + +/* ================================================================ + * Debugging: + */ + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_BLIT 0x8 +#define DEBUG_CURBE 0x10 +#define DEBUG_FALLBACKS 0x20 +#define DEBUG_VERBOSE 0x40 +#define DEBUG_BATCH 0x80 +#define DEBUG_PIXEL 0x100 +#define DEBUG_BUFMGR 0x200 +#define DEBUG_unused1 0x400 +#define DEBUG_unused2 0x800 +#define DEBUG_unused3 0x1000 +#define DEBUG_SYNC 0x2000 +#define DEBUG_PRIMS 0x4000 +#define DEBUG_VERTS 0x8000 +#define DEBUG_unused4 0x10000 +#define DEBUG_DMA 0x20000 +#define DEBUG_SANITY 0x40000 +#define DEBUG_SLEEP 0x80000 +#define DEBUG_STATS 0x100000 +#define DEBUG_unused5 0x200000 +#define DEBUG_SINGLE_THREAD 0x400000 +#define DEBUG_WM 0x800000 +#define DEBUG_URB 0x1000000 +#define DEBUG_VS 0x2000000 + +#ifdef DEBUG +extern int BRW_DEBUG; +#else +#define BRW_DEBUG 0 +#endif + + +#endif diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index 1dc64ddc8f..544d36306c 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ b/src/gallium/drivers/i965/brw_defines.h @@ -838,13 +838,6 @@ #define R02_PRIM_END 0x1 #define R02_PRIM_START 0x2 -#include "intel_chipset.h" - -#define BRW_IS_G4X(brw) (IS_G4X((brw)->brw_screen->pci_id)) -#define BRW_IS_IGDNG(brw) (IS_IGDNG((brw)->brw_screen->pci_id)) -#define BRW_IS_965(brw) (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))) -#define CMD_PIPELINE_SELECT(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965) -#define CMD_VF_STATISTICS(brw) ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 
CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965) #define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */ diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 741537309a..7af490bc5a 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -30,9 +30,9 @@ #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" +#include "brw_debug.h" #include "brw_batchbuffer.h" -#include "intel_buffer_objects.h" #define FILE_DEBUG_FLAG DEBUG_BATCH @@ -56,26 +56,18 @@ static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = { * programs be immune to the active primitive (ie. cope with all * possibilities). That may not be realistic however. */ -static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) +static GLuint brw_set_prim(struct brw_context *brw, unsigned prim) { - if (INTEL_DEBUG & DEBUG_PRIMS) - _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim)); + if (BRW_DEBUG & DEBUG_PRIMS) + debug_printf("PRIM: %s\n", u_prim_name(prim)); - /* Slight optimization to avoid the GS program when not needed: - */ - if (prim == GL_QUAD_STRIP && - ctx->Light.ShadeModel != GL_FLAT && - ctx->Polygon.FrontMode == GL_FILL && - ctx->Polygon.BackMode == GL_FILL) - prim = GL_TRIANGLE_STRIP; - if (prim != brw->primitive) { brw->primitive = prim; brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; - if (reduced_prim[prim] != brw->intel.reduced_primitive) { - brw->intel.reduced_primitive = reduced_prim[prim]; + if (reduced_prim[prim] != brw->reduced_primitive) { + brw->reduced_primitive = reduced_prim[prim]; brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; } } @@ -84,43 +76,33 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim) } -static GLuint trim(GLenum prim, GLuint length) -{ - if (prim == GL_QUAD_STRIP) - return length > 3 ? 
(length - length % 2) : 0; - else if (prim == GL_QUADS) - return length - length % 4; - else - return length; -} - -static void brw_emit_prim(struct brw_context *brw, - const struct _mesa_prim *prim, - uint32_t hw_prim) +static enum pipe_error brw_emit_prim(struct brw_context *brw, + unsigned prim, + unsigned start, + unsigned count, + boolean indexed, + uint32_t hw_prim) { struct brw_3d_primitive prim_packet; if (INTEL_DEBUG & DEBUG_PRIMS) - _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), - prim->start, prim->count); + debug_printf("PRIM: %s %d %d\n", u_prim_name(prim), start, count); prim_packet.header.opcode = CMD_3D_PRIM; prim_packet.header.length = sizeof(prim_packet)/4 - 2; prim_packet.header.pad = 0; prim_packet.header.topology = hw_prim; - prim_packet.header.indexed = prim->indexed; + prim_packet.header.indexed = indexed; - prim_packet.verts_per_instance = trim(prim->mode, prim->count); - prim_packet.start_vert_location = prim->start; - if (prim->indexed) + prim_packet.verts_per_instance = count; + prim_packet.start_vert_location = start; + if (indexed) prim_packet.start_vert_location += brw->ib.start_vertex_offset; prim_packet.instance_count = 1; prim_packet.start_instance_location = 0; prim_packet.base_vert_location = prim->basevertex; - /* Can't wrap here, since we rely on the validated state. */ - brw->no_batch_wrap = GL_TRUE; /* If we're set to always flush, do it before and after the primitive emit. * We want to catch both missed flushes that hurt instruction/state cache @@ -128,13 +110,15 @@ static void brw_emit_prim(struct brw_context *brw, * the besides the draw code. 
*/ if (intel->always_flush_cache) { - BEGIN_BATCH(1, IGNORE_CLIPRECTS); + BEGIN_BATCH(1, IGNORE_CLIPRECTS) OUT_BATCH(intel->vtbl.flush_cmd()); ADVANCE_BATCH(); } if (prim_packet.verts_per_instance) { - brw_batchbuffer_data( brw->intel.batch, &prim_packet, - sizeof(prim_packet), LOOP_CLIPRECTS); + ret = brw_batchbuffer_data( brw->intel.batch, &prim_packet, + sizeof(prim_packet), LOOP_CLIPRECTS); + if (ret) + return ret; } if (intel->always_flush_cache) { BEGIN_BATCH(1, IGNORE_CLIPRECTS); @@ -142,34 +126,9 @@ static void brw_emit_prim(struct brw_context *brw, ADVANCE_BATCH(); } - brw->no_batch_wrap = GL_FALSE; + return 0; } -static void brw_merge_inputs( struct brw_context *brw, - const struct gl_client_array *arrays[]) -{ - struct brw_vertex_info old = brw->vb.info; - GLuint i; - - for (i = 0; i < VERT_ATTRIB_MAX; i++) - brw->sws->bo_unreference(brw->vb.inputs[i].bo); - - memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs)); - memset(&brw->vb.info, 0, sizeof(brw->vb.info)); - - for (i = 0; i < VERT_ATTRIB_MAX; i++) { - brw->vb.inputs[i].glarray = arrays[i]; - brw->vb.inputs[i].attrib = (gl_vert_attrib) i; - - if (arrays[i]->StrideB != 0) - brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << - ((i%16) * 2); - } - - /* Raise statechanges if input sizes have changed. */ - if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) - brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; -} /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. 
@@ -229,14 +188,14 @@ static GLboolean brw_try_draw_prims( struct brw_context *brw, return 0; } -void brw_draw_prims( struct brw_context *brw, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, - GLuint max_index ) + +static boolean +brw_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) { enum pipe_error ret; @@ -256,15 +215,40 @@ void brw_draw_prims( struct brw_context *brw, ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); assert(ret == 0); } + + return TRUE; } -void brw_draw_init( struct brw_context *brw ) +static boolean +brw_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned mode, + unsigned start, unsigned count) { - struct vbo_context *vbo = vbo_context(ctx); + return brw_draw_range_elements( pipe, index_buffer, + index_size, + 0, 0xffffffff, + mode, + start, count ); +} +static boolean +brw_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return brw_draw_elements(pipe, NULL, 0, mode, start, count); +} + + + +void brw_draw_init( struct brw_context *brw ) +{ /* Register our drawing function: */ - vbo->draw_prims = brw_draw_prims; + brw->base.draw_arrays = brw_draw_arrays; + brw->base.draw_elements = brw_draw_elements; + brw->base.draw_range_elements = brw_draw_range_elements; } void brw_draw_destroy( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h index dc7ca8731d..13f0443a81 100644 --- a/src/gallium/drivers/i965/brw_draw.h +++ b/src/gallium/drivers/i965/brw_draw.h @@ -33,21 +33,8 @@ struct brw_context; -void brw_draw_prims( struct brw_context *brw, - const struct gl_client_array 
*arrays[], - const struct _mesa_prim *prims, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLboolean index_bounds_valid, - GLuint min_index, - GLuint max_index ); - void brw_draw_init( struct brw_context *brw ); -void brw_draw_destroy( struct brw_context *brw ); +void brw_draw_cleanup( struct brw_context *brw ); -/* brw_draw_current.c - */ -void brw_init_current_values(struct brw_context *brw, - struct gl_client_array *arrays); #endif diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index 1ab65d60c4..7b0860d04c 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -36,8 +36,6 @@ #include "brw_fallback.h" #include "brw_batchbuffer.h" -#include "intel_buffer_objects.h" -#include "intel_tex.h" diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h index 46d52a473b..ac5a623cac 100644 --- a/src/gallium/drivers/i965/brw_eu.h +++ b/src/gallium/drivers/i965/brw_eu.h @@ -33,6 +33,8 @@ #ifndef BRW_EU_H #define BRW_EU_H +#include "util/u_debug.h" + #include "brw_structs.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index eb39be8545..0f2612c181 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -32,8 +32,6 @@ #include "brw_batchbuffer.h" -#include "intel_regions.h" - #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_pipe_debug.c b/src/gallium/drivers/i965/brw_pipe_debug.c deleted file mode 100644 index 34d6d4028a..0000000000 --- a/src/gallium/drivers/i965/brw_pipe_debug.c +++ /dev/null @@ -1,2 +0,0 @@ - if (INTEL_DEBUG & DEBUG_STATS) - cc.cc5.statistics_enable = 1; diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c index 55242ac6ad..a2da1373bf 100644 --- 
a/src/gallium/drivers/i965/brw_pipe_query.c +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -43,7 +43,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_batchbuffer.h" -#include "intel_reg.h" +#include "brw_reg.h" /** Waits on the query object's BO and totals the results for this query */ static void @@ -165,7 +165,7 @@ brw_prepare_query_begin(struct brw_context *brw) brw->sws->bo_unreference(brw->query.bo); brw->query.bo = NULL; - brw->query.bo = dri_bo_alloc(brw->bufmgr, "query", 4096, 1); + brw->query.bo = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_QUERY, 4096, 1); brw->query.index = 0; } diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c new file mode 100644 index 0000000000..b0928adbe4 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -0,0 +1,26 @@ + +static void brw_merge_inputs( struct brw_context *brw, + const struct gl_client_array *arrays[]) +{ + struct brw_vertex_info old = brw->vb.info; + GLuint i; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) + brw->sws->bo_unreference(brw->vb.inputs[i].bo); + + memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs)); + memset(&brw->vb.info, 0, sizeof(brw->vb.info)); + + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + brw->vb.inputs[i].glarray = arrays[i]; + brw->vb.inputs[i].attrib = (gl_vert_attrib) i; + + if (arrays[i]->StrideB != 0) + brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << + ((i%16) * 2); + } + + /* Raise statechanges if input sizes have changed. 
*/ + if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) + brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; +} diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h index a640104d71..f428ec9269 100644 --- a/src/gallium/drivers/i965/brw_reg.h +++ b/src/gallium/drivers/i965/brw_reg.h @@ -76,4 +76,40 @@ #define FENCE_YMAJOR 2 + +/* PCI IDs + */ +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + +#define PCI_CHIP_GM45_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 +#define PCI_CHIP_G41_G 0x2E32 +#define PCI_CHIP_B43_G 0x2E42 + +#define PCI_CHIP_ILD_G 0x0042 +#define PCI_CHIP_ILM_G 0x0046 + +struct brw_chipset { + int pci_id:16; + int is_965:1; + int is_igdng:1; + int is_g4x:1; + int pad:13; +}; + + +/* XXX: hacks + */ +#define VERT_RESULT_HPOS 0 /* not always true */ +#define VERT_RESULT_PSIZ 10000 /* disabled */ + + #endif diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c new file mode 100644 index 0000000000..671467989d --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen.c @@ -0,0 +1,365 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_string.h" + +#include "brw_reg.h" +#include "brw_context.h" +#include "brw_screen.h" +#include "brw_buffer.h" +#include "brw_texture.h" +#include "brw_winsys.h" + +#ifdef DEBUG +static const struct debug_named_value debug_names[] = { + { "tex", DEBUG_TEXTURE}, + { "state", DEBUG_STATE}, + { "ioctl", DEBUG_IOCTL}, + { "blit", DEBUG_BLIT}, + { "curbe", DEBUG_CURBE}, + { "fall", DEBUG_FALLBACKS}, + { "verb", DEBUG_VERBOSE}, + { "bat", DEBUG_BATCH}, + { "pix", DEBUG_PIXEL}, + { "buf", DEBUG_BUFMGR}, + { "reg", DEBUG_REGION}, + { "fbo", DEBUG_FBO}, + { "lock", DEBUG_LOCK}, + { "sync", DEBUG_SYNC}, + { "prim", DEBUG_PRIMS }, + { "vert", DEBUG_VERTS }, + { "dri", DEBUG_DRI }, + { "dma", DEBUG_DMA }, + { "san", DEBUG_SANITY }, + { "sleep", DEBUG_SLEEP }, + { "stats", DEBUG_STATS }, + { "tile", DEBUG_TILE }, + { "sing", DEBUG_SINGLE_THREAD }, + { "thre", DEBUG_SINGLE_THREAD }, + { "wm", DEBUG_WM }, + { "urb", DEBUG_URB }, + { "vs", DEBUG_VS }, + { NULL, 0 } +}; + +int BRW_DEBUG = 0; +#endif + + +/* + * Probe functions + */ + + +static const char * +brw_get_vendor(struct pipe_screen *screen) +{ + return "VMware, Inc."; +} + +static const char * +brw_get_name(struct pipe_screen *screen) +{ + static char buffer[128]; + const char *chipset; + + switch (brw_screen(screen)->pci_id) { + case PCI_CHIP_I965_G: + chipset = "I965_G"; + break; + case PCI_CHIP_I965_Q: + chipset = "I965_Q"; + break; + case PCI_CHIP_I965_G_1: + chipset = "I965_G_1"; + break; + case PCI_CHIP_I946_GZ: + chipset = "I946_GZ"; + break; + case PCI_CHIP_I965_GM: + chipset = "I965_GM"; + break; + case PCI_CHIP_I965_GME: + chipset = "I965_GME"; + break; + case PCI_CHIP_GM45_GM: + chipset = "GM45_GM"; + break; + case PCI_CHIP_IGD_E_G: + chipset = "IGD_E_G"; + break; + case PCI_CHIP_Q45_G: + chipset = "Q45_G"; + break; + case PCI_CHIP_G45_G: + chipset = 
"G45_G"; + break; + case PCI_CHIP_G41_G: + chipset = "G41_G"; + break; + case PCI_CHIP_B43_G: + chipset = "B43_G"; + break; + case PCI_CHIP_ILD_G: + chipset = "ILD_G"; + break; + case PCI_CHIP_ILM_G: + chipset = "ILM_G"; + break; + } + + util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); + return buffer; +} + +static int +brw_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + +static float +brw_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + +static boolean +brw_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + static const enum pipe_format tex_supported[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_A8L8_UNORM, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + 
PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format surface_supported[] = { + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + const enum pipe_format *list; + uint i; + + if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + list = surface_supported; + else + list = tex_supported; + + for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { + if (list[i] == format) + return TRUE; + } + + return FALSE; +} + + +/* + * Fence functions + */ + + +static void +brw_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct brw_screen *is = brw_screen(screen); + + is->iws->fence_reference(is->iws, ptr, fence); +} + +static int +brw_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + struct brw_screen *is = brw_screen(screen); + + return is->iws->fence_signalled(is->iws, fence); +} + +static int +brw_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flags) +{ + struct brw_screen *is = brw_screen(screen); + + return is->iws->fence_finish(is->iws, fence); +} + + +/* + * Generic functions + */ + + +static void +brw_destroy_screen(struct pipe_screen *screen) +{ + struct brw_screen *is = brw_screen(screen); + + if (is->iws) + is->iws->destroy(is->iws); + + FREE(is); +} + +/** + * Create a new brw_screen object + */ +struct pipe_screen * +brw_create_screen(struct intel_winsys *iws, uint pci_id) +{ + struct brw_screen *is; + struct brw_chipset chipset; + +#ifdef DEBUG + BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); + BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); +#endif + + memset(&chipset, 0, sizeof chipset); + + chipset.pci_id = pci_id; + + switch (pci_id) { + case PCI_CHIP_I965_G: + case PCI_CHIP_I965_Q: + case PCI_CHIP_I965_G_1: + case PCI_CHIP_I946_GZ: + case 
PCI_CHIP_I965_GM: + case PCI_CHIP_I965_GME: + chipset.is_965 = TRUE; + break; + + case PCI_CHIP_GM45_GM: + case PCI_CHIP_IGD_E_G: + case PCI_CHIP_Q45_G: + case PCI_CHIP_G45_G: + case PCI_CHIP_G41_G: + case PCI_CHIP_B43_G: + chipset.is_g4x = TRUE; + break; + + case PCI_CHIP_ILD_G: + case PCI_CHIP_ILM_G: + chipset.is_igdng = TRUE; + break; + + default: + debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", + __FUNCTION__, pci_id); + return NULL; + } + + + is = CALLOC_STRUCT(brw_screen); + if (!is) + return NULL; + + is->chipset = chipset; + is->iws = iws; + is->base.winsys = NULL; + is->base.destroy = brw_destroy_screen; + is->base.get_name = brw_get_name; + is->base.get_vendor = brw_get_vendor; + is->base.get_param = brw_get_param; + is->base.get_paramf = brw_get_paramf; + is->base.is_format_supported = brw_is_format_supported; + is->base.fence_reference = brw_fence_reference; + is->base.fence_signalled = brw_fence_signalled; + is->base.fence_finish = brw_fence_finish; + + brw_screen_init_texture_functions(is); + brw_screen_init_buffer_functions(is); + + return &is->base; +} diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index 716b55c52b..79d595d0ad 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -31,6 +31,7 @@ #include "pipe/p_state.h" #include "pipe/p_screen.h" +#include "brw_reg.h" struct brw_winsys_screen; @@ -41,11 +42,8 @@ struct brw_winsys_screen; struct brw_screen { struct pipe_screen base; - + struct brw_chipset chipset; struct brw_winsys_screen *sws; - - boolean is_i945; - uint pci_id; }; /** diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 54202cbd12..53e8f09e37 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -56,7 +56,7 @@ static void compile_sf_prog( struct brw_context *brw, c.key = *key; c.nr_attrs = util_count_bits(c.key.attrs); c.nr_attr_regs = (c.nr_attrs+1)/2; - 
c.nr_setup_attrs = util_count_bits(c.key.attrs & DO_SETUP_BITS); + c.nr_setup_attrs = c.key.nr_attrs; c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index 5e1229d22f..0e406f12e1 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -68,8 +68,7 @@ static void upload_sf_vp(struct brw_context *brw) */ /* The scissor only needs to handle the intersection of drawable and - * scissor rect. Clipping to the boundaries of static shared buffers - * for front/back/depth is covered by looping over cliprects in brw_draw.c. + * scissor rect. * * Note that the hardware's coordinates are inclusive, while Mesa's min is * inclusive but max is exclusive. diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index 1b5f27cc16..97f88b3ab3 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -220,8 +220,8 @@ brw_upload_cache( struct brw_cache *cache, int i; /* Create the buffer object to contain the data */ - bo = dri_bo_alloc(cache->brw->intel.bufmgr, - cache->name[cache_id], data_size, 1 << 6); + bo = brw->sws->bo_alloc(cache->sws, + cache->buffer_type[cache_id], data_size, 1 << 6); /* Set up the memory containing the key, aux_data, and reloc_bufs */ diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c new file mode 100644 index 0000000000..812b761d40 --- /dev/null +++ b/src/gallium/drivers/i965/brw_state_debug.c @@ -0,0 +1,145 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + + + +#include "brw_context.h" +#include "brw_state.h" + + +struct dirty_bit_map { + uint32_t bit; + char *name; + uint32_t count; +}; + +#define DEFINE_BIT(name) {name, #name, 0} + +static struct dirty_bit_map mesa_bits[] = { + DEFINE_BIT(_NEW_MODELVIEW), + DEFINE_BIT(_NEW_PROJECTION), + DEFINE_BIT(_NEW_TEXTURE_MATRIX), + DEFINE_BIT(_NEW_COLOR_MATRIX), + DEFINE_BIT(_NEW_ACCUM), + DEFINE_BIT(_NEW_COLOR), + DEFINE_BIT(_NEW_DEPTH), + DEFINE_BIT(_NEW_EVAL), + DEFINE_BIT(_NEW_FOG), + DEFINE_BIT(_NEW_HINT), + DEFINE_BIT(_NEW_LIGHT), + DEFINE_BIT(_NEW_LINE), + DEFINE_BIT(_NEW_PIXEL), + DEFINE_BIT(_NEW_POINT), + DEFINE_BIT(_NEW_POLYGON), + DEFINE_BIT(_NEW_POLYGONSTIPPLE), + DEFINE_BIT(_NEW_SCISSOR), + DEFINE_BIT(_NEW_STENCIL), + DEFINE_BIT(_NEW_TEXTURE), + DEFINE_BIT(_NEW_TRANSFORM), + DEFINE_BIT(_NEW_VIEWPORT), + DEFINE_BIT(_NEW_PACKUNPACK), + DEFINE_BIT(_NEW_ARRAY), + DEFINE_BIT(_NEW_RENDERMODE), + DEFINE_BIT(_NEW_BUFFERS), + DEFINE_BIT(_NEW_MULTISAMPLE), + DEFINE_BIT(_NEW_TRACK_MATRIX), + DEFINE_BIT(_NEW_PROGRAM), + DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), + {0, 0, 0} +}; + +static struct dirty_bit_map brw_bits[] = { + DEFINE_BIT(BRW_NEW_URB_FENCE), + DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM), + DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM), + DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS), + DEFINE_BIT(BRW_NEW_CURBE_OFFSETS), + DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE), + DEFINE_BIT(BRW_NEW_PRIMITIVE), + DEFINE_BIT(BRW_NEW_CONTEXT), + DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), + DEFINE_BIT(BRW_NEW_PSP), + DEFINE_BIT(BRW_NEW_FENCE), + DEFINE_BIT(BRW_NEW_INDICES), + DEFINE_BIT(BRW_NEW_INDEX_BUFFER), + DEFINE_BIT(BRW_NEW_VERTICES), + DEFINE_BIT(BRW_NEW_BATCH), + DEFINE_BIT(BRW_NEW_DEPTH_BUFFER), + {0, 0, 0} +}; + +static struct dirty_bit_map cache_bits[] = { + DEFINE_BIT(CACHE_NEW_CC_VP), + DEFINE_BIT(CACHE_NEW_CC_UNIT), + DEFINE_BIT(CACHE_NEW_WM_PROG), + DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR), + DEFINE_BIT(CACHE_NEW_SAMPLER), 
+ DEFINE_BIT(CACHE_NEW_WM_UNIT), + DEFINE_BIT(CACHE_NEW_SF_PROG), + DEFINE_BIT(CACHE_NEW_SF_VP), + DEFINE_BIT(CACHE_NEW_SF_UNIT), + DEFINE_BIT(CACHE_NEW_VS_UNIT), + DEFINE_BIT(CACHE_NEW_VS_PROG), + DEFINE_BIT(CACHE_NEW_GS_UNIT), + DEFINE_BIT(CACHE_NEW_GS_PROG), + DEFINE_BIT(CACHE_NEW_CLIP_VP), + DEFINE_BIT(CACHE_NEW_CLIP_UNIT), + DEFINE_BIT(CACHE_NEW_CLIP_PROG), + DEFINE_BIT(CACHE_NEW_SURFACE), + DEFINE_BIT(CACHE_NEW_SURF_BIND), + {0, 0, 0} +}; + + +static void +brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) +{ + int i; + + for (i = 0; i < 32; i++) { + if (bit_map[i].bit == 0) + return; + + if (bit_map[i].bit & bits) + bit_map[i].count++; + } +} + +static void +brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) +{ + int i; + + for (i = 0; i < 32; i++) { + if (bit_map[i].bit == 0) + return; + + fprintf(stderr, "0x%08x: %12d (%s)\n", + bit_map[i].bit, bit_map[i].count, bit_map[i].name); + } +} + diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index 842380e38f..8659e35289 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -45,7 +45,7 @@ const struct brw_tracked_state *atoms[] = { &brw_check_fallback, - &brw_wm_input_sizes, +// &brw_wm_input_sizes, &brw_vs_prog, &brw_gs_prog, &brw_clip_prog, @@ -155,117 +155,6 @@ brw_clear_validated_bos(struct brw_context *brw) brw->state.validated_bo_count = 0; } -struct dirty_bit_map { - uint32_t bit; - char *name; - uint32_t count; -}; - -#define DEFINE_BIT(name) {name, #name, 0} - -static struct dirty_bit_map mesa_bits[] = { - DEFINE_BIT(_NEW_MODELVIEW), - DEFINE_BIT(_NEW_PROJECTION), - DEFINE_BIT(_NEW_TEXTURE_MATRIX), - DEFINE_BIT(_NEW_COLOR_MATRIX), - DEFINE_BIT(_NEW_ACCUM), - DEFINE_BIT(_NEW_COLOR), - DEFINE_BIT(_NEW_DEPTH), - DEFINE_BIT(_NEW_EVAL), - DEFINE_BIT(_NEW_FOG), - DEFINE_BIT(_NEW_HINT), - DEFINE_BIT(_NEW_LIGHT), - DEFINE_BIT(_NEW_LINE), - DEFINE_BIT(_NEW_PIXEL), - 
DEFINE_BIT(_NEW_POINT), - DEFINE_BIT(_NEW_POLYGON), - DEFINE_BIT(_NEW_POLYGONSTIPPLE), - DEFINE_BIT(_NEW_SCISSOR), - DEFINE_BIT(_NEW_STENCIL), - DEFINE_BIT(_NEW_TEXTURE), - DEFINE_BIT(_NEW_TRANSFORM), - DEFINE_BIT(_NEW_VIEWPORT), - DEFINE_BIT(_NEW_PACKUNPACK), - DEFINE_BIT(_NEW_ARRAY), - DEFINE_BIT(_NEW_RENDERMODE), - DEFINE_BIT(_NEW_BUFFERS), - DEFINE_BIT(_NEW_MULTISAMPLE), - DEFINE_BIT(_NEW_TRACK_MATRIX), - DEFINE_BIT(_NEW_PROGRAM), - DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), - {0, 0, 0} -}; - -static struct dirty_bit_map brw_bits[] = { - DEFINE_BIT(BRW_NEW_URB_FENCE), - DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM), - DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM), - DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS), - DEFINE_BIT(BRW_NEW_CURBE_OFFSETS), - DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE), - DEFINE_BIT(BRW_NEW_PRIMITIVE), - DEFINE_BIT(BRW_NEW_CONTEXT), - DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), - DEFINE_BIT(BRW_NEW_PSP), - DEFINE_BIT(BRW_NEW_FENCE), - DEFINE_BIT(BRW_NEW_INDICES), - DEFINE_BIT(BRW_NEW_INDEX_BUFFER), - DEFINE_BIT(BRW_NEW_VERTICES), - DEFINE_BIT(BRW_NEW_BATCH), - DEFINE_BIT(BRW_NEW_DEPTH_BUFFER), - {0, 0, 0} -}; - -static struct dirty_bit_map cache_bits[] = { - DEFINE_BIT(CACHE_NEW_CC_VP), - DEFINE_BIT(CACHE_NEW_CC_UNIT), - DEFINE_BIT(CACHE_NEW_WM_PROG), - DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR), - DEFINE_BIT(CACHE_NEW_SAMPLER), - DEFINE_BIT(CACHE_NEW_WM_UNIT), - DEFINE_BIT(CACHE_NEW_SF_PROG), - DEFINE_BIT(CACHE_NEW_SF_VP), - DEFINE_BIT(CACHE_NEW_SF_UNIT), - DEFINE_BIT(CACHE_NEW_VS_UNIT), - DEFINE_BIT(CACHE_NEW_VS_PROG), - DEFINE_BIT(CACHE_NEW_GS_UNIT), - DEFINE_BIT(CACHE_NEW_GS_PROG), - DEFINE_BIT(CACHE_NEW_CLIP_VP), - DEFINE_BIT(CACHE_NEW_CLIP_UNIT), - DEFINE_BIT(CACHE_NEW_CLIP_PROG), - DEFINE_BIT(CACHE_NEW_SURFACE), - DEFINE_BIT(CACHE_NEW_SURF_BIND), - {0, 0, 0} -}; - - -static void -brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) -{ - int i; - - for (i = 0; i < 32; i++) { - if (bit_map[i].bit == 0) - return; - - if (bit_map[i].bit & bits) - 
bit_map[i].count++; - } -} - -static void -brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) -{ - int i; - - for (i = 0; i < 32; i++) { - if (bit_map[i].bit == 0) - return; - - fprintf(stderr, "0x%08x: %12d (%s)\n", - bit_map[i].bit, bit_map[i].count, bit_map[i].name); - } -} /*********************************************************************** * Emit all state: diff --git a/src/gallium/drivers/i965/brw_tex.c b/src/gallium/drivers/i965/brw_tex.c index c33c19ee51..6f7adb6393 100644 --- a/src/gallium/drivers/i965/brw_tex.c +++ b/src/gallium/drivers/i965/brw_tex.c @@ -30,8 +30,6 @@ */ -#include "intel_regions.h" -#include "intel_tex.h" #include "brw_context.h" /** diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c index 813cd31f49..50c30878c6 100644 --- a/src/gallium/drivers/i965/brw_tex_layout.c +++ b/src/gallium/drivers/i965/brw_tex_layout.c @@ -32,9 +32,7 @@ /* Code to layout images in a mipmap tree for i965. */ -#include "intel_mipmap_tree.h" -#include "intel_tex_layout.h" -#include "intel_chipset.h" +#include "brw_tex_layout.h" #define FILE_DEBUG_FLAG DEBUG_MIPTREE diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h index 87dae13d94..89e08a5c80 100644 --- a/src/gallium/drivers/i965/brw_types.h +++ b/src/gallium/drivers/i965/brw_types.h @@ -15,4 +15,7 @@ typedef float GLfloat; typedef uint8_t GLboolean; +#define GL_FALSE FALSE +#define GL_TRUE TRUE + #endif diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c index 6446e8e761..319e29bfcb 100644 --- a/src/gallium/drivers/i965/brw_vs_surface_state.c +++ b/src/gallium/drivers/i965/brw_vs_surface_state.c @@ -52,7 +52,8 @@ brw_vs_update_constant_buffer(struct brw_context *brw) if (!vp->use_const_buffer) return NULL; - const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", + const_buffer = brw->sws->bo_alloc(brw->sws, + 
BRW_BUFFER_TYPE_SHADER_CONSTANTS, size, 64); /* _NEW_PROGRAM_CONSTANTS */ diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index 2142db5a4d..82cd8007ac 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -37,6 +37,7 @@ struct brw_winsys_buffer { struct brw_winsys_screen *sws; void *bo; unsigned offset; + unsigned size; }; enum brw_buffer_usage { @@ -63,6 +64,11 @@ enum brw_buffer_type BRW_BUFFER_TYPE_TEXTURE, BRW_BUFFER_TYPE_SCANOUT, /**< a texture used for scanning out from */ BRW_BUFFER_TYPE_VERTEX, + BRW_BUFFER_TYPE_CURBE, + BRW_BUFFER_TYPE_QUERY, + BRW_BUFFER_TYPE_SHADER_CONSTANTS, + BRW_BUFFER_TYPE_WM_SCRATCH, + BRW_BUFFER_TYPE_BATCH, }; @@ -82,6 +88,10 @@ struct brw_batchbuffer { uint8_t *map; uint8_t *ptr; size_t size; + struct { + uint8_t *end_ptr; + } emit; + size_t relocs; size_t max_relocs; @@ -125,15 +135,15 @@ struct brw_winsys_screen { /** * Buffer functions. */ + /*@{*/ /** * Create a buffer. */ - struct brw_winsys_buffer *(*buffer_create)(struct brw_winsys *iws, - unsigned size, - unsigned alignment, - enum brw_buffer_type type); - + struct brw_winsys_buffer *(*bo_alloc)( struct brw_winsys_screen *sws, + enum brw_buffer_type type, + unsigned size, + unsigned alignment ); /* Reference and unreference buffers: */ @@ -146,6 +156,11 @@ struct brw_winsys_screen { unsigned offset, struct brw_winsys_buffer *b2); + void (*bo_subdata)(struct brw_winsys_buffer *dst, + size_t offset, + size_t size, + const void *data); + /** * Map a buffer. */ @@ -159,17 +174,6 @@ struct brw_winsys_screen { void (*buffer_unmap)(struct brw_winsys *iws, struct brw_winsys_buffer *buffer); - /** - * Write to a buffer. - * - * Arguments follows pipe_buffer_write. 
- */ - int (*buffer_write)(struct brw_winsys *iws, - struct brw_winsys_buffer *dst, - size_t offset, - size_t size, - const void *data); - void (*buffer_destroy)(struct brw_winsys *iws, struct brw_winsys_buffer *buffer); /*@}*/ @@ -208,14 +212,14 @@ struct brw_winsys_screen { /** - * Create i915 pipe_screen. + * Create brw pipe_screen. */ -struct pipe_screen *i915_create_screen(struct brw_winsys *iws, unsigned pci_id); +struct pipe_screen *brw_create_screen(struct brw_winsys *iws, unsigned pci_id); /** - * Create a i915 pipe_context. + * Create a brw pipe_context. */ -struct pipe_context *i915_create_context(struct pipe_screen *screen); +struct pipe_context *brw_create_context(struct pipe_screen *screen); /** * Get the brw_winsys buffer backing the texture. @@ -223,7 +227,7 @@ struct pipe_context *i915_create_context(struct pipe_screen *screen); * TODO UGLY */ struct pipe_texture; -boolean i915_get_texture_buffer_brw(struct pipe_texture *texture, +boolean brw_get_texture_buffer_brw(struct pipe_texture *texture, struct brw_winsys_buffer **buffer, unsigned *stride); @@ -232,10 +236,10 @@ boolean i915_get_texture_buffer_brw(struct pipe_texture *texture, * * TODO UGLY */ -struct pipe_texture * i915_texture_blanket_brw(struct pipe_screen *screen, - struct pipe_texture *tmplt, - unsigned pitch, - struct brw_winsys_buffer *buffer); +struct pipe_texture * brw_texture_blanket(struct pipe_screen *screen, + struct pipe_texture *tmplt, + unsigned pitch, + struct brw_winsys_buffer *buffer); diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 32b8900bac..284cf42f8b 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -310,7 +310,7 @@ static void brw_wm_populate_key( struct brw_context *brw, } /* CACHE_NEW_VS_PROG */ - key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS; + key->vp_outputs_written = brw->vs.prog_data->outputs_written; /* bitmask */ /* The unique fragment program ID 
*/ key->program_string_id = fp->id; diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index 958c00d3e0..16a2324049 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -272,10 +272,10 @@ static void upload_wm_unit( struct brw_context *brw ) brw->wm.scratch_bo = NULL; } if (brw->wm.scratch_bo == NULL) { - brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr, - "wm scratch", - total, - 4096); + brw->wm.scratch_bo = brw->sws->bo_alloc(brw->sws, + BRW_BUFFER_TYPE_WM_SCRATCH, + total, + 4096); } } diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index 5045c9b4a6..e1ed6438dc 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -30,11 +30,7 @@ */ -#include "intel_mipmap_tree.h" #include "brw_batchbuffer.h" -#include "intel_tex.h" -#include "intel_fbo.h" - #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" @@ -365,7 +361,8 @@ brw_wm_update_constant_buffer(struct brw_context *brw) if (!fp->use_const_buffer) return NULL; - const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", + const_buffer = drm_intel_bo_alloc(intel->bufmgr, + BRW_BUFFER_TYPE_SHADER_CONSTANTS, size, 64); /* _NEW_PROGRAM_CONSTANTS */ @@ -686,7 +683,7 @@ static void prepare_wm_surfaces(struct brw_context *brw ) } old_nr_surfaces = brw->wm.nr_surfaces; - brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; + brw->wm.nr_surfaces = PIPE_MAX_COLOR_BUFS; if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; diff --git a/src/gallium/drivers/i965/intel_chipset.h b/src/gallium/drivers/i965/intel_chipset.h deleted file mode 100644 index 3c38f1676c..0000000000 --- a/src/gallium/drivers/i965/intel_chipset.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright © 2007 Intel Corporation - * - * Permission is hereby granted, free of 
charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Eric Anholt - * - */ - -#define PCI_CHIP_I810 0x7121 -#define PCI_CHIP_I810_DC100 0x7123 -#define PCI_CHIP_I810_E 0x7125 -#define PCI_CHIP_I815 0x1132 - -#define PCI_CHIP_I830_M 0x3577 -#define PCI_CHIP_845_G 0x2562 -#define PCI_CHIP_I855_GM 0x3582 -#define PCI_CHIP_I865_G 0x2572 - -#define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_E7221_G 0x258A -#define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I945_GM 0x27A2 -#define PCI_CHIP_I945_GME 0x27AE - -#define PCI_CHIP_Q35_G 0x29B2 -#define PCI_CHIP_G33_G 0x29C2 -#define PCI_CHIP_Q33_G 0x29D2 - -#define PCI_CHIP_IGD_GM 0xA011 -#define PCI_CHIP_IGD_G 0xA001 - -#define IS_IGDGM(devid) (devid == PCI_CHIP_IGD_GM) -#define IS_IGDG(devid) (devid == PCI_CHIP_IGD_G) -#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid)) - -#define PCI_CHIP_I965_G 0x29A2 -#define PCI_CHIP_I965_Q 0x2992 -#define PCI_CHIP_I965_G_1 0x2982 -#define PCI_CHIP_I946_GZ 0x2972 -#define PCI_CHIP_I965_GM 0x2A02 -#define PCI_CHIP_I965_GME 0x2A12 - -#define PCI_CHIP_GM45_GM 0x2A42 - -#define PCI_CHIP_IGD_E_G 0x2E02 -#define PCI_CHIP_Q45_G 0x2E12 -#define PCI_CHIP_G45_G 0x2E22 -#define PCI_CHIP_G41_G 0x2E32 - -#define PCI_CHIP_ILD_G 0x0042 -#define PCI_CHIP_ILM_G 0x0046 - -#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ - devid == PCI_CHIP_I915_GM || \ - devid == PCI_CHIP_I945_GM || \ - devid == PCI_CHIP_I945_GME || \ - devid == PCI_CHIP_I965_GM || \ - devid == PCI_CHIP_I965_GME || \ - devid == PCI_CHIP_GM45_GM || \ - IS_IGD(devid) || \ - devid == PCI_CHIP_ILM_G) - -#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \ - devid == PCI_CHIP_Q45_G || \ - devid == PCI_CHIP_G45_G || \ - devid == PCI_CHIP_G41_G) -#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM) -#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) - -#define IS_ILD(devid) (devid == PCI_CHIP_ILD_G) -#define IS_ILM(devid) (devid == PCI_CHIP_ILM_G) -#define IS_IGDNG(devid) (IS_ILD(devid) || IS_ILM(devid)) - -#define IS_915(devid) 
(devid == PCI_CHIP_I915_G || \ - devid == PCI_CHIP_E7221_G || \ - devid == PCI_CHIP_I915_GM) - -#define IS_945(devid) (devid == PCI_CHIP_I945_G || \ - devid == PCI_CHIP_I945_GM || \ - devid == PCI_CHIP_I945_GME || \ - devid == PCI_CHIP_G33_G || \ - devid == PCI_CHIP_Q33_G || \ - devid == PCI_CHIP_Q35_G || IS_IGD(devid)) - -#define IS_965(devid) (devid == PCI_CHIP_I965_G || \ - devid == PCI_CHIP_I965_Q || \ - devid == PCI_CHIP_I965_G_1 || \ - devid == PCI_CHIP_I965_GM || \ - devid == PCI_CHIP_I965_GME || \ - devid == PCI_CHIP_I946_GZ || \ - IS_G4X(devid) || \ - IS_IGDNG(devid)) - -#define IS_9XX(devid) (IS_915(devid) || \ - IS_945(devid) || \ - IS_965(devid)) diff --git a/src/gallium/drivers/i965/intel_tex_format.c b/src/gallium/drivers/i965/intel_tex_format.c deleted file mode 100644 index c62ecdadf0..0000000000 --- a/src/gallium/drivers/i965/intel_tex_format.c +++ /dev/null @@ -1,28 +0,0 @@ -#include "intel_context.h" -#include "intel_tex.h" -#include "intel_chipset.h" - - - - -int intel_compressed_num_bytes(GLuint mesaFormat) -{ - int bytes = 0; - switch(mesaFormat) { - - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: - case MESA_FORMAT_RGB_DXT1: - case MESA_FORMAT_RGBA_DXT1: - bytes = 2; - break; - - case MESA_FORMAT_RGBA_DXT3: - case MESA_FORMAT_RGBA_DXT5: - bytes = 4; - default: - break; - } - - return bytes; -} diff --git a/src/gallium/drivers/i965/intel_tex_layout.c b/src/gallium/drivers/i965/intel_tex_layout.c index 1cdab49e5e..7e0ca553f2 100644 --- a/src/gallium/drivers/i965/intel_tex_layout.c +++ b/src/gallium/drivers/i965/intel_tex_layout.c @@ -30,9 +30,7 @@ * Michel Dänzer */ -#include "intel_mipmap_tree.h" #include "intel_tex_layout.h" -#include "intel_context.h" void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h) { -- cgit v1.2.3 From 4dd2f6640b70e2313f8771f7588aa49a861153aa Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 Oct 2009 00:02:16 +0100 Subject: i965g: more work on compiling, particularly 
the brw_draw files --- src/gallium/auxiliary/util/u_debug.c | 27 +++ src/gallium/auxiliary/util/u_prim.h | 2 + src/gallium/auxiliary/util/u_upload_mgr.h | 2 + src/gallium/drivers/i965/Makefile | 2 +- src/gallium/drivers/i965/brw_batchbuffer.c | 198 +++++++++++++++ src/gallium/drivers/i965/brw_batchbuffer.h | 14 +- src/gallium/drivers/i965/brw_cc.c | 8 +- src/gallium/drivers/i965/brw_clip.c | 4 +- src/gallium/drivers/i965/brw_clip_state.c | 4 +- src/gallium/drivers/i965/brw_context.c | 2 +- src/gallium/drivers/i965/brw_context.h | 68 ++++-- src/gallium/drivers/i965/brw_curbe.c | 13 +- src/gallium/drivers/i965/brw_draw.c | 165 +++++++------ src/gallium/drivers/i965/brw_draw.h | 3 +- src/gallium/drivers/i965/brw_draw_upload.c | 372 +++++++++++++++++------------ src/gallium/drivers/i965/brw_eu.c | 5 +- src/gallium/drivers/i965/brw_eu_debug.c | 13 +- src/gallium/drivers/i965/brw_misc_state.c | 18 +- src/gallium/drivers/i965/brw_pipe_flush.c | 3 + src/gallium/drivers/i965/brw_pipe_shader.c | 19 ++ src/gallium/drivers/i965/brw_pipe_vertex.c | 25 +- src/gallium/drivers/i965/brw_screen.h | 22 ++ src/gallium/drivers/i965/brw_sf.c | 2 +- src/gallium/drivers/i965/brw_sf_state.c | 39 +-- src/gallium/drivers/i965/brw_state.h | 6 +- src/gallium/drivers/i965/brw_state_batch.c | 4 +- src/gallium/drivers/i965/brw_swtnl.c | 6 +- src/gallium/drivers/i965/brw_winsys.h | 7 + src/gallium/drivers/i965/brw_wm.c | 2 +- src/gallium/drivers/i965/brw_wm.h | 8 +- src/gallium/drivers/i965/brw_wm_glsl.c | 28 --- src/gallium/drivers/i965/brw_wm_pass0.c | 32 +-- src/mesa/state_tracker/st_draw.c | 3 +- 33 files changed, 722 insertions(+), 404 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_batchbuffer.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c index 96d400c839..321ac59a7d 100644 --- a/src/gallium/auxiliary/util/u_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -69,6 +69,7 @@ #include 
"util/u_stream.h" #include "util/u_math.h" #include "util/u_tile.h" +#include "util/u_prim.h" #ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY @@ -600,6 +601,32 @@ const char *pf_name( enum pipe_format format ) } + +static const struct debug_named_value pipe_prim_names[] = { +#ifdef DEBUG + DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINES), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_LOOP), + DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLES), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_FAN), + DEBUG_NAMED_VALUE(PIPE_PRIM_QUADS), + DEBUG_NAMED_VALUE(PIPE_PRIM_QUAD_STRIP), + DEBUG_NAMED_VALUE(PIPE_PRIM_POLYGON), +#endif + DEBUG_NAMED_VALUE_END +}; + + +const char *u_prim_name( unsigned prim ) +{ + return debug_dump_enum(pipe_prim_names, prim); +} + + + + #ifdef DEBUG void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index a9b533eea7..7434329962 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -135,4 +135,6 @@ static INLINE unsigned u_reduced_prim( unsigned pipe_prim ) } } +const char *u_prim_name( unsigned pipe_prim ); + #endif diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h index 745b5834af..d414a1f2f6 100644 --- a/src/gallium/auxiliary/util/u_upload_mgr.h +++ b/src/gallium/auxiliary/util/u_upload_mgr.h @@ -32,6 +32,8 @@ #ifndef U_UPLOAD_MGR_H #define U_UPLOAD_MGR_H +#include "pipe/p_error.h" + struct pipe_screen; struct pipe_buffer; struct u_upload_mgr; diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 40c8364824..40e8aa8786 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -61,7 +61,7 @@ C_SOURCES = \ brw_wm_state.c \ brw_wm_surface_state.c \ brw_bo.c \ - intel_batchbuffer.c \ + brw_batchbuffer.c \ 
intel_tex_layout.c include ../../Makefile.template diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c new file mode 100644 index 0000000000..8bcac76ede --- /dev/null +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -0,0 +1,198 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "brw_batchbuffer.h" +#include "brw_decode.h" +#include "brw_reg.h" +#include "brw_winsys.h" + + +void +brw_batchbuffer_reset(struct brw_batchbuffer *batch) +{ + struct intel_context *intel = batch->intel; + + if (batch->buf != NULL) { + brw->sws->bo_unreference(batch->buf); + batch->buf = NULL; + } + + if (!batch->buffer && intel->ttm == GL_TRUE) + batch->buffer = malloc (intel->maxBatchSize); + + batch->buf = batch->sws->bo_alloc(batch->sws, + BRW_BUFFER_TYPE_BATCH, + intel->maxBatchSize, 4096); + if (batch->buffer) + batch->map = batch->buffer; + else { + batch->sws->bo_map(batch->buf, GL_TRUE); + batch->map = batch->buf->virtual; + } + batch->size = intel->maxBatchSize; + batch->ptr = batch->map; + batch->dirty_state = ~0; + batch->cliprect_mode = IGNORE_CLIPRECTS; +} + +struct brw_batchbuffer * +brw_batchbuffer_alloc(struct brw_winsys_screen *sws) +{ + struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer); + + batch->sws = sws; + brw_batchbuffer_reset(batch); + + return batch; +} + +void +brw_batchbuffer_free(struct brw_batchbuffer *batch) +{ + if (batch->map) { + dri_bo_unmap(batch->buf); + batch->map = NULL; + } + + brw->sws->bo_unreference(batch->buf); + batch->buf = NULL; + FREE(batch); +} + + +void +_brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, + int line) +{ + struct intel_context *intel = batch->intel; + GLuint used = batch->ptr - batch->map; + + if (used == 0) + return; + + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch->buf; + batch->sws->bo_reference(intel->first_post_swapbuffers_batch); + } + + if (intel->first_post_swapbuffers_batch == NULL) { + intel->first_post_swapbuffers_batch = intel->batch->buf; + batch->sws->bo_reference(intel->first_post_swapbuffers_batch); + } + + + if (INTEL_DEBUG & DEBUG_BATCH) + fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", 
file, line, + used); + + /* Emit a flush if the bufmgr doesn't do it for us. */ + if (intel->always_flush_cache || !intel->ttm) { + *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); + batch->ptr += 4; + used = batch->ptr - batch->map; + } + + /* Round batchbuffer usage to 2 DWORDs. */ + + if ((used & 4) == 0) { + *(GLuint *) (batch->ptr) = 0; /* noop */ + batch->ptr += 4; + used = batch->ptr - batch->map; + } + + /* Mark the end of the buffer. */ + *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */ + batch->ptr += 4; + used = batch->ptr - batch->map; + + batch->sws->bo_unmap(batch->buf); + + batch->map = NULL; + batch->ptr = NULL; + + batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 ); + + if (INTEL_DEBUG & DEBUG_BATCH) { + dri_bo_map(batch->buf, GL_FALSE); + intel_decode(batch->buf->virtual, used / 4, batch->buf->offset, + brw->brw_screen->pci_id); + dri_bo_unmap(batch->buf); + } + + if (INTEL_DEBUG & DEBUG_SYNC) { + fprintf(stderr, "waiting for idle\n"); + dri_bo_map(batch->buf, GL_TRUE); + dri_bo_unmap(batch->buf); + } + + /* Reset the buffer: + */ + brw_batchbuffer_reset(batch); +} + + +/* This is the only way buffers get added to the validate list. 
+ */ +GLboolean +brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, + struct brw_winsys_buffer *buffer, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta) +{ + int ret; + + if (batch->ptr - batch->map > batch->buf->size) + _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + + ret = batch->sws->bo_emit_reloc(batch->buf, + read_domains, + write_domain, + delta, + batch->ptr - batch->map, + buffer); + + /* + * Using the old buffer offset, write in what the right data would be, in case + * the buffer doesn't move and we can short-circuit the relocation processing + * in the kernel + */ + brw_batchbuffer_emit_dword (batch, buffer->offset + delta); + + return GL_TRUE; +} + +void +brw_batchbuffer_data(struct brw_batchbuffer *batch, + const void *data, GLuint bytes, + enum cliprect_mode cliprect_mode) +{ + assert((bytes & 3) == 0); + brw_batchbuffer_require_space(batch, bytes); + __memcpy(batch->ptr, data, bytes); + batch->ptr += bytes; +} diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index b8492882e1..25bb9cefca 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -33,18 +33,16 @@ void brw_batchbuffer_reset(struct brw_batchbuffer *batch); * Consider it a convenience function wrapping multple * intel_buffer_dword() calls. 
*/ -void brw_batchbuffer_data(struct brw_batchbuffer *batch, +int brw_batchbuffer_data(struct brw_batchbuffer *batch, const void *data, GLuint bytes, enum cliprect_mode cliprect_mode); -void brw_batchbuffer_release_space(struct brw_batchbuffer *batch, - GLuint bytes); -GLboolean brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, - struct brw_winsys_buffer *buffer, - uint32_t read_domains, - uint32_t write_domain, - uint32_t offset); +int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, + struct brw_winsys_buffer *buffer, + uint32_t read_domains, + uint32_t write_domain, + uint32_t offset); /* Inline functions - might actually be better off with these * non-inlined. Certainly better off switching all command packets to diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index 76759304eb..ca10bc73f6 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -57,7 +57,7 @@ static void calc_sane_viewport( const struct pipe_viewport_state *vp, svp->far = 1; } -static void prepare_cc_vp( struct brw_context *brw ) +static int prepare_cc_vp( struct brw_context *brw ) { struct brw_cc_viewport ccv; struct sane_viewport svp; @@ -72,6 +72,8 @@ static void prepare_cc_vp( struct brw_context *brw ) brw->sws->bo_unreference(brw->cc.vp_bo); brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); + + return 0; } const struct brw_tracked_state brw_cc_vp = { @@ -158,7 +160,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) return bo; } -static void prepare_cc_unit( struct brw_context *brw ) +static int prepare_cc_unit( struct brw_context *brw ) { struct brw_cc_unit_key key; @@ -172,6 +174,8 @@ static void prepare_cc_unit( struct brw_context *brw ) if (brw->cc.state_bo == NULL) brw->cc.state_bo = cc_unit_create_from_key(brw, &key); + + return 0; } const struct brw_tracked_state brw_cc_unit = { diff --git a/src/gallium/drivers/i965/brw_clip.c 
b/src/gallium/drivers/i965/brw_clip.c index 622d9dba96..1a52fa771b 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -146,7 +146,7 @@ static void compile_clip_prog( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static void upload_clip_prog(struct brw_context *brw) +static int upload_clip_prog(struct brw_context *brw) { struct brw_clip_prog_key key; @@ -173,6 +173,8 @@ static void upload_clip_prog(struct brw_context *brw) &brw->clip.prog_data); if (brw->clip.prog_bo == NULL) compile_clip_prog( brw, &key ); + + return 0; } diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 25b8c6372f..bf4e6f5103 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -159,7 +159,7 @@ clip_unit_create_from_key(struct brw_context *brw, return bo; } -static void upload_clip_unit( struct brw_context *brw ) +static int upload_clip_unit( struct brw_context *brw ) { struct brw_clip_unit_key key; @@ -173,6 +173,8 @@ static void upload_clip_unit( struct brw_context *brw ) if (brw->clip.state_bo == NULL) { brw->clip.state_bo = clip_unit_create_from_key(brw, &key); } + + return 0; } const struct brw_tracked_state brw_clip_unit = { diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index e9605bafe6..e10b7d8bf5 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -105,7 +105,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen) brw->state.dirty.mesa = ~0; brw->state.dirty.brw = ~0; - brw->emit_state_always = 0; + brw->flags.always_emit_state = 0; make_empty_list(&brw->query.active_head); diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index dd782fdba9..7ead641811 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ 
b/src/gallium/drivers/i965/brw_context.h @@ -182,6 +182,8 @@ struct brw_fragment_shader { #define PIPE_NEW_FRAGMENT_CONSTANTS 0x2 #define PIPE_NEW_VERTEX_CONSTANTS 0x2 #define PIPE_NEW_CLIP 0x2 +#define PIPE_NEW_INDEX_BUFFER 0x2 +#define PIPE_NEW_INDEX_RANGE 0x2 #define BRW_NEW_URB_FENCE 0x1 @@ -387,8 +389,8 @@ struct brw_cache { */ struct brw_tracked_state { struct brw_state_flags dirty; - void (*prepare)( struct brw_context *brw ); - void (*emit)( struct brw_context *brw ); + int (*prepare)( struct brw_context *brw ); + int (*emit)( struct brw_context *brw ); }; /* Flags for brw->state.cache. @@ -465,9 +467,7 @@ struct brw_context GLuint primitive; GLuint reduced_primitive; - GLboolean emit_state_always; - - /* Active vertex program: + /* Active state from the state tracker: */ struct { const struct brw_vertex_shader *vertex_shader; @@ -475,11 +475,31 @@ struct brw_context const struct brw_blend_state *blend; const struct brw_rasterizer_state *rast; const struct brw_depth_stencil_alpha_state *zstencil; + + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; + struct pipe_framebuffer_state fb; struct pipe_viewport_state vp; struct pipe_clip_state ucp; struct pipe_buffer *vertex_constants; struct pipe_buffer *fragment_constants; + + /** + * Index buffer for this draw_prims call. + * + * Updates are signaled by PIPE_NEW_INDEX_BUFFER. 
+ */ + struct pipe_buffer *index_buffer; + unsigned index_size; + + /* Updates are signalled by PIPE_NEW_INDEX_RANGE: + */ + unsigned min_index; + unsigned max_index; + } curr; struct { @@ -504,30 +524,26 @@ struct brw_context struct brw_cached_batch_item *cached_batch_items; struct { - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned num_vertex_element; - unsigned num_vertex_buffer; - struct u_upload_mgr *upload_vertex; struct u_upload_mgr *upload_index; - - /* Summary of size and varying of active arrays, so we can check - * for changes to this state: + /* Information on uploaded vertex buffers: */ - struct brw_vertex_info info; - unsigned int min_index, max_index; + struct { + unsigned stride; /* in bytes between successive vertices */ + unsigned offset; /* in bytes, of first vertex in bo */ + unsigned vertex_count; /* count of valid vertices which may be accessed */ + struct brw_winsys_buffer *bo; + } vb[PIPE_MAX_ATTRIBS]; + + struct { + } ve[PIPE_MAX_ATTRIBS]; + + unsigned nr_vb; /* currently the same as curr.num_vertex_buffers */ + unsigned nr_ve; /* currently the same as curr.num_vertex_elements */ } vb; struct { - /** - * Index buffer for this draw_prims call. - * - * Updates are signaled by BRW_NEW_INDICES. - */ - const struct _mesa_index_buffer *ib; - /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. 
*/ struct brw_winsys_buffer *bo; unsigned int offset; @@ -668,6 +684,14 @@ struct brw_context int index; GLboolean active; } query; + + struct { + unsigned always_emit_state:1; + unsigned always_flush_batch:1; + unsigned force_swtnl:1; + unsigned no_swtnl:1; + } flags; + /* Used to give every program string a unique id */ GLuint program_id; diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index edc39ff223..278ffa4ca2 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -48,7 +48,7 @@ * constants. That greatly reduces the demand for space in the CURBE. * Some of the comments within are dated... */ -static void calculate_curbe_offsets( struct brw_context *brw ) +static int calculate_curbe_offsets( struct brw_context *brw ) { /* CACHE_NEW_WM_PROG */ const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; @@ -104,6 +104,8 @@ static void calculate_curbe_offsets( struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; } + + return 0; } @@ -157,7 +159,7 @@ static GLfloat fixed_plane[6][4] = { * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. */ -static void prepare_constant_buffer(struct brw_context *brw) +static int prepare_constant_buffer(struct brw_context *brw) { const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); @@ -170,7 +172,7 @@ static void prepare_constant_buffer(struct brw_context *brw) brw->curbe.last_buf = NULL; brw->curbe.last_bufsz = 0; } - return; + return 0; } buf = (GLfloat *) CALLOC(bufsz, 1); @@ -305,9 +307,11 @@ static void prepare_constant_buffer(struct brw_context *brw) * flushes as necessary when doublebuffering of CURBEs isn't * possible. 
*/ + + return 0; } -static void emit_constant_buffer(struct brw_context *brw) +static int emit_constant_buffer(struct brw_context *brw) { GLuint sz = brw->curbe.total_size; @@ -322,6 +326,7 @@ static void emit_constant_buffer(struct brw_context *brw) (sz - 1) + brw->curbe.curbe_offset); } ADVANCE_BATCH(); + return 0; } const struct brw_tracked_state brw_constant_buffer = { diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 7af490bc5a..b5fe7c9601 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -26,15 +26,18 @@ **************************************************************************/ +#include "util/u_prim.h" +#include "util/u_upload_mgr.h" + #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" #include "brw_debug.h" +#include "brw_screen.h" #include "brw_batchbuffer.h" -#define FILE_DEBUG_FLAG DEBUG_BATCH static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = { _3DPRIM_POINTLIST, @@ -56,18 +59,21 @@ static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = { * programs be immune to the active primitive (ie. cope with all * possibilities). That may not be realistic however. 
*/ -static GLuint brw_set_prim(struct brw_context *brw, unsigned prim) +static int brw_set_prim(struct brw_context *brw, unsigned prim ) { if (BRW_DEBUG & DEBUG_PRIMS) debug_printf("PRIM: %s\n", u_prim_name(prim)); if (prim != brw->primitive) { + unsigned reduced_prim; + brw->primitive = prim; brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; - if (reduced_prim[prim] != brw->reduced_primitive) { - brw->reduced_primitive = reduced_prim[prim]; + reduced_prim = u_reduced_prim(prim); + if (reduced_prim != brw->reduced_primitive) { + brw->reduced_primitive = reduced_prim; brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; } } @@ -77,17 +83,14 @@ static GLuint brw_set_prim(struct brw_context *brw, unsigned prim) -static enum pipe_error brw_emit_prim(struct brw_context *brw, - unsigned prim, - unsigned start, - unsigned count, - boolean indexed, - uint32_t hw_prim) +static int brw_emit_prim(struct brw_context *brw, + unsigned start, + unsigned count, + boolean indexed, + uint32_t hw_prim) { struct brw_3d_primitive prim_packet; - - if (INTEL_DEBUG & DEBUG_PRIMS) - debug_printf("PRIM: %s %d %d\n", u_prim_name(prim), start, count); + int ret; prim_packet.header.opcode = CMD_3D_PRIM; prim_packet.header.length = sizeof(prim_packet)/4 - 2; @@ -101,7 +104,7 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw, prim_packet.start_vert_location += brw->ib.start_vertex_offset; prim_packet.instance_count = 1; prim_packet.start_instance_location = 0; - prim_packet.base_vert_location = prim->basevertex; + prim_packet.base_vert_location = 0; // prim->basevertex; XXX: add this to gallium /* If we're set to always flush, do it before and after the primitive emit. @@ -109,20 +112,20 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw, * and missed flushes of the render cache as it heads to other parts of * the besides the draw code. 
*/ - if (intel->always_flush_cache) { - BEGIN_BATCH(1, IGNORE_CLIPRECTS) - OUT_BATCH(intel->vtbl.flush_cmd()); + if (0) { + BEGIN_BATCH(1, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); ADVANCE_BATCH(); } if (prim_packet.verts_per_instance) { - ret = brw_batchbuffer_data( brw->intel.batch, &prim_packet, + ret = brw_batchbuffer_data( brw->batch, &prim_packet, sizeof(prim_packet), LOOP_CLIPRECTS); if (ret) return ret; } - if (intel->always_flush_cache) { + if (0) { BEGIN_BATCH(1, IGNORE_CLIPRECTS); - OUT_BATCH(intel->vtbl.flush_cmd()); + OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); ADVANCE_BATCH(); } @@ -133,44 +136,24 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw, /* May fail if out of video memory for texture or vbo upload, or on * fallback conditions. */ -static GLboolean brw_try_draw_prims( struct brw_context *brw, - const struct gl_client_array *arrays[], - const struct _mesa_prim *prim, - GLuint nr_prims, - const struct _mesa_index_buffer *ib, - GLuint min_index, - GLuint max_index ) +static int +try_draw_range_elements(struct brw_context *brw, + struct pipe_buffer *index_buffer, + unsigned hw_prim, + unsigned start, unsigned count) { - struct brw_context *brw = brw_context(ctx); - GLboolean retval = GL_FALSE; - GLboolean warn = GL_FALSE; - GLboolean first_time = GL_TRUE; - uint32_t hw_prim; - GLuint i; - - if (ctx->NewState) - _mesa_update_state( ctx ); - - /* Bind all inputs, derive varying and size information: - */ - brw_merge_inputs( brw, arrays ); - - brw->ib.ib = ib; - brw->state.dirty.brw |= BRW_NEW_INDICES; - - brw->vb.min_index = min_index; - brw->vb.max_index = max_index; - brw->state.dirty.brw |= BRW_NEW_VERTICES; - - hw_prim = brw_set_prim(brw, prim[i].mode); + int ret; - brw_validate_state(brw); + ret = brw_validate_state(brw); + if (ret) + return ret; /* Check that we can fit our state in with our existing batchbuffer, or * flush otherwise. 
*/ - ret = dri_bufmgr_check_aperture_space(brw->state.validated_bos, - brw->state.validated_bo_count); + ret = brw->sws->check_aperture_space(brw->sws, + brw->state.validated_bos, + brw->state.validated_bo_count); if (ret) return ret; @@ -178,12 +161,12 @@ static GLboolean brw_try_draw_prims( struct brw_context *brw, if (ret) return ret; - ret = brw_emit_prim(brw, &prim[i], hw_prim); + ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim); if (ret) return ret; - if (intel->always_flush_batch) - brw_batchbuffer_flush(intel->batch); + if (brw->flags.always_flush_batch) + brw_batchbuffer_flush(brw->batch); return 0; } @@ -197,22 +180,45 @@ brw_draw_range_elements(struct pipe_context *pipe, unsigned max_index, unsigned mode, unsigned start, unsigned count) { - enum pipe_error ret; + struct brw_context *brw = brw_context(pipe); + int ret; + uint32_t hw_prim; + + hw_prim = brw_set_prim(brw, mode); - if (!vbo_all_varyings_in_vbos(arrays)) { - if (!index_bounds_valid) - vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index); + if (BRW_DEBUG & DEBUG_PRIMS) + debug_printf("PRIM: %s %d %d\n", u_prim_name(mode), start, count); + + /* Potentially trigger upload of new index buffer. + * + * XXX: do we need to go through state validation to achieve this? + * Could just call upload code directly. + */ + if (brw->curr.index_buffer != index_buffer) { + pipe_buffer_reference( &brw->curr.index_buffer, index_buffer ); + brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER; + } + + /* XXX: do we really care? 
+ */ + if (brw->curr.min_index != min_index || + brw->curr.max_index != max_index) + { + brw->curr.min_index = min_index; + brw->curr.max_index = max_index; + brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE; } + /* Make a first attempt at drawing: */ - ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); /* Otherwise, flush and retry: */ if (ret != 0) { - brw_batchbuffer_flush(intel->batch); - ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index); + brw_batchbuffer_flush(brw->batch); + ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); assert(ret == 0); } @@ -242,28 +248,37 @@ brw_draw_arrays(struct pipe_context *pipe, unsigned mode, -void brw_draw_init( struct brw_context *brw ) +boolean brw_draw_init( struct brw_context *brw ) { /* Register our drawing function: */ brw->base.draw_arrays = brw_draw_arrays; brw->base.draw_elements = brw_draw_elements; brw->base.draw_range_elements = brw_draw_range_elements; -} -void brw_draw_destroy( struct brw_context *brw ) -{ - int i; + /* Create helpers for uploading data in user buffers: + */ + brw->vb.upload_vertex = u_upload_create( &brw->brw_screen->base, + 128 * 1024, + 64, + PIPE_BUFFER_USAGE_VERTEX ); + if (brw->vb.upload_vertex == NULL) + return FALSE; + + brw->vb.upload_index = u_upload_create( &brw->brw_screen->base, + 128 * 1024, + 64, + PIPE_BUFFER_USAGE_INDEX ); + if (brw->vb.upload_index == NULL) + return FALSE; - if (brw->vb.upload.bo != NULL) { - brw->sws->bo_unreference(brw->vb.upload.bo); - brw->vb.upload.bo = NULL; - } + return TRUE; +} - for (i = 0; i < VERT_ATTRIB_MAX; i++) { - brw->sws->bo_unreference(brw->vb.inputs[i].bo); - brw->vb.inputs[i].bo = NULL; - } +void brw_draw_cleanup( struct brw_context *brw ) +{ + u_upload_destroy( brw->vb.upload_vertex ); + u_upload_destroy( brw->vb.upload_index ); brw->sws->bo_unreference(brw->ib.bo); brw->ib.bo = NULL; diff 
--git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h index 13f0443a81..8dc5dbce62 100644 --- a/src/gallium/drivers/i965/brw_draw.h +++ b/src/gallium/drivers/i965/brw_draw.h @@ -32,8 +32,7 @@ struct brw_context; - -void brw_draw_init( struct brw_context *brw ); +boolean brw_draw_init( struct brw_context *brw ); void brw_draw_cleanup( struct brw_context *brw ); diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index 7b0860d04c..040d8ca93a 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -26,21 +26,23 @@ **************************************************************************/ #include "pipe/p_context.h" +#include "pipe/p_error.h" #include "util/u_upload_mgr.h" +#include "util/u_math.h" #include "brw_draw.h" #include "brw_defines.h" #include "brw_context.h" #include "brw_state.h" -#include "brw_fallback.h" - +#include "brw_screen.h" #include "brw_batchbuffer.h" +#include "brw_debug.h" -unsigned brw_translate_surface_format( unsigned id ) +static unsigned brw_translate_surface_format( unsigned id ) { switch (id) { case PIPE_FORMAT_R64_FLOAT: @@ -186,70 +188,136 @@ static unsigned get_index_type(int type) } - -static boolean brw_prepare_vertices(struct brw_context *brw) +static int brw_prepare_vertices(struct brw_context *brw) { - GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; + unsigned int min_index = brw->curr.min_index; + unsigned int max_index = brw->curr.max_index; GLuint i; - const unsigned char *ptr = NULL; - GLuint interleave = 0; - unsigned int min_index = brw->vb.min_index; - unsigned int max_index = brw->vb.max_index; - - struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; - GLuint nr_uploads = 0; - - /* First build an array of pointers to ve's in vb.inputs_read - */ - if (0) - _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index); - + int ret; + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s 
%d..%d\n", __FUNCTION__, min_index, max_index); - for (i = 0; i < brw->vb.num_vertex_buffer; i++) { - struct brw_vertex_buffer *vb = brw->vb.vertex_buffer[i]; - unsigned size = (vb->stride == 0 ? - vb->size : - vb->stride * (max_index + 1 - min_index)); - - if (brw_is_user_buffer(vb)) { - u_upload_buffer( brw->upload_vertex, - min_index * vb->stride, - size, - &offset, - &buffer ); + for (i = 0; i < brw->curr.num_vertex_buffers; i++) { + struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i]; + struct brw_winsys_buffer *bo; + struct pipe_buffer *upload_buf; + unsigned offset; + + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s vb[%d] user:%d offset:0x%x sz:0x%x stride:0x%x\n", + __FUNCTION__, i, + brw_buffer_is_user_buffer(vb->buffer), + vb->buffer_offset, + vb->buffer->size, + vb->stride); + + if (brw_buffer_is_user_buffer(vb->buffer)) { + + /* XXX: simplify this. Stop the state trackers from generating + * zero-stride buffers & have them use additional constants (or + * add support for >1 constant buffer) instead. + */ + unsigned size = (vb->stride == 0 ? 
+ vb->buffer->size - vb->buffer_offset : + MAX2(vb->buffer->size - vb->buffer_offset, + vb->stride * (max_index + 1 - min_index))); + + ret = u_upload_buffer( brw->vb.upload_vertex, + vb->buffer_offset + min_index * vb->stride, + size, + vb->buffer, + &offset, + &upload_buf ); + if (ret) + return ret; + + bo = brw_buffer(upload_buf)->bo; + + assert(offset + size <= bo->size); } else { - offset = 0; - buffer = vb->buffer; + offset = vb->buffer_offset; + bo = brw_buffer(vb->buffer)->bo; } + + assert(offset < bo->size); /* Set up post-upload info about this vertex buffer: */ - input->offset = (unsigned long)offset; - input->stride = vb->stride; - input->count = count; - brw->sws->bo_unreference(input->bo); - input->bo = intel_bufferobj_buffer(intel, intel_buffer, - INTEL_READ); - brw->sws->bo_reference(input->bo); - - assert(input->offset < input->bo->size); - assert(input->offset + size <= input->bo->size); + brw->vb.vb[i].offset = offset; + brw->vb.vb[i].stride = vb->stride; + brw->vb.vb[i].vertex_count = (vb->stride == 0 ? + 1 : + (bo->size - offset) / vb->stride); + brw->sws->bo_unreference(brw->vb.vb[i].bo); + brw->vb.vb[i].bo = bo; + brw->sws->bo_reference(brw->vb.vb[i].bo); + + /* Don't need to retain this reference. We have a reference on + * the underlying winsys buffer: + */ + pipe_buffer_reference( &upload_buf, NULL ); } + brw->vb.nr_vb = i; brw_prepare_query_begin(brw); - for (i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; + for (i = 0; i < brw->vb.nr_vb; i++) { + brw_add_validated_bo(brw, brw->vb.vb[i].bo); + } + + return 0; +} + +static int brw_emit_vertex_buffers( struct brw_context *brw ) +{ + int i; + + /* If the VS doesn't read any inputs (calculating vertex position from + * a state variable for some reason, for example), just bail. + * + * The stale VB state stays in place, but they don't do anything unless + * a VE loads from them. 
+ */ + if (brw->vb.nr_vb == 0) { + if (BRW_DEBUG & DEBUG_VERTS) + debug_printf("%s: no active vertex buffers\n", __FUNCTION__); - brw_add_validated_bo(brw, input->bo); + return 0; + } + + /* Emit VB state packets. + */ + BEGIN_BATCH(1 + brw->vb.nr_vb * 4, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_BUFFER << 16) | + ((1 + brw->vb.nr_vb * 4) - 2)); + + for (i = 0; i < brw->vb.nr_vb; i++) { + OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | + BRW_VB0_ACCESS_VERTEXDATA | + (brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT)); + OUT_RELOC(brw->vb.vb[i].bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->vb.vb[i].offset); + if (BRW_IS_IGDNG(brw)) { + OUT_RELOC(brw->vb.vb[i].bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->vb.vb[i].bo->size - 1); + } else + OUT_BATCH(brw->vb.vb[i].stride ? brw->vb.vb[i].vertex_count : 0); + OUT_BATCH(0); /* Instance data step rate */ } + ADVANCE_BATCH(); + return 0; } -static void brw_emit_vertices(struct brw_context *brw) + + + +static int brw_emit_vertex_elements(struct brw_context *brw) { GLuint i; @@ -262,7 +330,7 @@ static void brw_emit_vertices(struct brw_context *brw) * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ - if (brw->vb.nr_enabled == 0) { + if (brw->vb.nr_ve == 0) { BEGIN_BATCH(3, IGNORE_CLIPRECTS); OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | @@ -274,59 +342,23 @@ static void brw_emit_vertices(struct brw_context *brw) (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); ADVANCE_BATCH(); - return; + return 0; } - /* Now emit VB and VEP state packets. + /* Now emit vertex element (VEP) state packets. * - * This still defines a hardware VB for each input, even if they - * are interleaved or from the same VBO. TBD if this makes a - * performance difference. 
*/ - BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_BUFFER << 16) | - ((1 + brw->vb.nr_enabled * 4) - 2)); - - for (i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - - OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) | - BRW_VB0_ACCESS_VERTEXDATA | - (input->stride << BRW_VB0_PITCH_SHIFT)); - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->offset); - if (BRW_IS_IGDNG(brw)) { - if (input->stride) { - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->offset + input->stride * input->count - 1); - } else { - assert(input->count == 1); - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->offset + input->element_size - 1); - } - } else - OUT_BATCH(input->stride ? input->count : 0); - OUT_BATCH(0); /* Instance data step rate */ - } - ADVANCE_BATCH(); - - BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); - for (i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - uint32_t format = get_surface_type(input->glarray->Type, - input->glarray->Size, - input->glarray->Format, - input->glarray->Normalized); + BEGIN_BATCH(1 + brw->curr.num_vertex_elements * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_ve * 2) - 2)); + for (i = 0; i < brw->vb.nr_ve; i++) { + const struct pipe_vertex_element *input = &brw->curr.vertex_element[i]; + uint32_t format = brw_translate_surface_format( input->src_format ); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC; uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC; - switch (input->glarray->Size) { + switch (input->nr_components) { case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; @@ -352,11 +384,29 @@ static void 
brw_emit_vertices(struct brw_context *brw) ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } ADVANCE_BATCH(); + return 0; +} + + +static int brw_emit_vertices( struct brw_context *brw ) +{ + int ret; + + ret = brw_emit_vertex_buffers( brw ); + if (ret) + return ret; + + ret = brw_emit_vertex_elements( brw ); + if (ret) + return ret; + + return 0; } + const struct brw_tracked_state brw_vertices = { .dirty = { - .mesa = 0, + .mesa = PIPE_NEW_INDEX_RANGE, .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES, .cache = 0, }, @@ -364,104 +414,106 @@ const struct brw_tracked_state brw_vertices = { .emit = brw_emit_vertices, }; -static void brw_prepare_indices(struct brw_context *brw) + +static int brw_prepare_indices(struct brw_context *brw) { - const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - GLuint ib_size; + struct pipe_buffer *index_buffer = brw->curr.index_buffer; struct brw_winsys_buffer *bo = NULL; - struct gl_buffer_object *bufferobj; GLuint offset; - GLuint ib_type_size; + GLuint index_size; + GLuint ib_size; + int ret; if (index_buffer == NULL) - return; + return 0; - ib_type_size = get_size(index_buffer->type); - ib_size = ib_type_size * index_buffer->count; - bufferobj = index_buffer->obj;; + if (DEBUG & DEBUG_VERTS) + debug_printf("%s: index_size:%d index_buffer->size:%d\n", + __FUNCTION__, + brw->curr.index_size, + brw->curr.index_buffer->size); - /* Turn into a proper VBO: - */ - if (!_mesa_is_bufferobj(bufferobj)) { - brw->ib.start_vertex_offset = 0; + ib_size = index_buffer->size; + index_size = brw->curr.index_size; - /* Get new bufferobj, offset: - */ - get_space(brw, ib_size, &bo, &offset); - - /* Straight upload + /* Turn userbuffer into a proper hardware buffer? 
+ */ + if (brw_buffer_is_user_buffer(index_buffer)) { + struct pipe_buffer *upload_buf; + + ret = u_upload_buffer( brw->vb.upload_index, + 0, + ib_size, + index_buffer, + &offset, + &upload_buf ); + if (ret) + return ret; + + bo = brw_buffer(upload_buf)->bo; + brw->sws->bo_reference(bo); + pipe_buffer_reference( &upload_buf, NULL ); + + /* XXX: annotate the userbuffer with the upload information so + * that successive calls don't get re-uploaded. */ - brw_bo_subdata(bo, offset, ib_size, index_buffer->ptr); - - } else { - offset = (GLuint) (unsigned long) index_buffer->ptr; - brw->ib.start_vertex_offset = 0; + } + else { + bo = brw_buffer(index_buffer)->bo; + brw->sws->bo_reference(bo); + + ib_size = bo->size; + offset = 0; + } - /* If the index buffer isn't aligned to its element size, we have to - * rebase it into a temporary. - */ - if ((get_size(index_buffer->type) - 1) & offset) { - GLubyte *map = ctx->Driver.MapBuffer(ctx, - GL_ELEMENT_ARRAY_BUFFER_ARB, - GL_DYNAMIC_DRAW_ARB, - bufferobj); - map += offset; - - get_space(brw, ib_size, &bo, &offset); - - dri_bo_subdata(bo, offset, ib_size, map); - - ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); - } else { - bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), - INTEL_READ); - brw->sws->bo_reference(bo); - - /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading - * the index buffer state when we're just moving the start index - * of our drawing. - */ - brw->ib.start_vertex_offset = offset / ib_type_size; - offset = 0; - ib_size = bo->size; - } + /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the + * index buffer state when we're just moving the start index of our + * drawing. + * + * In gallium this will happen in the case where successive draw + * calls are made with (distinct?) userbuffers, but the upload_mgr + * places the data into a single winsys buffer. 
+ * + * This statechange doesn't raise any state flags and is always + * just merged into the final draw packet: + */ + if (1) { + assert((offset & (index_size - 1)) == 0); + brw->ib.start_vertex_offset = offset / index_size; } + /* These statechanges trigger a new CMD_INDEX_BUFFER packet: + */ if (brw->ib.bo != bo || - brw->ib.offset != offset || brw->ib.size != ib_size) { - drm_intel_bo_unreference(brw->ib.bo); + brw->sws->bo_unreference(brw->ib.bo); brw->ib.bo = bo; - brw->ib.offset = offset; brw->ib.size = ib_size; - brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; - } else { - drm_intel_bo_unreference(bo); + } + else { + brw->sws->bo_unreference(bo); } brw_add_validated_bo(brw, brw->ib.bo); + return 0; } const struct brw_tracked_state brw_indices = { .dirty = { - .mesa = 0, - .brw = BRW_NEW_INDICES, + .mesa = PIPE_NEW_INDEX_BUFFER, + .brw = 0, .cache = 0, }, .prepare = brw_prepare_indices, }; -static void brw_emit_index_buffer(struct brw_context *brw) +static int brw_emit_index_buffer(struct brw_context *brw) { - const struct _mesa_index_buffer *index_buffer = brw->ib.ib; - - if (index_buffer == NULL) - return; - /* Emit the indexbuffer packet: */ + if (brw->ib.bo) { struct brw_indexbuffer ib; @@ -469,7 +521,7 @@ static void brw_emit_index_buffer(struct brw_context *brw) ib.header.bits.opcode = CMD_INDEX_BUFFER; ib.header.bits.length = sizeof(ib)/4 - 2; - ib.header.bits.index_format = get_index_type(index_buffer->type); + ib.header.bits.index_format = get_index_type(brw->ib.size); ib.header.bits.cut_index_enable = 0; BEGIN_BATCH(4, IGNORE_CLIPRECTS); @@ -483,6 +535,8 @@ static void brw_emit_index_buffer(struct brw_context *brw) OUT_BATCH( 0 ); ADVANCE_BATCH(); } + + return 0; } const struct brw_tracked_state brw_index_buffer = { diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c index 1df561386e..df49d4b72f 100644 --- a/src/gallium/drivers/i965/brw_eu.c +++ b/src/gallium/drivers/i965/brw_eu.c @@ -29,6 +29,7 @@ * Keith Whitwell */ 
+#include "util/u_memory.h" #include "brw_context.h" #include "brw_defines.h" @@ -237,7 +238,7 @@ brw_resolve_cals(struct brw_compile *c) struct brw_glsl_call *call, *next; for (call = c->first_call; call; call = next) { next = call->next; - _mesa_free(call); + FREE(call); } c->first_call = NULL; } @@ -247,7 +248,7 @@ brw_resolve_cals(struct brw_compile *c) struct brw_glsl_label *label, *next; for (label = c->first_label; label; label = next) { next = label->next; - _mesa_free(label); + FREE(label); } c->first_label = NULL; } diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c index ad7ec36e86..5989f5a04e 100644 --- a/src/gallium/drivers/i965/brw_eu_debug.c +++ b/src/gallium/drivers/i965/brw_eu_debug.c @@ -28,7 +28,8 @@ * Authors: * Keith Whitwell */ - + +#include "util/u_debug.h" #include "brw_eu.h" @@ -52,7 +53,7 @@ void brw_print_reg( struct brw_reg hwreg ) "f" }; - _mesa_printf("%s%s", + debug_printf("%s%s", hwreg.abs ? "abs/" : "", hwreg.negate ? 
"-" : ""); @@ -64,7 +65,7 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && hwreg.type == BRW_REGISTER_TYPE_F) { /* vector register */ - _mesa_printf("vec%d", hwreg.nr); + debug_printf("vec%d", hwreg.nr); } else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && hwreg.vstride == BRW_VERTICAL_STRIDE_0 && @@ -72,13 +73,13 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && hwreg.type == BRW_REGISTER_TYPE_F) { /* "scalar" register */ - _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); } else if (hwreg.file == BRW_IMMEDIATE_VALUE) { - _mesa_printf("imm %f", hwreg.dw1.f); + debug_printf("imm %f", hwreg.dw1.f); } else { - _mesa_printf("%s%d.%d<%d;%d,%d>:%s", + debug_printf("%s%d.%d<%d;%d,%d>:%s", file[hwreg.file], hwreg.nr, hwreg.subnr / type_sz(hwreg.type), diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index 0f2612c181..98fec85c1d 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -315,24 +315,20 @@ const struct brw_tracked_state brw_polygon_stipple = { static void upload_polygon_stipple_offset(struct brw_context *brw) { - __DRIdrawablePrivate *dPriv = brw->intel.driDrawable; struct brw_polygon_stipple_offset bpso; memset(&bpso, 0, sizeof(bpso)); bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; - /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0), - * we have to invert the Y axis in order to match the OpenGL - * pixel coordinate system, and our offset must be matched - * to the window position. If we're drawing to a FBO - * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate - * system works just fine, and there's no window system to - * worry about. + /* Never need to offset stipple coordinates. + * + * XXX: is it ever necessary to invert Y values? 
*/ - if (brw->intel.ctx.DrawBuffer->Name == 0) { - bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; - bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; + if (0) { + int x = 0, y = 0, h = 0; + bpso.bits0.x_offset = (32 - (x & 31)) & 31; + bpso.bits0.y_offset = (32 - ((y + h) & 31)) & 31; } else { bpso.bits0.y_offset = 0; diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index e85a1a9c1b..65e7151517 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -53,6 +53,9 @@ static void brw_note_fence( struct brw_context *brw, GLuint fence ) static GLuint brw_flush_cmd( void ) { struct brw_mi_flush flush; + + return ; + flush.opcode = CMD_MI_FLUSH; flush.pad = 0; flush.flags = BRW_FLUSH_STATE_CACHE; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index fbb772d18c..8b61da763c 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -33,6 +33,25 @@ #include "brw_util.h" #include "brw_wm.h" + +/** + * Determine if the given fragment program uses GLSL features such + * as flow conditionals, loops, subroutines. + * Some GLSL shaders may use these features, others might not. 
+ */ +GLboolean brw_wm_is_glsl(const struct brw_fragment_shader *fp) +{ + return (fp->info.insn_count[TGSI_OPCODE_ARL] > 0 || + fp->info.insn_count[TGSI_OPCODE_IF] > 0 || + fp->info.insn_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */ + fp->info.insn_count[TGSI_OPCODE_CAL] > 0 || + fp->info.insn_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */ + fp->info.insn_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */ + fp->info.insn_count[TGSI_OPCODE_BGNLOOP] > 0); +} + + + static void brwBindProgram( struct brw_context *brw, GLenum target, struct gl_program *prog ) diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index b0928adbe4..d1d0d7cd43 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -1,26 +1,11 @@ -static void brw_merge_inputs( struct brw_context *brw, - const struct gl_client_array *arrays[]) -{ - struct brw_vertex_info old = brw->vb.info; - GLuint i; - - for (i = 0; i < VERT_ATTRIB_MAX; i++) - brw->sws->bo_unreference(brw->vb.inputs[i].bo); - memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs)); - memset(&brw->vb.info, 0, sizeof(brw->vb.info)); +void +brw_pipe_vertex_cleanup( struct brw_context *brw ) +{ for (i = 0; i < VERT_ATTRIB_MAX; i++) { - brw->vb.inputs[i].glarray = arrays[i]; - brw->vb.inputs[i].attrib = (gl_vert_attrib) i; - - if (arrays[i]->StrideB != 0) - brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) << - ((i%16) * 2); + brw->sws->bo_unreference(brw->vb.inputs[i].bo); + brw->vb.inputs[i].bo = NULL; } - - /* Raise statechanges if input sizes have changed. 
*/ - if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0) - brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS; } diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index 79d595d0ad..b0be0e1f8a 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -56,6 +56,14 @@ struct brw_transfer unsigned offset; }; +struct brw_buffer +{ + struct pipe_buffer base; + struct brw_winsys_buffer *bo; + void *ptr; + boolean is_user_buffer; +}; + /* * Cast wrappers @@ -72,5 +80,19 @@ brw_transfer(struct pipe_transfer *transfer) return (struct brw_transfer *)transfer; } +static INLINE struct brw_buffer * +brw_buffer(struct pipe_buffer *buffer) +{ + return (struct brw_buffer *)buffer; +} + + +/* Pipe buffer helpers + */ +static INLINE boolean +brw_buffer_is_user_buffer( const struct pipe_buffer *buf ) +{ + return ((const struct brw_buffer *)buf)->is_user_buffer; +} #endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 53e8f09e37..e2db2e76e6 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -134,7 +134,7 @@ static void upload_sf_prog(struct brw_context *brw) key.attrs = brw->vs.prog_data->outputs_written; /* BRW_NEW_REDUCED_PRIMITIVE */ - switch (brw->intel.reduced_primitive) { + switch (brw->reduced_primitive) { case GL_TRIANGLES: /* NOTE: We just use the edgeflag attribute as an indicator that * unfilled triangles are active. 
We don't actually do the diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index 0e406f12e1..648a16a038 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -40,19 +40,12 @@ static void upload_sf_vp(struct brw_context *brw) const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; struct brw_sf_viewport sfv; GLfloat y_scale, y_bias; - const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0); const GLfloat *v = ctx->Viewport._WindowMap.m; memset(&sfv, 0, sizeof(sfv)); - if (render_to_fbo) { - y_scale = 1.0; - y_bias = 0; - } - else { - y_scale = -1.0; - y_bias = ctx->DrawBuffer->Height; - } + y_scale = 1.0; + y_bias = 0; /* _NEW_VIEWPORT */ @@ -73,20 +66,11 @@ static void upload_sf_vp(struct brw_context *brw) * Note that the hardware's coordinates are inclusive, while Mesa's min is * inclusive but max is exclusive. */ - if (render_to_fbo) { - /* texmemory: Y=0=bottom */ - sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; - sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; - sfv.scissor.ymin = ctx->DrawBuffer->_Ymin; - sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1; - } - else { - /* memory: Y=0=top */ - sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; - sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; - sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax; - sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1; - } + /* Y=0=bottom */ + sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; + sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; + sfv.scissor.ymin = ctx->DrawBuffer->_Ymin; + sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1; brw->sws->bo_unreference(brw->sf.vp_bo); brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); @@ -151,7 +135,7 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) /* _NEW_LIGHT */ key->provoking_vertex = ctx->Light.ProvokingVertex; - key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 
0; + key->render_to_fbo = 1; } static struct brw_winsys_buffer * @@ -211,11 +195,6 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else sf.sf5.front_winding = BRW_FRONTWINDING_CW; - /* The viewport is inverted for rendering to a FBO, and that inverts - * polygon front/back orientation. - */ - sf.sf5.front_winding ^= key->render_to_fbo; - switch (key->cull_face) { case GL_FRONT: sf.sf6.cull_mode = BRW_CULLMODE_FRONT; @@ -245,7 +224,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.sf6.line_width = 0; /* _NEW_BUFFERS */ - key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; + key->render_to_fbo = 1; if (!key->render_to_fbo) { /* Rendering to an OpenGL window */ sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index 02657eaba7..9bf34c3fe4 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -109,8 +109,8 @@ struct brw_surface_key { /*********************************************************************** * brw_state.c */ -void brw_validate_state(struct brw_context *brw); -void brw_upload_state(struct brw_context *brw); +int brw_validate_state(struct brw_context *brw); +int brw_upload_state(struct brw_context *brw); void brw_init_state(struct brw_context *brw); void brw_destroy_state(struct brw_context *brw); @@ -157,7 +157,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) +#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS) #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) GLboolean brw_cached_batch_struct( struct brw_context *brw, 
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c index b285837070..324fce5163 100644 --- a/src/gallium/drivers/i965/brw_state_batch.c +++ b/src/gallium/drivers/i965/brw_state_batch.c @@ -47,7 +47,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct header *newheader = (struct header *)data; if (brw->emit_state_always) { - brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } @@ -74,7 +74,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, emit: memcpy(item->header, newheader, sz); - brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS); + brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c index 83f138f67a..d2df8af9f4 100644 --- a/src/gallium/drivers/i965/brw_swtnl.c +++ b/src/gallium/drivers/i965/brw_swtnl.c @@ -12,13 +12,13 @@ static GLboolean check_fallbacks( struct brw_context *brw, * use fallbacks. If we're forcing fallbacks, always * use fallfacks. */ - if (brw->intel.conformance_mode == 0) + if (brw->flags.no_swtnl) return GL_FALSE; - if (brw->intel.conformance_mode == 2) + if (brw->flags.force_swtnl) return GL_TRUE; - if (ctx->Polygon.SmoothFlag) { + if (brw->curr.rast->tmpl.smooth_polys) { for (i = 0; i < nr_prims; i++) if (reduced_prim[prim[i].mode] == GL_TRIANGLES) return GL_TRUE; diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index 82cd8007ac..51e23b9640 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -161,6 +161,13 @@ struct brw_winsys_screen { size_t size, const void *data); + /* XXX: couldn't this be handled by returning true/false on + * bo_emit_reloc? 
+ */ + boolean (*check_aperture_space)( struct brw_winsys_screen *iws, + struct brw_winsys_buffer **buffers, + unsigned count ); + /** * Map a buffer. */ diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 284cf42f8b..4948ea0dff 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -158,7 +158,7 @@ static void do_wm_prog( struct brw_context *brw, memcpy(&c->key, key, sizeof(*key)); c->fp = fp; - c->env_param = brw->intel.ctx.FragmentProgram.Parameters; + c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/ brw_init_compile(brw, &c->func); diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 18775830f9..e06de95a8a 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -131,9 +131,9 @@ struct brw_wm_ref { GLuint insn:24; }; -struct brw_wm_constref { +struct brw_wm_imm_ref { const struct brw_wm_ref *ref; - GLfloat constval; + GLfloat imm1f; }; @@ -232,8 +232,8 @@ struct brw_wm_compile { struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; GLuint nr_insns; - struct brw_wm_constref constref[BRW_WM_MAX_CONST]; - GLuint nr_constrefs; + struct brw_wm_imm_ref imm_ref[BRW_WM_MAX_CONST]; + GLuint nr_imm_refs; struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2]; diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index c4f0711793..a8de5fdd0b 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -7,34 +7,6 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, const struct prog_instruction *inst, GLuint component); -/** - * Determine if the given fragment program uses GLSL features such - * as flow conditionals, loops, subroutines. - * Some GLSL shaders may use these features, others might not. 
- */ -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp) -{ - int i; - - for (i = 0; i < fp->Base.NumInstructions; i++) { - const struct prog_instruction *inst = &fp->Base.Instructions[i]; - switch (inst->Opcode) { - case OPCODE_ARL: - case OPCODE_IF: - case OPCODE_ENDIF: - case OPCODE_CAL: - case OPCODE_BRK: - case OPCODE_RET: - case OPCODE_BGNLOOP: - return GL_TRUE; - default: - break; - } - } - return GL_FALSE; -} - - static void reclaim_temps(struct brw_wm_compile *c); diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index de5f5fe821..31b0270e84 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -124,33 +124,33 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, } -/** Return a ref to a constant/literal value */ -static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c, - const GLfloat *constval ) +/** Return a ref to an immediate value */ +static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c, + const GLfloat *imm1f ) { GLuint i; /* Search for an existing const value matching the request: */ - for (i = 0; i < c->nr_constrefs; i++) { - if (c->constref[i].constval == *constval) - return c->constref[i].ref; + for (i = 0; i < c->nr_imm_refs; i++) { + if (c->imm_ref[i].imm_val == *imm1f) + return c->imm_ref[i].ref; } /* Else try to add a new one: */ - if (c->nr_constrefs < BRW_WM_MAX_CONST) { - GLuint i = c->nr_constrefs++; + if (c->nr_imm_refs < BRW_WM_MAX_IMM) { + GLuint i = c->nr_imm_refs++; - /* A constant is a special type of parameter: + /* An immediate is a special type of parameter: */ - c->constref[i].constval = *constval; - c->constref[i].ref = get_param_ref(c, constval); + c->imm_ref[i].imm_val = *imm_val; + c->imm_ref[i].ref = get_param_ref(c, imm_val); - return c->constref[i].ref; + return c->imm_ref[i].ref; } else { - _mesa_printf("%s: out of constrefs\n", __FUNCTION__); + _mesa_printf("%s: 
out of imm_refs\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } @@ -200,7 +200,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, case PROGRAM_CONSTANT: /* These are invarient: */ - ref = get_const_ref(c, &plist->ParameterValues[idx][component]); + ref = get_imm_ref(c, &plist->ParameterValues[idx][component]); break; case PROGRAM_STATE_VAR: @@ -266,9 +266,9 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, static const GLfloat const_one = 1.0; if (component == SWIZZLE_ZERO) - src_ref = get_const_ref(c, &const_zero); + src_ref = get_imm_ref(c, &const_zero); else if (component == SWIZZLE_ONE) - src_ref = get_const_ref(c, &const_one); + src_ref = get_imm_ref(c, &const_one); else src_ref = pass0_get_reg(c, src.File, src.Index, component); diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index c76bff9181..ec9c859fcb 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -554,7 +554,8 @@ st_draw_vbo(GLcontext *ctx, /* Gallium probably doesn't want this in some cases. 
*/ if (!index_bounds_valid) - vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); + if (!vbo_all_varyings_in_vbos(arrays)) + vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index); /* sanity check for pointer arithmetic below */ assert(sizeof(arrays[0]->Ptr[0]) == 1); -- cgit v1.2.3 From 562ca4eae257dd3b268e7f13487c8cd91f618eae Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 Oct 2009 01:20:56 +0100 Subject: i965g: more compiling wip --- src/gallium/drivers/i965/brw_context.h | 15 +- src/gallium/drivers/i965/brw_curbe.c | 3 +- src/gallium/drivers/i965/brw_gs.c | 48 +++---- src/gallium/drivers/i965/brw_gs.h | 4 +- src/gallium/drivers/i965/brw_gs_state.c | 21 +-- src/gallium/drivers/i965/brw_misc_state.c | 222 ++++++++++++++---------------- src/gallium/drivers/i965/brw_pipe_blend.c | 19 +++ src/gallium/drivers/i965/brw_pipe_rast.c | 20 +++ src/gallium/drivers/i965/brw_screen.h | 7 + src/gallium/drivers/i965/brw_sf.c | 2 +- src/gallium/drivers/i965/brw_state.h | 4 +- src/gallium/drivers/i965/brw_urb.c | 3 +- src/gallium/drivers/i965/brw_vs.c | 4 +- src/gallium/drivers/i965/brw_vs_emit.c | 67 +++++---- src/gallium/drivers/i965/brw_wm.c | 2 +- src/gallium/drivers/i965/brw_wm.h | 2 +- 16 files changed, 243 insertions(+), 200 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 7ead641811..2e17e150bb 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -184,6 +184,8 @@ struct brw_fragment_shader { #define PIPE_NEW_CLIP 0x2 #define PIPE_NEW_INDEX_BUFFER 0x2 #define PIPE_NEW_INDEX_RANGE 0x2 +#define PIPE_NEW_BLEND_COLOR 0x2 +#define PIPE_NEW_POLYGON_STIPPLE 0x2 #define BRW_NEW_URB_FENCE 0x1 @@ -202,7 +204,9 @@ struct brw_fragment_shader { #define BRW_NEW_VERTICES 0x8000 /** * Used for any batch entry with a relocated pointer that will be used - * by any 3D rendering. + * by any 3D rendering. 
Need to re-emit these fresh in each + * batchbuffer as the referenced buffers may be relocated in the + * meantime. */ #define BRW_NEW_BATCH 0x10000 /** brw->depth_region updated */ @@ -271,7 +275,7 @@ struct brw_vs_prog_data { GLuint curb_read_length; GLuint urb_read_length; GLuint total_grf; - GLuint outputs_written; + GLuint nr_outputs_written; GLuint nr_params; /**< number of float params/constants */ GLuint inputs_read; @@ -487,6 +491,9 @@ struct brw_context struct pipe_buffer *vertex_constants; struct pipe_buffer *fragment_constants; + struct brw_blend_constant_color bcc; + struct brw_polygon_stipple bps; + /** * Index buffer for this draw_prims call. * @@ -726,11 +733,11 @@ void brw_init_shader_funcs( struct brw_context *brw ); /* brw_urb.c */ -void brw_upload_urb_fence(struct brw_context *brw); +int brw_upload_urb_fence(struct brw_context *brw); /* brw_curbe.c */ -void brw_upload_cs_urb_state(struct brw_context *brw); +int brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ int brw_disasm (FILE *file, struct brw_instruction *inst); diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 278ffa4ca2..3dd08f6eeb 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -126,7 +126,7 @@ const struct brw_tracked_state brw_curbe_offsets = { * fixed-function hardware in a double-buffering scheme to avoid a * pipeline stall each time the contents of the curbe is changed. 
*/ -void brw_upload_cs_urb_state(struct brw_context *brw) +int brw_upload_cs_urb_state(struct brw_context *brw) { struct brw_cs_urb_state cs_urb; memset(&cs_urb, 0, sizeof(cs_urb)); @@ -144,6 +144,7 @@ void brw_upload_cs_urb_state(struct brw_context *brw) assert(brw->urb.nr_cs_entries); BRW_CACHED_BATCH_STRUCT(brw, &cs_urb); + return 0; } static GLfloat fixed_plane[6][4] = { diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 692ce46679..3ecaa74e4f 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -54,7 +54,7 @@ static void compile_gs_prog( struct brw_context *brw, /* Need to locate the two positions present in vertex + header. * These are currently hardcoded: */ - c.nr_attrs = util_count_bits(c.key.attrs); + c.nr_attrs = c.key.nr_attrs; if (BRW_IS_IGDNG(brw)) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ @@ -80,30 +80,30 @@ static void compile_gs_prog( struct brw_context *brw, * already been weeded out by this stage: */ switch (key->primitive) { - case GL_QUADS: + case PIPE_PRIM_QUADS: brw_gs_quads( &c ); break; - case GL_QUAD_STRIP: + case PIPE_PRIM_QUAD_STRIP: brw_gs_quad_strip( &c ); break; - case GL_LINE_LOOP: + case PIPE_PRIM_LINE_LOOP: brw_gs_lines( &c ); break; - case GL_LINES: + case PIPE_PRIM_LINES: if (key->hint_gs_always) brw_gs_lines( &c ); else { return; } break; - case GL_TRIANGLES: + case PIPE_PRIM_TRIANGLES: if (key->hint_gs_always) brw_gs_tris( &c ); else { return; } break; - case GL_POINTS: + case PIPE_PRIM_POINTS: if (key->hint_gs_always) brw_gs_points( &c ); else { @@ -129,17 +129,17 @@ static void compile_gs_prog( struct brw_context *brw, &brw->gs.prog_data ); } -static const GLenum gs_prim[GL_POLYGON+1] = { - GL_POINTS, - GL_LINES, - GL_LINE_LOOP, - GL_LINES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_TRIANGLES, - GL_QUADS, - GL_QUAD_STRIP, - GL_TRIANGLES +static const unsigned gs_prim[PIPE_PRIM_MAX] = { + PIPE_PRIM_POINTS, + 
PIPE_PRIM_LINES, + PIPE_PRIM_LINE_LOOP, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_QUADS, + PIPE_PRIM_QUAD_STRIP, + PIPE_PRIM_TRIANGLES }; static void populate_key( struct brw_context *brw, @@ -148,7 +148,7 @@ static void populate_key( struct brw_context *brw, memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ - key->attrs = brw->vs.prog_data->outputs_written; + key->nr_attrs = brw->vs.prog_data->nr_outputs_written; /* BRW_NEW_PRIMITIVE */ key->primitive = gs_prim[brw->primitive]; @@ -156,14 +156,14 @@ static void populate_key( struct brw_context *brw, key->hint_gs_always = 0; /* debug code? */ key->need_gs_prog = (key->hint_gs_always || - brw->primitive == GL_QUADS || - brw->primitive == GL_QUAD_STRIP || - brw->primitive == GL_LINE_LOOP); + brw->primitive == PIPE_PRIM_QUADS || + brw->primitive == PIPE_PRIM_QUAD_STRIP || + brw->primitive == PIPE_PRIM_LINE_LOOP); } /* Calculate interpolants for triangle and line rasterization. */ -static void prepare_gs_prog(struct brw_context *brw) +static int prepare_gs_prog(struct brw_context *brw) { struct brw_gs_prog_key key; /* Populate the key: @@ -184,6 +184,8 @@ static void prepare_gs_prog(struct brw_context *brw) if (brw->gs.prog_bo == NULL) compile_gs_prog( brw, &key ); } + + return 0; } diff --git a/src/gallium/drivers/i965/brw_gs.h b/src/gallium/drivers/i965/brw_gs.h index bbb991ea2e..6e616dcb87 100644 --- a/src/gallium/drivers/i965/brw_gs.h +++ b/src/gallium/drivers/i965/brw_gs.h @@ -40,11 +40,11 @@ #define MAX_GS_VERTS (4) struct brw_gs_prog_key { - GLuint attrs:32; + GLuint nr_attrs:8; GLuint primitive:4; GLuint hint_gs_always:1; GLuint need_gs_prog:1; - GLuint pad:26; + GLuint pad:18; }; struct brw_gs_compile { diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index 6d03d72d96..15a66c9741 100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -29,11 +29,12 @@ * Keith 
Whitwell */ - +#include "util/u_math.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_debug.h" struct brw_gs_unit_key { unsigned int total_grf; @@ -76,7 +77,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) memset(&gs, 0, sizeof(gs)); - gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; if (key->prog_active) /* reloc */ gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; @@ -100,7 +101,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) if (BRW_IS_IGDNG(brw)) gs.thread4.rendering_enable = 1; - if (INTEL_DEBUG & DEBUG_STATS) + if (BRW_DEBUG & DEBUG_STATS) gs.thread4.stats_enable = 1; bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, @@ -111,17 +112,17 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) if (key->prog_active) { /* Emit GS program relocation */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - gs.thread0.grf_reg_count << 1, - offsetof(struct brw_gs_unit_state, thread0), - brw->gs.prog_bo); + brw->sws->bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + gs.thread0.grf_reg_count << 1, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); } return bo; } -static void prepare_gs_unit(struct brw_context *brw) +static int prepare_gs_unit(struct brw_context *brw) { struct brw_gs_unit_key key; @@ -135,6 +136,8 @@ static void prepare_gs_unit(struct brw_context *brw) if (brw->gs.state_bo == NULL) { brw->gs.state_bo = gs_unit_create_from_key(brw, &key); } + + return 0; } const struct brw_tracked_state brw_gs_unit = { diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index 98fec85c1d..ccebe08b4f 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -31,10 +31,12 @@ +#include "brw_debug.h" #include "brw_batchbuffer.h" 
#include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_screen.h" @@ -44,25 +46,16 @@ * Blend color */ -static void upload_blend_constant_color(struct brw_context *brw) +static int upload_blend_constant_color(struct brw_context *brw) { - struct brw_blend_constant_color bcc; - - memset(&bcc, 0, sizeof(bcc)); - bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; - bcc.header.length = sizeof(bcc)/4-2; - bcc.blend_constant_color[0] = ctx->Color.BlendColor[0]; - bcc.blend_constant_color[1] = ctx->Color.BlendColor[1]; - bcc.blend_constant_color[2] = ctx->Color.BlendColor[2]; - bcc.blend_constant_color[3] = ctx->Color.BlendColor[3]; - - BRW_CACHED_BATCH_STRUCT(brw, &bcc); + BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bcc); + return 0; } const struct brw_tracked_state brw_blend_constant_color = { .dirty = { - .mesa = _NEW_COLOR, + .mesa = PIPE_NEW_BLEND_COLOR, .brw = 0, .cache = 0 }, @@ -70,30 +63,32 @@ const struct brw_tracked_state brw_blend_constant_color = { }; /* Constant single cliprect for framebuffer object or DRI2 drawing */ -static void upload_drawing_rect(struct brw_context *brw) +static int upload_drawing_rect(struct brw_context *brw) { BEGIN_BATCH(4, NO_LOOP_CLIPRECTS); OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965); OUT_BATCH(0); - OUT_BATCH(((brw->fb.width - 1) & 0xffff) | - ((brw->fb.height - 1) << 16)); + OUT_BATCH(((brw->curr.fb.width - 1) & 0xffff) | + ((brw->curr.fb.height - 1) << 16)); OUT_BATCH(0); ADVANCE_BATCH(); + return 0; } const struct brw_tracked_state brw_drawing_rect = { .dirty = { - .mesa = _NEW_BUFFERS, + .mesa = PIPE_NEW_FRAMEBUFFER, .brw = 0, .cache = 0 }, .emit = upload_drawing_rect }; -static void prepare_binding_table_pointers(struct brw_context *brw) +static int prepare_binding_table_pointers(struct brw_context *brw) { brw_add_validated_bo(brw, brw->vs.bind_bo); brw_add_validated_bo(brw, brw->wm.bind_bo); + return 0; } /** @@ -103,7 +98,7 @@ static void prepare_binding_table_pointers(struct brw_context *brw) * The 
binding table pointers are relative to the surface state base address, * which is 0. */ -static void upload_binding_table_pointers(struct brw_context *brw) +static int upload_binding_table_pointers(struct brw_context *brw) { BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); @@ -116,6 +111,7 @@ static void upload_binding_table_pointers(struct brw_context *brw) OUT_BATCH(0); /* sf */ OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ ADVANCE_BATCH(); + return 0; } const struct brw_tracked_state brw_binding_table_pointers = { @@ -135,7 +131,7 @@ const struct brw_tracked_state brw_binding_table_pointers = { * The state pointers in this packet are all relative to the general state * base address set by CMD_STATE_BASE_ADDRESS, which is 0. */ -static void upload_pipelined_state_pointers(struct brw_context *brw ) +static int upload_pipelined_state_pointers(struct brw_context *brw ) { BEGIN_BATCH(7, IGNORE_CLIPRECTS); OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); @@ -151,10 +147,11 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) ADVANCE_BATCH(); brw->state.dirty.brw |= BRW_NEW_PSP; + return 0; } -static void prepare_psp_urb_cbs(struct brw_context *brw) +static int prepare_psp_urb_cbs(struct brw_context *brw) { brw_add_validated_bo(brw, brw->vs.state_bo); brw_add_validated_bo(brw, brw->gs.state_bo); @@ -162,13 +159,26 @@ static void prepare_psp_urb_cbs(struct brw_context *brw) brw_add_validated_bo(brw, brw->sf.state_bo); brw_add_validated_bo(brw, brw->wm.state_bo); brw_add_validated_bo(brw, brw->cc.state_bo); + return 0; } -static void upload_psp_urb_cbs(struct brw_context *brw ) +static int upload_psp_urb_cbs(struct brw_context *brw ) { - upload_pipelined_state_pointers(brw); - brw_upload_urb_fence(brw); - brw_upload_cs_urb_state(brw); + int ret; + + ret = upload_pipelined_state_pointers(brw); + if (ret) + return ret; + + ret = brw_upload_urb_fence(brw); + if (ret) + return ret; + + 
ret = brw_upload_cs_urb_state(brw); + if (ret) + return ret; + + return 0; } const struct brw_tracked_state brw_psp_urb_cbs = { @@ -187,20 +197,22 @@ const struct brw_tracked_state brw_psp_urb_cbs = { .emit = upload_psp_urb_cbs, }; -static void prepare_depthbuffer(struct brw_context *brw) +static int prepare_depthbuffer(struct brw_context *brw) { - struct intel_region *region = brw->state.depth_region; + struct pipe_surface *zsbuf = brw->curr.fb.zsbuf; - if (region != NULL) - brw_add_validated_bo(brw, region->buffer); + if (zsbuf) + brw_add_validated_bo(brw, brw_surface_bo(zsbuf)); + + return 0; } -static void emit_depthbuffer(struct brw_context *brw) +static int emit_depthbuffer(struct brw_context *brw) { - struct intel_region *region = brw->state.depth_region; + struct pipe_surface *surface = brw->curr.fb.zsbuf; unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5; - if (region == NULL) { + if (surface == NULL) { BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | @@ -214,38 +226,45 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } else { + struct brw_winsys_buffer *bo; unsigned int format; + unsigned int pitch; + unsigned int cpp; - switch (region->cpp) { - case 2: + switch (surface->format) { + case PIPE_FORMAT_Z16_UNORM: format = BRW_DEPTHFORMAT_D16_UNORM; + cpp = 2; + break; + case PIPE_FORMAT_Z24S8_UNORM: + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + cpp = 4; break; - case 4: - if (intel->depth_buffer_is_float) - format = BRW_DEPTHFORMAT_D32_FLOAT; - else - format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + case PIPE_FORMAT_Z32_FLOAT: + format = BRW_DEPTHFORMAT_D32_FLOAT; + cpp = 4; break; default: assert(0); - return; + return PIPE_ERROR_BAD_INPUT; } - assert(region->tiling != I915_TILING_X); + bo = brw_surface_bo(surface); + pitch = brw_surface_pitch(surface); BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); - 
OUT_BATCH(((region->pitch * region->cpp) - 1) | + OUT_BATCH(((pitch * cpp) - 1) | (format << 18) | (BRW_TILEWALK_YMAJOR << 26) | - ((region->tiling != I915_TILING_NONE) << 27) | + ((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) | (BRW_SURFACE_2D << 29)); - OUT_RELOC(region->buffer, + OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); + surface->offset); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | - ((region->pitch - 1) << 6) | - ((region->height - 1) << 19)); + ((pitch - 1) << 6) | + ((surface->height - 1) << 19)); OUT_BATCH(0); if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) @@ -253,6 +272,8 @@ static void emit_depthbuffer(struct brw_context *brw) ADVANCE_BATCH(); } + + return 0; } const struct brw_tracked_state brw_depthbuffer = { @@ -271,37 +292,15 @@ const struct brw_tracked_state brw_depthbuffer = { * Polygon stipple packet */ -static void upload_polygon_stipple(struct brw_context *brw) +static int upload_polygon_stipple(struct brw_context *brw) { - struct brw_polygon_stipple bps; - GLuint i; - - memset(&bps, 0, sizeof(bps)); - bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; - bps.header.length = sizeof(bps)/4-2; - - /* Polygon stipple is provided in OpenGL order, i.e. bottom - * row first. If we're rendering to a window (i.e. the - * default frame buffer object, 0), then we need to invert - * it to match our pixel layout. But if we're rendering - * to a FBO (i.e. any named frame buffer object), we *don't* - * need to invert - we already match the layout. 
- */ - if (ctx->DrawBuffer->Name == 0) { - for (i = 0; i < 32; i++) - bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */ - } - else { - for (i = 0; i < 32; i++) - bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */ - } - - BRW_CACHED_BATCH_STRUCT(brw, &bps); + BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bps); + return 0; } const struct brw_tracked_state brw_polygon_stipple = { .dirty = { - .mesa = _NEW_POLYGONSTIPPLE, + .mesa = PIPE_NEW_POLYGON_STIPPLE, .brw = 0, .cache = 0 }, @@ -313,37 +312,26 @@ const struct brw_tracked_state brw_polygon_stipple = { * Polygon stipple offset packet */ -static void upload_polygon_stipple_offset(struct brw_context *brw) +static int upload_polygon_stipple_offset(struct brw_context *brw) { struct brw_polygon_stipple_offset bpso; + /* This is invarient state in gallium: + */ memset(&bpso, 0, sizeof(bpso)); bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; bpso.header.length = sizeof(bpso)/4-2; - - /* Never need to offset stipple coordinates. - * - * XXX: is it ever necessary to invert Y values? 
- */ - if (0) { - int x = 0, y = 0, h = 0; - bpso.bits0.x_offset = (32 - (x & 31)) & 31; - bpso.bits0.y_offset = (32 - ((y + h) & 31)) & 31; - } - else { - bpso.bits0.y_offset = 0; - bpso.bits0.x_offset = 0; - } + bpso.bits0.y_offset = 0; + bpso.bits0.x_offset = 0; BRW_CACHED_BATCH_STRUCT(brw, &bpso); + return 0; } -#define _NEW_WINDOW_POS 0x40000000 - const struct brw_tracked_state brw_polygon_stipple_offset = { .dirty = { - .mesa = _NEW_WINDOW_POS, - .brw = 0, + .mesa = 0, + .brw = BRW_NEW_CONTEXT, .cache = 0 }, .emit = upload_polygon_stipple_offset @@ -352,12 +340,12 @@ const struct brw_tracked_state brw_polygon_stipple_offset = { /********************************************************************** * AA Line parameters */ -static void upload_aa_line_parameters(struct brw_context *brw) +static int upload_aa_line_parameters(struct brw_context *brw) { struct brw_aa_line_parameters balp; if (BRW_IS_965(brw)) - return; + return 0; /* use legacy aa line coverage computation */ memset(&balp, 0, sizeof(balp)); @@ -365,6 +353,7 @@ static void upload_aa_line_parameters(struct brw_context *brw) balp.header.length = sizeof(balp) / 4 - 2; BRW_CACHED_BATCH_STRUCT(brw, &balp); + return 0; } const struct brw_tracked_state brw_aa_line_parameters = { @@ -380,31 +369,16 @@ const struct brw_tracked_state brw_aa_line_parameters = { * Line stipple packet */ -static void upload_line_stipple(struct brw_context *brw) +static int upload_line_stipple(struct brw_context *brw) { - struct brw_line_stipple bls; - GLfloat tmp; - GLint tmpi; - - memset(&bls, 0, sizeof(bls)); - bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; - bls.header.length = sizeof(bls)/4 - 2; - - bls.bits0.pattern = ctx->Line.StipplePattern; - bls.bits1.repeat_count = ctx->Line.StippleFactor; - - tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; - tmpi = tmp * (1<<13); - - - bls.bits1.inverse_repeat_count = tmpi; - - BRW_CACHED_BATCH_STRUCT(brw, &bls); + struct brw_line_stipple *bls = NULL; //brw->curr.rast->bls; + 
BRW_CACHED_BATCH_STRUCT(brw, bls); + return 0; } const struct brw_tracked_state brw_line_stipple = { .dirty = { - .mesa = _NEW_LINE, + .mesa = PIPE_NEW_RAST, .brw = 0, .cache = 0 }, @@ -416,7 +390,7 @@ const struct brw_tracked_state brw_line_stipple = { * Misc invarient state packets */ -static void upload_invarient_state( struct brw_context *brw ) +static int upload_invarient_state( struct brw_context *brw ) { { /* 0x61040000 Pipeline Select */ @@ -424,7 +398,10 @@ static void upload_invarient_state( struct brw_context *brw ) struct brw_pipeline_select ps; memset(&ps, 0, sizeof(ps)); - ps.header.opcode = CMD_PIPELINE_SELECT(brw); + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + ps.header.opcode = CMD_PIPELINE_SELECT_GM45; + else + ps.header.opcode = CMD_PIPELINE_SELECT_965; ps.header.pipeline_select = 0; BRW_BATCH_STRUCT(brw, &ps); } @@ -460,12 +437,18 @@ static void upload_invarient_state( struct brw_context *brw ) struct brw_vf_statistics vfs; memset(&vfs, 0, sizeof(vfs)); - vfs.opcode = CMD_VF_STATISTICS(brw); - if (INTEL_DEBUG & DEBUG_STATS) + if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) + vfs.opcode = CMD_VF_STATISTICS_GM45; + else + vfs.opcode = CMD_VF_STATISTICS_965; + + if (BRW_DEBUG & DEBUG_STATS) vfs.statistics_enable = 1; BRW_BATCH_STRUCT(brw, &vfs); } + + return 0; } const struct brw_tracked_state brw_invarient_state = { @@ -485,7 +468,7 @@ const struct brw_tracked_state brw_invarient_state = { * state pools. This comes at the expense of memory, and more expensive cache * misses. */ -static void upload_state_base_address( struct brw_context *brw ) +static int upload_state_base_address( struct brw_context *brw ) { /* Output the structure (brw_state_base_address) directly to the * batchbuffer, so we can emit relocations inline. 
@@ -511,6 +494,7 @@ static void upload_state_base_address( struct brw_context *brw ) OUT_BATCH(1); /* Indirect object upper bound */ ADVANCE_BATCH(); } + return 0; } const struct brw_tracked_state brw_state_base_address = { diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c index 17895d2782..54d09d9e45 100644 --- a/src/gallium/drivers/i965/brw_pipe_blend.c +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -43,3 +43,22 @@ if (INTEL_DEBUG & DEBUG_STATS) cc.cc5.statistics_enable = 1; } + + + +static void brw_set_blend_color(struct pipe_context *pipe, + const float *blend_color) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_blend_constant_color *bcc = &brw->curr.blend_color.bcc; + + memset(bcc, 0, sizeof(*bcc)); + bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc->header.length = sizeof(*bcc)/4-2; + bcc->blend_constant_color[0] = blend_color[0]; + bcc->blend_constant_color[1] = blend_color[1]; + bcc->blend_constant_color[2] = blend_color[2]; + bcc->blend_constant_color[3] = blend_color[3]; + + brw->state.dirty.pipe |= PIPE_NEW_BLEND_COLOR; +} diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c index ff64dbd48d..86822d478a 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.c +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -44,3 +44,23 @@ calculate_clip_key_rast() } } } + + +static void +calculate_line_stipple_rast() +{ + GLfloat tmp; + GLint tmpi; + + memset(&bls, 0, sizeof(bls)); + bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; + bls.header.length = sizeof(bls)/4 - 2; + bls.bits0.pattern = brw->curr.rast.line_stipple_pattern; + bls.bits1.repeat_count = brw->curr.rast.line_stipple_factor + 1; + + tmp = 1.0 / (GLfloat) bls.bits1.repeat_count; + tmpi = tmp * (1<<13); + + bls.bits1.inverse_repeat_count = tmpi; + +} diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index b0be0e1f8a..eafd8ddf77 100644 --- 
a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -95,4 +95,11 @@ brw_buffer_is_user_buffer( const struct pipe_buffer *buf ) return ((const struct brw_buffer *)buf)->is_user_buffer; } +struct brw_winsys_buffer * +brw_surface_bo( struct pipe_surface *surface ); + +unsigned +brw_surface_pitch( const struct pipe_surface *surface ); + + #endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index e2db2e76e6..1b73b3fd51 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -131,7 +131,7 @@ static void upload_sf_prog(struct brw_context *brw) /* Populate the key, noting state dependencies: */ /* CACHE_NEW_VS_PROG */ - key.attrs = brw->vs.prog_data->outputs_written; + key.attrs = brw->vs.prog_data->nr_outputs_written; /* BRW_NEW_REDUCED_PRIMITIVE */ switch (brw->reduced_primitive) { diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index 9bf34c3fe4..663fc839df 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -33,9 +33,11 @@ #ifndef BRW_STATE_H #define BRW_STATE_H -#include "brw_context.h" +#include "pipe/p_error.h" #include "util/u_memory.h" +#include "brw_context.h" + static inline void brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo) { diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c index 18d79c5ebb..a2277519ad 100644 --- a/src/gallium/drivers/i965/brw_urb.c +++ b/src/gallium/drivers/i965/brw_urb.c @@ -221,7 +221,7 @@ const struct brw_tracked_state brw_recalculate_urb_fence = { -void brw_upload_urb_fence(struct brw_context *brw) +int brw_upload_urb_fence(struct brw_context *brw) { struct brw_urb_fence uf; memset(&uf, 0, sizeof(uf)); @@ -247,4 +247,5 @@ void brw_upload_urb_fence(struct brw_context *brw) uf.bits1.cs_fence = URB_SIZES(brw); BRW_BATCH_STRUCT(brw, &uf); + return 0; } diff --git 
a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index dcd687ac34..010ac115d3 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -51,11 +51,11 @@ static void do_vs_prog( struct brw_context *brw, brw_init_compile(brw, &c.func); c.vp = vp; - c.prog_data.outputs_written = vp->program.Base.OutputsWritten; + c.prog_data.nr_outputs_written = vp->program.Base.OutputsWritten; c.prog_data.inputs_read = vp->program.Base.InputsRead; if (c.key.copy_edgeflag) { - c.prog_data.outputs_written |= 1<first_overflow_output = 0; if (BRW_IS_IGDNG(c->func.brw)) - mrf = 8; + mrf = 8; else - mrf = 4; + mrf = 4; - for (i = 0; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1 << i)) { - c->nr_outputs++; - assert(i < Elements(c->regs[PROGRAM_OUTPUT])); - if (i == VERT_RESULT_HPOS) { - c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; + for (i = 0; i < c->prog_data.nr_outputs_written; i++) { + c->nr_outputs++; + assert(i < Elements(c->regs[PROGRAM_OUTPUT])); + if (i == VERT_RESULT_HPOS) { + c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + else if (i == VERT_RESULT_PSIZ) { + c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + mrf++; /* just a placeholder? XXX fix later stages & remove this */ + } + else { + if (mrf < 16) { + c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); + mrf++; } - else if (i == VERT_RESULT_PSIZ) { + else { + /* too many vertex results to fit in MRF, use GRF for overflow */ + if (!c->first_overflow_output) + c->first_overflow_output = i; c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; - mrf++; /* just a placeholder? 
XXX fix later stages & remove this */ - } - else { - if (mrf < 16) { - c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); - mrf++; - } - else { - /* too many vertex results to fit in MRF, use GRF for overflow */ - if (!c->first_overflow_output) - c->first_overflow_output = i; - c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } } } } @@ -238,9 +236,9 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); if (BRW_IS_IGDNG(c->func.brw)) - c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else - c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; + c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; c->prog_data.total_grf = reg; @@ -1050,8 +1048,9 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Update the header for point size, user clipping flags, and -ve rhw * workaround. */ - if ((c->prog_data.outputs_written & (1<key.nr_userclip || BRW_IS_965(p->brw)) + if (c->prog_data.writes_psiz || + c->key.nr_userclip || + BRW_IS_965(p->brw)) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1060,7 +1059,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); - if (c->prog_data.outputs_written & (1<prog_data.writes_psiz) { struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); @@ -1149,12 +1148,10 @@ static void emit_vertex_write( struct brw_vs_compile *c) * at mrf[4] atm... 
*/ GLuint i, mrf = 0; - for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) { - if (c->prog_data.outputs_written & (1 << i)) { - /* move from GRF to MRF */ - brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); - mrf++; - } + for (i = c->first_overflow_output; i < c->prog_data.nr_outputs_written; i++) { + /* move from GRF to MRF */ + brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); + mrf++; } brw_urb_WRITE(p, diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 4948ea0dff..764708f7df 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -310,7 +310,7 @@ static void brw_wm_populate_key( struct brw_context *brw, } /* CACHE_NEW_VS_PROG */ - key->vp_outputs_written = brw->vs.prog_data->outputs_written; /* bitmask */ + key->vp_nr_outputs_written = brw->vs.prog_data->nr_outputs_written; /* The unique fragment program ID */ key->program_string_id = fp->id; diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index e06de95a8a..bf241f5fa4 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -76,7 +76,7 @@ struct brw_wm_prog_key { GLuint program_string_id:32; GLuint drawable_height; - GLuint vp_outputs_written; + GLuint vp_nr_outputs_written; }; -- cgit v1.2.3 From 590949553f737902008dea020420311e2085aa1a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 Oct 2009 11:36:22 +0000 Subject: i965g: start hooking up some to the gallium context interfaces - create/bind/destroy blend and depth state - framebuffer and viewport - etc. 
--- src/gallium/drivers/i965/brw_cc.c | 2 +- src/gallium/drivers/i965/brw_context.h | 81 ++++++----- src/gallium/drivers/i965/brw_misc_state.c | 110 +++++++------- src/gallium/drivers/i965/brw_pipe_blend.c | 214 ++++++++++++++++++++++------ src/gallium/drivers/i965/brw_pipe_depth.c | 172 ++++++++++++++++------ src/gallium/drivers/i965/brw_pipe_fb.c | 72 +++++++--- src/gallium/drivers/i965/brw_pipe_misc.c | 14 ++ src/gallium/drivers/i965/brw_pipe_rast.h | 1 + src/gallium/drivers/i965/brw_pipe_sampler.c | 52 +++++++ src/gallium/drivers/i965/brw_state_debug.c | 31 +--- src/gallium/drivers/i965/brw_util.c | 56 -------- src/gallium/drivers/i965/brw_wm_state.c | 8 +- 12 files changed, 519 insertions(+), 294 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_pipe_misc.c create mode 100644 src/gallium/drivers/i965/brw_pipe_sampler.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index ca10bc73f6..bdd6418ae1 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -117,7 +117,7 @@ cc_unit_populate_key(const struct brw_context *brw, key->cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 ); key->cc5 = brw->curr.blend->cc5; key->cc6 = brw->curr.blend->cc6; - key->cc7 = brw->curr.blend->cc7; + key->cc7 = brw->curr.zstencil->cc7; } /** diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 2e17e150bb..df43d8ba4d 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -122,8 +122,8 @@ struct brw_context; -struct brw_depth_stencil_alpha_state { - struct pipe_depth_stencil_alpha_state templ; /* for draw module */ +struct brw_depth_stencil_state { + //struct pipe_depth_stencil_alpha_state templ; /* for draw module */ /* Precalculated hardware state: */ @@ -131,18 +131,19 @@ struct brw_depth_stencil_alpha_state { struct brw_cc1 cc1; struct brw_cc2 cc2; struct 
brw_cc3 cc3; + struct brw_cc7 cc7; }; struct brw_blend_state { - struct pipe_depth_stencil_alpha_state templ; /* for draw module */ + //struct pipe_depth_stencil_alpha_state templ; /* for draw module */ /* Precalculated hardware state: */ + struct brw_cc2 cc2; struct brw_cc3 cc3; struct brw_cc5 cc5; struct brw_cc6 cc6; - struct brw_cc7 cc7; }; @@ -172,20 +173,24 @@ struct brw_fragment_shader { #define PIPE_NEW_DEPTH_STENCIL_ALPHA 0x1 #define PIPE_NEW_RAST 0x2 -#define PIPE_NEW_BLEND 0x2 -#define PIPE_NEW_VIEWPORT 0x2 -#define PIPE_NEW_FRAMEBUFFER 0x2 -#define PIPE_NEW_VERTEX_BUFFER 0x2 -#define PIPE_NEW_VERTEX_ELEMENT 0x2 -#define PIPE_NEW_FRAGMENT_SHADER 0x2 -#define PIPE_NEW_VERTEX_SHADER 0x2 -#define PIPE_NEW_FRAGMENT_CONSTANTS 0x2 -#define PIPE_NEW_VERTEX_CONSTANTS 0x2 -#define PIPE_NEW_CLIP 0x2 -#define PIPE_NEW_INDEX_BUFFER 0x2 -#define PIPE_NEW_INDEX_RANGE 0x2 -#define PIPE_NEW_BLEND_COLOR 0x2 -#define PIPE_NEW_POLYGON_STIPPLE 0x2 +#define PIPE_NEW_BLEND 0x4 +#define PIPE_NEW_VIEWPORT 0x8 +#define PIPE_NEW_SAMPLERS 0x10 +#define PIPE_NEW_VERTEX_BUFFER 0x20 +#define PIPE_NEW_VERTEX_ELEMENT 0x40 +#define PIPE_NEW_FRAGMENT_SHADER 0x80 +#define PIPE_NEW_VERTEX_SHADER 0x100 +#define PIPE_NEW_FRAGMENT_CONSTANTS 0x200 +#define PIPE_NEW_VERTEX_CONSTANTS 0x400 +#define PIPE_NEW_CLIP 0x800 +#define PIPE_NEW_INDEX_BUFFER 0x1000 +#define PIPE_NEW_INDEX_RANGE 0x2000 +#define PIPE_NEW_BLEND_COLOR 0x4000 +#define PIPE_NEW_POLYGON_STIPPLE 0x8000 +#define PIPE_NEW_FRAMEBUFFER_DIMENSIONS 0x10000 +#define PIPE_NEW_DEPTH_BUFFER 0x20000 +#define PIPE_NEW_COLOR_BUFFERS 0x40000 + #define BRW_NEW_URB_FENCE 0x1 @@ -209,8 +214,6 @@ struct brw_fragment_shader { * meantime. 
*/ #define BRW_NEW_BATCH 0x10000 -/** brw->depth_region updated */ -#define BRW_NEW_DEPTH_BUFFER 0x20000 #define BRW_NEW_NR_WM_SURFACES 0x40000 #define BRW_NEW_NR_VS_SURFACES 0x80000 #define BRW_NEW_INDEX_BUFFER 0x100000 @@ -385,12 +388,6 @@ struct brw_cache { }; -/* Considered adding a member to this struct to document which flags - * an update might raise so that ordering of the state atoms can be - * checked or derived at runtime. Dropped the idea in favor of having - * a debug mode where the state is monitored for flags which are - * raised that have already been tested against. - */ struct brw_tracked_state { struct brw_state_flags dirty; int (*prepare)( struct brw_context *brw ); @@ -478,7 +475,7 @@ struct brw_context const struct brw_fragment_shader *fragment_shader; const struct brw_blend_state *blend; const struct brw_rasterizer_state *rast; - const struct brw_depth_stencil_alpha_state *zstencil; + const struct brw_depth_stencil_state *zstencil; struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; @@ -491,6 +488,7 @@ struct brw_context struct pipe_buffer *vertex_constants; struct pipe_buffer *fragment_constants; + struct pipe_viewport_state viewport; struct brw_blend_constant_color bcc; struct brw_polygon_stipple bps; @@ -719,16 +717,31 @@ void brw_emit_query_end(struct brw_context *brw); */ void brw_debug_batch(struct brw_context *intel); -/*====================================================================== - * brw_tex.c - */ -void brw_validate_textures( struct brw_context *brw ); - /*====================================================================== - * brw_pipe_shader.c + * brw_pipe_*.c */ -void brw_init_shader_funcs( struct brw_context *brw ); +void brw_pipe_blend_init( struct brw_context *brw ); +void brw_pipe_depth_stencil_init( struct brw_context *brw ); +void brw_pipe_framebuffer_init( struct brw_context *brw ); +void brw_pipe_flush_init( struct brw_context *brw ); +void 
brw_pipe_misc_init( struct brw_context *brw ); +void brw_pipe_query_init( struct brw_context *brw ); +void brw_pipe_rast_init( struct brw_context *brw ); +void brw_pipe_sampler_init( struct brw_context *brw ); +void brw_pipe_shader_init( struct brw_context *brw ); +void brw_pipe_vertex_init( struct brw_context *brw ); + +void brw_pipe_blend_cleanup( struct brw_context *brw ); +void brw_pipe_depth_stencil_cleanup( struct brw_context *brw ); +void brw_pipe_framebuffer_cleanup( struct brw_context *brw ); +void brw_pipe_flush_cleanup( struct brw_context *brw ); +void brw_pipe_misc_cleanup( struct brw_context *brw ); +void brw_pipe_query_cleanup( struct brw_context *brw ); +void brw_pipe_rast_cleanup( struct brw_context *brw ); +void brw_pipe_sampler_cleanup( struct brw_context *brw ); +void brw_pipe_shader_cleanup( struct brw_context *brw ); +void brw_pipe_vertex_cleanup( struct brw_context *brw ); /* brw_urb.c diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index ccebe08b4f..db8a2a5008 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -62,7 +62,9 @@ const struct brw_tracked_state brw_blend_constant_color = { .emit = upload_blend_constant_color }; -/* Constant single cliprect for framebuffer object or DRI2 drawing */ +/*********************************************************************** + * Drawing rectangle - framebuffer dimensions + */ static int upload_drawing_rect(struct brw_context *brw) { BEGIN_BATCH(4, NO_LOOP_CLIPRECTS); @@ -77,13 +79,18 @@ static int upload_drawing_rect(struct brw_context *brw) const struct brw_tracked_state brw_drawing_rect = { .dirty = { - .mesa = PIPE_NEW_FRAMEBUFFER, + .mesa = PIPE_NEW_FRAMEBUFFER_DIMENSIONS, .brw = 0, .cache = 0 }, .emit = upload_drawing_rect }; + +/*********************************************************************** + * Binding table pointers + */ + static int prepare_binding_table_pointers(struct brw_context 
*brw) { brw_add_validated_bo(brw, brw->vs.bind_bo); @@ -125,7 +132,7 @@ const struct brw_tracked_state brw_binding_table_pointers = { }; -/** +/********************************************************************** * Upload pointers to the per-stage state. * * The state pointers in this packet are all relative to the general state @@ -197,6 +204,11 @@ const struct brw_tracked_state brw_psp_urb_cbs = { .emit = upload_psp_urb_cbs, }; + +/*********************************************************************** + * Depth buffer + */ + static int prepare_depthbuffer(struct brw_context *brw) { struct pipe_surface *zsbuf = brw->curr.fb.zsbuf; @@ -278,8 +290,8 @@ static int emit_depthbuffer(struct brw_context *brw) const struct brw_tracked_state brw_depthbuffer = { .dirty = { - .mesa = 0, - .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH, + .mesa = PIPE_NEW_DEPTH_BUFFER, + .brw = BRW_NEW_BATCH, .cache = 0, }, .prepare = prepare_depthbuffer, @@ -308,63 +320,6 @@ const struct brw_tracked_state brw_polygon_stipple = { }; -/*********************************************************************** - * Polygon stipple offset packet - */ - -static int upload_polygon_stipple_offset(struct brw_context *brw) -{ - struct brw_polygon_stipple_offset bpso; - - /* This is invarient state in gallium: - */ - memset(&bpso, 0, sizeof(bpso)); - bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; - bpso.header.length = sizeof(bpso)/4-2; - bpso.bits0.y_offset = 0; - bpso.bits0.x_offset = 0; - - BRW_CACHED_BATCH_STRUCT(brw, &bpso); - return 0; -} - -const struct brw_tracked_state brw_polygon_stipple_offset = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_CONTEXT, - .cache = 0 - }, - .emit = upload_polygon_stipple_offset -}; - -/********************************************************************** - * AA Line parameters - */ -static int upload_aa_line_parameters(struct brw_context *brw) -{ - struct brw_aa_line_parameters balp; - - if (BRW_IS_965(brw)) - return 0; - - /* use legacy aa line coverage computation 
*/ - memset(&balp, 0, sizeof(balp)); - balp.header.opcode = CMD_AA_LINE_PARAMETERS; - balp.header.length = sizeof(balp) / 4 - 2; - - BRW_CACHED_BATCH_STRUCT(brw, &balp); - return 0; -} - -const struct brw_tracked_state brw_aa_line_parameters = { - .dirty = { - .mesa = 0, - .brw = BRW_NEW_CONTEXT, - .cache = 0 - }, - .emit = upload_aa_line_parameters -}; - /*********************************************************************** * Line stipple packet */ @@ -448,6 +403,32 @@ static int upload_invarient_state( struct brw_context *brw ) BRW_BATCH_STRUCT(brw, &vfs); } + if (!BRW_IS_965(brw)) + { + struct brw_aa_line_parameters balp; + + /* use legacy aa line coverage computation */ + memset(&balp, 0, sizeof(balp)); + balp.header.opcode = CMD_AA_LINE_PARAMETERS; + balp.header.length = sizeof(balp) / 4 - 2; + + BRW_BATCH_STRUCT(brw, &balp); + } + + { + struct brw_polygon_stipple_offset bpso; + + /* This is invarient state in gallium: + */ + memset(&bpso, 0, sizeof(bpso)); + bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; + bpso.header.length = sizeof(bpso)/4-2; + bpso.bits0.y_offset = 0; + bpso.bits0.x_offset = 0; + + BRW_BATCH_STRUCT(brw, &bpso); + } + return 0; } @@ -460,6 +441,11 @@ const struct brw_tracked_state brw_invarient_state = { .emit = upload_invarient_state }; + +/*********************************************************************** + * State base address + */ + /** * Define the base addresses which some state is referenced from. 
* diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c index 54d09d9e45..d3bb882b1a 100644 --- a/src/gallium/drivers/i965/brw_pipe_blend.c +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -1,64 +1,188 @@ +#include "util/u_memory.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" - /* _NEW_COLOR */ - if (key->logic_op != GL_COPY) { - cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op); - } else if (key->color_blend) { - GLenum eqRGB = key->blend_eq_rgb; - GLenum eqA = key->blend_eq_a; - GLenum srcRGB = key->blend_src_rgb; - GLenum dstRGB = key->blend_dst_rgb; - GLenum srcA = key->blend_src_a; - GLenum dstA = key->blend_dst_a; - - if (eqRGB == GL_MIN || eqRGB == GL_MAX) { - srcRGB = dstRGB = GL_ONE; - } - - if (eqA == GL_MIN || eqA == GL_MAX) { - srcA = dstA = GL_ONE; - } - - cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); - cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); - cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); - - cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); - cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); - - cc.cc3.blend_enable = 1; - cc.cc3.ia_blend_enable = (srcA != srcRGB || - dstA != dstRGB || - eqA != eqRGB); +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_debug.h" + +static int translate_logicop(unsigned logicop) +{ + switch (logicop) { + case PIPE_LOGICOP_CLEAR: + return BRW_LOGICOPFUNCTION_CLEAR; + case PIPE_LOGICOP_AND: + return BRW_LOGICOPFUNCTION_AND; + case PIPE_LOGICOP_AND_REVERSE: + return BRW_LOGICOPFUNCTION_AND_REVERSE; + case PIPE_LOGICOP_COPY: + return BRW_LOGICOPFUNCTION_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + return BRW_LOGICOPFUNCTION_COPY_INVERTED; + case PIPE_LOGICOP_AND_INVERTED: + return BRW_LOGICOPFUNCTION_AND_INVERTED; + case PIPE_LOGICOP_NOOP: + return 
BRW_LOGICOPFUNCTION_NOOP; + case PIPE_LOGICOP_XOR: + return BRW_LOGICOPFUNCTION_XOR; + case PIPE_LOGICOP_OR: + return BRW_LOGICOPFUNCTION_OR; + case PIPE_LOGICOP_OR_INVERTED: + return BRW_LOGICOPFUNCTION_OR_INVERTED; + case PIPE_LOGICOP_NOR: + return BRW_LOGICOPFUNCTION_NOR; + case PIPE_LOGICOP_EQUIV: + return BRW_LOGICOPFUNCTION_EQUIV; + case PIPE_LOGICOP_INVERT: + return BRW_LOGICOPFUNCTION_INVERT; + case PIPE_LOGICOP_OR_REVERSE: + return BRW_LOGICOPFUNCTION_OR_REVERSE; + case PIPE_LOGICOP_NAND: + return BRW_LOGICOPFUNCTION_NAND; + case PIPE_LOGICOP_SET: + return BRW_LOGICOPFUNCTION_SET; + default: + assert(0); + return BRW_LOGICOPFUNCTION_SET; } +} + - if (key->dither) { - cc.cc5.dither_enable = 1; - cc.cc6.y_dither_offset = 0; - cc.cc6.x_dither_offset = 0; +static unsigned translate_blend_equation( unsigned mode ) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BRW_BLENDFUNCTION_ADD; + case PIPE_BLEND_MIN: + return BRW_BLENDFUNCTION_MIN; + case PIPE_BLEND_MAX: + return BRW_BLENDFUNCTION_MAX; + case PIPE_BLEND_SUBTRACT: + return BRW_BLENDFUNCTION_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; + default: + assert(0); + return BRW_BLENDFUNCTION_ADD; } +} - if (INTEL_DEBUG & DEBUG_STATS) - cc.cc5.statistics_enable = 1; +static unsigned translate_blend_factor( unsigned factor ) +{ + switch(factor) { + case PIPE_BLENDFACTOR_ZERO: + return BRW_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BRW_BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BRW_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BRW_BLENDFACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BRW_BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return BRW_BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BRW_BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BRW_BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return 
BRW_BLENDFACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BRW_BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BRW_BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BRW_BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BRW_BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BRW_BLENDFACTOR_INV_CONST_ALPHA; + default: + assert(0); + return BRW_BLENDFACTOR_ZERO; + } } +static void *brw_create_blend_state( struct pipe_context *pipe, + const struct pipe_blend_state *templ ) +{ + struct brw_blend_state *blend = CALLOC_STRUCT(brw_blend_state); + + if (templ->logicop_enable) { + blend->cc2.logicop_enable = 1; + blend->cc5.logicop_func = translate_logicop(templ->logicop_func); + } + else if (templ->blend_enable) { + blend->cc6.dest_blend_factor = translate_blend_factor(templ->rgb_dst_factor); + blend->cc6.src_blend_factor = translate_blend_factor(templ->rgb_src_factor); + blend->cc6.blend_function = translate_blend_equation(templ->rgb_func); + + blend->cc5.ia_dest_blend_factor = translate_blend_factor(templ->alpha_dst_factor); + blend->cc5.ia_src_blend_factor = translate_blend_factor(templ->alpha_src_factor); + blend->cc5.ia_blend_function = translate_blend_equation(templ->alpha_func); + + blend->cc3.blend_enable = 1; + blend->cc3.ia_blend_enable = + (blend->cc6.dest_blend_factor != blend->cc5.ia_dest_blend_factor || + blend->cc6.src_blend_factor != blend->cc5.ia_src_blend_factor || + blend->cc6.blend_function != blend->cc5.ia_blend_function); + } + + blend->cc5.dither_enable = templ->dither; + + if (BRW_DEBUG & DEBUG_STATS) + blend->cc5.statistics_enable = 1; + + return (void *)blend; +} + +static void brw_bind_blend_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + brw->curr.blend = (const struct brw_blend_state *)cso; + 
brw->state.dirty.mesa |= PIPE_NEW_BLEND; +} + +static void brw_delete_blend_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + assert((const void *)cso != (const void *)brw->curr.blend); + FREE(cso); +} static void brw_set_blend_color(struct pipe_context *pipe, - const float *blend_color) + const struct pipe_blend_color *blend_color) { struct brw_context *brw = brw_context(pipe); - struct brw_blend_constant_color *bcc = &brw->curr.blend_color.bcc; + struct brw_blend_constant_color *bcc = &brw->curr.bcc; memset(bcc, 0, sizeof(*bcc)); bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR; bcc->header.length = sizeof(*bcc)/4-2; - bcc->blend_constant_color[0] = blend_color[0]; - bcc->blend_constant_color[1] = blend_color[1]; - bcc->blend_constant_color[2] = blend_color[2]; - bcc->blend_constant_color[3] = blend_color[3]; + bcc->blend_constant_color[0] = blend_color->color[0]; + bcc->blend_constant_color[1] = blend_color->color[1]; + bcc->blend_constant_color[2] = blend_color->color[2]; + bcc->blend_constant_color[3] = blend_color->color[3]; - brw->state.dirty.pipe |= PIPE_NEW_BLEND_COLOR; + brw->state.dirty.mesa |= PIPE_NEW_BLEND_COLOR; +} + + +void brw_pipe_blend_init( struct brw_context *brw ) +{ + brw->base.set_blend_color = brw_set_blend_color; + brw->base.create_blend_state = brw_create_blend_state; + brw->base.bind_blend_state = brw_bind_blend_state; + brw->base.delete_blend_state = brw_delete_blend_state; +} + +void brw_pipe_blend_cleanup( struct brw_context *brw ) +{ } diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c index 29f135d37a..33fe517e0b 100644 --- a/src/gallium/drivers/i965/brw_pipe_depth.c +++ b/src/gallium/drivers/i965/brw_pipe_depth.c @@ -1,58 +1,142 @@ +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "brw_context.h" +#include "brw_defines.h" + +static unsigned brw_translate_compare_func(unsigned func) +{ + switch (func) { + case 
PIPE_FUNC_NEVER: + return BRW_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_LESS: + return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_LEQUAL: + return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_GREATER: + return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_GEQUAL: + return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_ALWAYS: + return BRW_COMPAREFUNCTION_ALWAYS; + default: + assert(0); + return BRW_COMPAREFUNCTION_ALWAYS; + } +} + +static unsigned translate_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return BRW_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return BRW_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return BRW_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return BRW_STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return BRW_STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return BRW_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return BRW_STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return BRW_STENCILOP_INVERT; + default: + assert(0); + return BRW_STENCILOP_ZERO; + } +} + + static void * -brw_create_depth_stencil( struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *tmpl ) +brw_create_depth_stencil_state( struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *templ ) { - if (tmpl->stencil[0].enable) { - cc.cc0.stencil_enable = 1; - cc.cc0.stencil_func = - intel_translate_compare_func(key->stencil_func[0]); - cc.cc0.stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[0]); - cc.cc0.stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]); - cc.cc0.stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]); - cc.cc1.stencil_ref = key->stencil_ref[0]; - cc.cc1.stencil_write_mask = key->stencil_write_mask[0]; - cc.cc1.stencil_test_mask = 
key->stencil_test_mask[0]; - - if (tmpl->stencil[1].enable) { - cc.cc0.bf_stencil_enable = 1; - cc.cc0.bf_stencil_func = - intel_translate_compare_func(key->stencil_func[1]); - cc.cc0.bf_stencil_fail_op = - intel_translate_stencil_op(key->stencil_fail_op[1]); - cc.cc0.bf_stencil_pass_depth_fail_op = - intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]); - cc.cc0.bf_stencil_pass_depth_pass_op = - intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]); - cc.cc1.bf_stencil_ref = key->stencil_ref[1]; - cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1]; - cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1]; + struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state); + + if (templ->stencil[0].enabled) { + zstencil->cc0.stencil_enable = 1; + zstencil->cc0.stencil_func = + brw_translate_compare_func(templ->stencil[0].func); + zstencil->cc0.stencil_fail_op = + translate_stencil_op(templ->stencil[0].fail_op); + zstencil->cc0.stencil_pass_depth_fail_op = + translate_stencil_op(templ->stencil[0].zfail_op); + zstencil->cc0.stencil_pass_depth_pass_op = + translate_stencil_op(templ->stencil[0].zpass_op); + zstencil->cc1.stencil_ref = templ->stencil[0].ref_value; + zstencil->cc1.stencil_write_mask = templ->stencil[0].writemask; + zstencil->cc1.stencil_test_mask = templ->stencil[0].valuemask; + + if (templ->stencil[1].enabled) { + zstencil->cc0.bf_stencil_enable = 1; + zstencil->cc0.bf_stencil_func = + brw_translate_compare_func(templ->stencil[1].func); + zstencil->cc0.bf_stencil_fail_op = + translate_stencil_op(templ->stencil[1].fail_op); + zstencil->cc0.bf_stencil_pass_depth_fail_op = + translate_stencil_op(templ->stencil[1].zfail_op); + zstencil->cc0.bf_stencil_pass_depth_pass_op = + translate_stencil_op(templ->stencil[1].zpass_op); + zstencil->cc1.bf_stencil_ref = templ->stencil[1].ref_value; + zstencil->cc2.bf_stencil_write_mask = templ->stencil[1].writemask; + zstencil->cc2.bf_stencil_test_mask = 
templ->stencil[1].valuemask; } - /* Not really sure about this: - */ - cc.cc0.stencil_write_enable = (cc.cc1.stencil_write_mask || - cc.cc2.bf_stencil_write_mask); + zstencil->cc0.stencil_write_enable = (zstencil->cc1.stencil_write_mask || + zstencil->cc2.bf_stencil_write_mask); } - if (key->alpha_enabled) { - cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func); - cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref); + if (templ->alpha.enabled) { + zstencil->cc3.alpha_test = 1; + zstencil->cc3.alpha_test_func = brw_translate_compare_func(templ->alpha.func); + zstencil->cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + zstencil->cc7.alpha_ref.ub[0] = float_to_ubyte(templ->alpha.ref_value); } - /* _NEW_DEPTH */ - if (key->depth_test) { - cc.cc2.depth_test = 1; - cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func); - cc.cc2.depth_write_enable = key->depth_write; + if (templ->depth.enabled) { + zstencil->cc2.depth_test = 1; + zstencil->cc2.depth_test_function = brw_translate_compare_func(templ->depth.func); + zstencil->cc2.depth_write_enable = templ->depth.writemask; } + return (void *)zstencil; +} + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + brw->curr.zstencil = (const struct brw_depth_stencil_state *)cso; + brw->state.dirty.mesa |= PIPE_NEW_DEPTH_STENCIL_ALPHA; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + assert((const void *)cso != (const void *)brw->curr.zstencil); + FREE(cso); +} + + +void brw_pipe_depth_stencil_init( struct brw_context *brw ) +{ + brw->base.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; + brw->base.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; + 
brw->base.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; +} + +void brw_pipe_depth_stencil_cleanup( struct brw_context *brw ) +{ } diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index dbf97a0544..6391717227 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -1,25 +1,61 @@ +#include "util/u_math.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "brw_context.h" /** * called from intelDrawBuffer() */ -static void brw_set_draw_region( struct pipe_context *pipe, - struct intel_region *color_regions[], - struct intel_region *depth_region, - GLuint num_color_regions) +static void brw_set_framebuffer_state( struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb ) +{ + struct brw_context *brw = brw_context(pipe); + unsigned i; + + /* Dimensions: + */ + if (brw->curr.fb.width != fb->width || + brw->curr.fb.height != fb->height) { + brw->curr.fb.width = fb->width; + brw->curr.fb.height = fb->height; + brw->state.dirty.mesa |= PIPE_NEW_FRAMEBUFFER_DIMENSIONS; + } + + /* Z/Stencil + */ + if (brw->curr.fb.zsbuf != fb->zsbuf) { + pipe_surface_reference(&brw->curr.fb.zsbuf, fb->zsbuf); + brw->state.dirty.mesa |= PIPE_NEW_DEPTH_BUFFER; + } + + /* Color buffers: + */ + for (i = 0; i < MAX2(fb->nr_cbufs, brw->curr.fb.nr_cbufs); i++) { + if (brw->curr.fb.cbufs[i] != fb->cbufs[i]) { + brw->state.dirty.mesa |= PIPE_NEW_COLOR_BUFFERS; + pipe_surface_reference(&brw->curr.fb.cbufs[i], fb->cbufs[i]); + } + } + + brw->curr.fb.nr_cbufs = fb->nr_cbufs; +} + +static void brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) { struct brw_context *brw = brw_context(pipe); - GLuint i; - - /* release old color/depth regions */ - if (brw->state.depth_region != depth_region) - brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; - for (i = 0; i < brw->state.nr_color_regions; i++) - 
intel_region_release(&brw->state.color_regions[i]); - intel_region_release(&brw->state.depth_region); - - /* reference new color/depth regions */ - for (i = 0; i < num_color_regions; i++) - intel_region_reference(&brw->state.color_regions[i], color_regions[i]); - intel_region_reference(&brw->state.depth_region, depth_region); - brw->state.nr_color_regions = num_color_regions; + brw->curr.viewport = *viewport; + brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT; +} + + +void brw_pipe_framebuffer_init( struct brw_context *brw ) +{ + brw->base.set_framebuffer_state = brw_set_framebuffer_state; + brw->base.set_framebuffer_state = brw_set_framebuffer_state; +} + +void brw_pipe_framebuffer_cleanup( struct brw_context *brw ) +{ } diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c new file mode 100644 index 0000000000..fb8d7ecc59 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_misc.c @@ -0,0 +1,14 @@ + +static void brw_set_polygon_stipple( struct pipe_context *pipe, + const unsigned *stipple ) +{ + struct brw_polygon_stipple *bps = &brw->curr.bps; + GLuint i; + + memset(bps, 0, sizeof *bps); + bps->header.opcode = CMD_POLY_STIPPLE_PATTERN; + bps->header.length = sizeof *bps/4-2; + + for (i = 0; i < 32; i++) + bps->stipple[i] = brw->curr.poly_stipple[i]; /* don't invert */ +} diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h index 6ceaa1fb09..800a9208a7 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.h +++ b/src/gallium/drivers/i965/brw_pipe_rast.h @@ -9,6 +9,7 @@ struct brw_rasterizer_state { /* Precalculated hardware state: */ struct brw_clip_prog_key clip_key; + struct brw_line_stipple bls; }; #endif diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c new file mode 100644 index 0000000000..b3069f08c0 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -0,0 +1,52 @@ + +#include "util/u_memory.h" +#include 
"pipe/p_context.h" +#include "pipe/p_state.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_debug.h" + + + +static void *brw_create_sampler_state( struct pipe_context *pipe, + const struct pipe_sampler_state *templ ) +{ + struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state); + + + return (void *)sampler; +} + +static void brw_bind_sampler_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + brw->curr.sampler = (const struct brw_sampler_state *)cso; + brw->state.dirty.mesa |= PIPE_NEW_SAMPLER; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + FREE(cso); +} + +static void brw_set_sampler_textures(struct pipe_context *pipe, + unsigned num_textures, + struct pipe_texture **tex) +{ + struct brw_context *brw = brw_context(pipe); + + brw->state.dirty.mesa |= PIPE_NEW_BOUND_TEXTURES; +} + + +void brw_sampler_init( struct brw_context *brw ) +{ + brw->base.set_sampler_textures = brw_set_sampler_textures; + brw->base.create_sampler_state = brw_create_sampler_state; + brw->base.bind_sampler_state = brw_bind_sampler_state; + brw->base.destroy_sampler_state = brw_destroy_sampler_state; +} diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c index 812b761d40..22cea4b7d8 100644 --- a/src/gallium/drivers/i965/brw_state_debug.c +++ b/src/gallium/drivers/i965/brw_state_debug.c @@ -40,35 +40,7 @@ struct dirty_bit_map { #define DEFINE_BIT(name) {name, #name, 0} static struct dirty_bit_map mesa_bits[] = { - DEFINE_BIT(_NEW_MODELVIEW), - DEFINE_BIT(_NEW_PROJECTION), - DEFINE_BIT(_NEW_TEXTURE_MATRIX), - DEFINE_BIT(_NEW_COLOR_MATRIX), - DEFINE_BIT(_NEW_ACCUM), - DEFINE_BIT(_NEW_COLOR), - DEFINE_BIT(_NEW_DEPTH), - DEFINE_BIT(_NEW_EVAL), - DEFINE_BIT(_NEW_FOG), - DEFINE_BIT(_NEW_HINT), - DEFINE_BIT(_NEW_LIGHT), - DEFINE_BIT(_NEW_LINE), - DEFINE_BIT(_NEW_PIXEL), - 
DEFINE_BIT(_NEW_POINT), - DEFINE_BIT(_NEW_POLYGON), - DEFINE_BIT(_NEW_POLYGONSTIPPLE), - DEFINE_BIT(_NEW_SCISSOR), - DEFINE_BIT(_NEW_STENCIL), - DEFINE_BIT(_NEW_TEXTURE), - DEFINE_BIT(_NEW_TRANSFORM), - DEFINE_BIT(_NEW_VIEWPORT), - DEFINE_BIT(_NEW_PACKUNPACK), - DEFINE_BIT(_NEW_ARRAY), - DEFINE_BIT(_NEW_RENDERMODE), - DEFINE_BIT(_NEW_BUFFERS), - DEFINE_BIT(_NEW_MULTISAMPLE), - DEFINE_BIT(_NEW_TRACK_MATRIX), - DEFINE_BIT(_NEW_PROGRAM), - DEFINE_BIT(_NEW_PROGRAM_CONSTANTS), + DEFINE_BIT(PIPE_NEW_BLEND_COLOR), {0, 0, 0} }; @@ -88,7 +60,6 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), DEFINE_BIT(BRW_NEW_BATCH), - DEFINE_BIT(BRW_NEW_DEPTH_BUFFER), {0, 0, 0} }; diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c index c5244e58ab..458058d668 100644 --- a/src/gallium/drivers/i965/brw_util.c +++ b/src/gallium/drivers/i965/brw_util.c @@ -35,60 +35,4 @@ -GLuint brw_translate_blend_equation( GLenum mode ) -{ - switch (mode) { - case GL_FUNC_ADD: - return BRW_BLENDFUNCTION_ADD; - case GL_MIN: - return BRW_BLENDFUNCTION_MIN; - case GL_MAX: - return BRW_BLENDFUNCTION_MAX; - case GL_FUNC_SUBTRACT: - return BRW_BLENDFUNCTION_SUBTRACT; - case GL_FUNC_REVERSE_SUBTRACT: - return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; - default: - assert(0); - return BRW_BLENDFUNCTION_ADD; - } -} -GLuint brw_translate_blend_factor( GLenum factor ) -{ - switch(factor) { - case GL_ZERO: - return BRW_BLENDFACTOR_ZERO; - case GL_SRC_ALPHA: - return BRW_BLENDFACTOR_SRC_ALPHA; - case GL_ONE: - return BRW_BLENDFACTOR_ONE; - case GL_SRC_COLOR: - return BRW_BLENDFACTOR_SRC_COLOR; - case GL_ONE_MINUS_SRC_COLOR: - return BRW_BLENDFACTOR_INV_SRC_COLOR; - case GL_DST_COLOR: - return BRW_BLENDFACTOR_DST_COLOR; - case GL_ONE_MINUS_DST_COLOR: - return BRW_BLENDFACTOR_INV_DST_COLOR; - case GL_ONE_MINUS_SRC_ALPHA: - return BRW_BLENDFACTOR_INV_SRC_ALPHA; - case GL_DST_ALPHA: - return BRW_BLENDFACTOR_DST_ALPHA; - case 
GL_ONE_MINUS_DST_ALPHA: - return BRW_BLENDFACTOR_INV_DST_ALPHA; - case GL_SRC_ALPHA_SATURATE: - return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; - case GL_CONSTANT_COLOR: - return BRW_BLENDFACTOR_CONST_COLOR; - case GL_ONE_MINUS_CONSTANT_COLOR: - return BRW_BLENDFACTOR_INV_CONST_COLOR; - case GL_CONSTANT_ALPHA: - return BRW_BLENDFACTOR_CONST_ALPHA; - case GL_ONE_MINUS_CONSTANT_ALPHA: - return BRW_BLENDFACTOR_INV_CONST_ALPHA; - default: - assert(0); - return BRW_BLENDFACTOR_ZERO; - } -} diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index 16a2324049..4989aae830 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -105,11 +105,11 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* as far as we can tell */ key->computes_depth = (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0; - /* BRW_NEW_DEPTH_BUFFER + /* PIPE_NEW_DEPTH_BUFFER * Override for NULL depthbuffer case, required by the Pixel Shader Computed * Depth field. 
*/ - if (brw->state.depth_region == NULL) + if (brw->curr.fb.zsbuf == NULL) key->computes_depth = 0; /* _NEW_COLOR */ @@ -295,7 +295,8 @@ static void upload_wm_unit( struct brw_context *brw ) const struct brw_tracked_state brw_wm_unit = { .dirty = { - .mesa = (_NEW_POLYGON | + .mesa = (PIPE_NEW_DEPTH_BUFFER | + _NEW_POLYGON | _NEW_POLYGONSTIPPLE | _NEW_LINE | _NEW_COLOR | @@ -303,7 +304,6 @@ const struct brw_tracked_state brw_wm_unit = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | - BRW_NEW_DEPTH_BUFFER | BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | -- cgit v1.2.3 From 09c231f84a20a306a173b60c82484ce1f9331edf Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 Oct 2009 00:20:33 +0000 Subject: i965g: still working on compilation --- src/gallium/auxiliary/tgsi/tgsi_scan.h | 3 + src/gallium/drivers/i965/Makefile | 9 +- src/gallium/drivers/i965/brw_batchbuffer.c | 14 +- src/gallium/drivers/i965/brw_context.h | 18 +- src/gallium/drivers/i965/brw_eu_emit.c | 4 +- src/gallium/drivers/i965/brw_pipe_fb.c | 2 +- src/gallium/drivers/i965/brw_pipe_flush.c | 9 +- src/gallium/drivers/i965/brw_pipe_query.c | 110 +++++++----- src/gallium/drivers/i965/brw_pipe_sampler.c | 81 +++++++++ src/gallium/drivers/i965/brw_screen_surface.c | 156 ++++++++++++++--- src/gallium/drivers/i965/brw_screen_texture.c | 218 ++++++++++++++++++++++++ src/gallium/drivers/i965/brw_sf.c | 80 ++++----- src/gallium/drivers/i965/brw_sf.h | 13 +- src/gallium/drivers/i965/brw_sf_emit.c | 145 +++++++++------- src/gallium/drivers/i965/brw_sf_state.c | 178 +++++++++---------- src/gallium/drivers/i965/brw_state.h | 13 +- src/gallium/drivers/i965/brw_state_batch.c | 8 +- src/gallium/drivers/i965/brw_state_cache.c | 64 ++++--- src/gallium/drivers/i965/brw_state_debug.c | 19 ++- src/gallium/drivers/i965/brw_state_dump.c | 64 +++---- src/gallium/drivers/i965/brw_state_upload.c | 37 ++-- src/gallium/drivers/i965/brw_tex.c | 50 ------ src/gallium/drivers/i965/brw_tex_layout.c | 218 
------------------------ src/gallium/drivers/i965/brw_urb.c | 10 +- src/gallium/drivers/i965/brw_vs.h | 2 +- src/gallium/drivers/i965/brw_vs_emit.c | 20 +-- src/gallium/drivers/i965/brw_vs_state.c | 4 +- src/gallium/drivers/i965/brw_winsys.h | 18 +- src/gallium/drivers/i965/brw_wm.c | 4 +- src/gallium/drivers/i965/brw_wm.h | 36 ++-- src/gallium/drivers/i965/brw_wm_debug.c | 68 ++++---- src/gallium/drivers/i965/brw_wm_emit.c | 8 +- src/gallium/drivers/i965/brw_wm_fp.c | 18 +- src/gallium/drivers/i965/brw_wm_glsl.c | 16 +- src/gallium/drivers/i965/brw_wm_pass0.c | 6 +- src/gallium/drivers/i965/brw_wm_pass1.c | 2 +- src/gallium/drivers/i965/brw_wm_pass2.c | 4 +- src/gallium/drivers/i965/brw_wm_sampler_state.c | 170 ++++-------------- src/gallium/drivers/i965/brw_wm_state.c | 6 +- 39 files changed, 1007 insertions(+), 898 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_screen_texture.c delete mode 100644 src/gallium/drivers/i965/brw_tex.c delete mode 100644 src/gallium/drivers/i965/brw_tex_layout.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 8a7ee0c7e4..6754001e88 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -61,6 +61,9 @@ struct tgsi_shader_info boolean uses_kill; /**< KIL or KILP instruction used? */ boolean uses_fogcoord; /**< fragment shader uses fog coord? */ boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? 
*/ + + uint texture_max; + uint texture_mask; }; diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 40e8aa8786..c3dbad72ae 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -28,10 +28,7 @@ C_SOURCES = \ brw_pipe_blend.c \ brw_pipe_depth.c \ brw_pipe_fb.c \ - brw_pipe_flush.c \ brw_pipe_query.c \ - brw_pipe_shader.c \ - brw_screen_surface.c \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ @@ -40,8 +37,6 @@ C_SOURCES = \ brw_state_dump.c \ brw_state_upload.c \ brw_swtnl.c \ - brw_tex.c \ - brw_tex_layout.c \ brw_urb.c \ brw_util.c \ brw_vs.c \ @@ -60,8 +55,12 @@ C_SOURCES = \ brw_wm_sampler_state.c \ brw_wm_state.c \ brw_wm_surface_state.c \ + brw_screen_surface.c \ + brw_screen_texture.c \ brw_bo.c \ brw_batchbuffer.c \ + brw_pipe_shader.c \ + brw_pipe_flush.c \ intel_tex_layout.c include ../../Makefile.template diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index 8bcac76ede..45fbd59273 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -105,13 +105,13 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, } - if (INTEL_DEBUG & DEBUG_BATCH) - fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, + if (BRW_DEBUG & DEBUG_BATCH) + debug_printf("%s:%d: Batchbuffer flush with %db used\n", file, line, used); /* Emit a flush if the bufmgr doesn't do it for us. 
*/ if (intel->always_flush_cache || !intel->ttm) { - *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd(); + *(GLuint *) (batch->ptr) = ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); batch->ptr += 4; used = batch->ptr - batch->map; } @@ -136,15 +136,15 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 ); - if (INTEL_DEBUG & DEBUG_BATCH) { + if (BRW_DEBUG & DEBUG_BATCH) { dri_bo_map(batch->buf, GL_FALSE); intel_decode(batch->buf->virtual, used / 4, batch->buf->offset, brw->brw_screen->pci_id); dri_bo_unmap(batch->buf); } - if (INTEL_DEBUG & DEBUG_SYNC) { - fprintf(stderr, "waiting for idle\n"); + if (BRW_DEBUG & DEBUG_SYNC) { + debug_printf("waiting for idle\n"); dri_bo_map(batch->buf, GL_TRUE); dri_bo_unmap(batch->buf); } @@ -166,7 +166,7 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, int ret; if (batch->ptr - batch->map > batch->buf->size) - _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n", + debug_printf ("bad relocation ptr %p map %p offset %d size %d\n", batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); ret = batch->sws->bo_emit_reloc(batch->buf, diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index df43d8ba4d..10c1cf6f33 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -190,6 +190,8 @@ struct brw_fragment_shader { #define PIPE_NEW_FRAMEBUFFER_DIMENSIONS 0x10000 #define PIPE_NEW_DEPTH_BUFFER 0x20000 #define PIPE_NEW_COLOR_BUFFERS 0x40000 +#define PIPE_NEW_QUERY 0x80000 +#define PIPE_NEW_SCISSOR 0x100000 @@ -204,7 +206,7 @@ struct brw_fragment_shader { #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100 #define BRW_NEW_PSP 0x800 #define BRW_NEW_WM_SURFACES 0x1000 -#define BRW_NEW_FENCE 0x2000 +#define BRW_NEW_xxx 0x2000 /* was FENCE */ #define BRW_NEW_INDICES 0x4000 #define BRW_NEW_VERTICES 0x8000 /** @@ -373,6 +375,7 @@ struct brw_cache_item { struct 
brw_cache { struct brw_context *brw; + struct brw_winsys_screen *sws; struct brw_cache_item **items; GLuint size, n_items; @@ -380,6 +383,7 @@ struct brw_cache { GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */ GLuint aux_size[BRW_MAX_CACHE]; char *name[BRW_MAX_CACHE]; + /* Record of the last BOs chosen for each cache_id. Used to set * brw->state.dirty.cache when a new cache item is chosen. @@ -448,7 +452,7 @@ struct brw_query_object { int last_index; /* Total count of pixels from previous BOs */ - unsigned int count; + uint64_t result; }; @@ -477,11 +481,18 @@ struct brw_context const struct brw_rasterizer_state *rast; const struct brw_depth_stencil_state *zstencil; + const struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + const struct pipe_sampler *sampler[PIPE_MAX_SAMPLERS]; + unsigned num_textures; + unsigned num_samplers; + + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; unsigned num_vertex_elements; unsigned num_vertex_buffers; + struct pipe_scissor_state scissor; struct pipe_framebuffer_state fb; struct pipe_viewport_state vp; struct pipe_clip_state ucp; @@ -492,6 +503,8 @@ struct brw_context struct brw_blend_constant_color bcc; struct brw_polygon_stipple bps; + + /** * Index buffer for this draw_prims call. 
* @@ -688,6 +701,7 @@ struct brw_context struct brw_winsys_buffer *bo; int index; GLboolean active; + int stats_wm; } query; struct { diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index f6b8843e01..f7fa520348 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -1262,7 +1262,7 @@ void brw_SAMPLE(struct brw_compile *p, GLboolean need_stall = 0; if (writemask == 0) { - /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ + /*debug_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } @@ -1294,7 +1294,7 @@ void brw_SAMPLE(struct brw_compile *p, if (newmask != writemask) { need_stall = 1; - /* _mesa_printf("need stall %x %x\n", newmask , writemask); */ + /* debug_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index 6391717227..c65f9bc374 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -53,7 +53,7 @@ static void brw_set_viewport_state( struct pipe_context *pipe, void brw_pipe_framebuffer_init( struct brw_context *brw ) { brw->base.set_framebuffer_state = brw_set_framebuffer_state; - brw->base.set_framebuffer_state = brw_set_framebuffer_state; + brw->base.set_viewport_state = brw_set_viewport_state; } void brw_pipe_framebuffer_cleanup( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index 65e7151517..fb4a784de9 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -52,14 +52,7 @@ static void brw_note_fence( struct brw_context *brw, GLuint fence ) */ static GLuint brw_flush_cmd( void ) { - struct brw_mi_flush flush; - - return ; - - flush.opcode = CMD_MI_FLUSH; - flush.pad = 0; - flush.flags = BRW_FLUSH_STATE_CACHE; - return *(GLuint 
*)&flush; + return ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); } diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c index a2da1373bf..18a9b71af0 100644 --- a/src/gallium/drivers/i965/brw_pipe_query.c +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -46,25 +46,38 @@ #include "brw_reg.h" /** Waits on the query object's BO and totals the results for this query */ -static void -brw_queryobj_get_results(struct brw_query_object *query) +static boolean +brw_query_get_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64_t *result) { - int i; - uint64_t *results; - - if (query->bo == NULL) - return; + struct brw_context *brw = brw_context(pipe); + struct brw_query_object *query = (struct brw_query_object *)q; /* Map and count the pixels from the current query BO */ - dri_bo_map(query->bo, GL_FALSE); - results = query->bo->virtual; - for (i = query->first_index; i <= query->last_index; i++) { - query->Base.Result += results[i * 2 + 1] - results[i * 2]; + if (query->bo) { + int i; + uint64_t *map; + + if (brw->sws->bo_is_busy(query->bo) && !wait) + return FALSE; + + map = brw->sws->bo_map(query->bo, GL_FALSE); + if (map == NULL) + return FALSE; + + for (i = query->first_index; i <= query->last_index; i++) { + query->result += map[i * 2 + 1] - map[i * 2]; + } + + brw->sws->bo_unmap(query->bo); + brw->sws->bo_unreference(query->bo); + query->bo = NULL; } - dri_bo_unmap(query->bo); - brw->sws->bo_unreference(query->bo); - query->bo = NULL; + *result = query->result; + return TRUE; } static struct pipe_query * @@ -72,12 +85,12 @@ brw_query_create(struct pipe_context *pipe, unsigned type ) { struct brw_query_object *query; - switch (query->type) { + switch (type) { case PIPE_QUERY_OCCLUSION_COUNTER: query = CALLOC_STRUCT( brw_query_object ); if (query == NULL) return NULL; - return &query->Base; + return (struct pipe_query *)query; default: return NULL; @@ -87,6 +100,7 @@ brw_query_create(struct 
pipe_context *pipe, unsigned type ) static void brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q) { + struct brw_context *brw = brw_context(pipe); struct brw_query_object *query = (struct brw_query_object *)q; brw->sws->bo_unreference(query->bo); @@ -94,24 +108,25 @@ brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q) } static void -brw_begin_query(struct pipe_context *pipe, struct pipe_query *q) +brw_query_begin(struct pipe_context *pipe, struct pipe_query *q) { struct brw_context *brw = brw_context(pipe); struct brw_query_object *query = (struct brw_query_object *)q; /* Reset our driver's tracking of query state. */ brw->sws->bo_unreference(query->bo); + query->result = 0; query->bo = NULL; query->first_index = -1; query->last_index = -1; insert_at_head(&brw->query.active_head, query); - brw->stats_wm++; - brw->dirty.mesa |= PIPE_NEW_QUERY; + brw->query.stats_wm++; + brw->state.dirty.mesa |= PIPE_NEW_QUERY; } static void -brw_end_query(struct pipe_context *pipe, struct pipe_query *q) +brw_query_end(struct pipe_context *pipe, struct pipe_query *q) { struct brw_context *brw = brw_context(pipe); struct brw_query_object *query = (struct brw_query_object *)q; @@ -129,27 +144,13 @@ brw_end_query(struct pipe_context *pipe, struct pipe_query *q) } remove_from_list(query); - brw->stats_wm--; - brw->dirty.mesa |= PIPE_NEW_QUERY; + brw->query.stats_wm--; + brw->state.dirty.mesa |= PIPE_NEW_QUERY; } -static void brw_wait_query(struct pipe_context *pipe, struct pipe_query *q) -{ - struct brw_query_object *query = (struct brw_query_object *)q; - - brw_queryobj_get_results(query); - query->Base.Ready = GL_TRUE; -} - -static void brw_check_query(struct pipe_context *pipe, struct pipe_query *q) -{ - struct brw_query_object *query = (struct brw_query_object *)q; - - if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) { - brw_queryobj_get_results(query); - query->Base.Ready = GL_TRUE; - } -} 
+/*********************************************************************** + * Internal functions and callbacks to implement queries + */ /** Called to set up the query BO and account for its aperture space */ void @@ -201,8 +202,17 @@ brw_emit_query_begin(struct brw_context *brw) foreach(query, &brw->query.active_head) { if (query->bo != brw->query.bo) { + uint64_t tmp; + + /* Propogate the results from this buffer to all of the + * active queries, as the bo is going away. + */ if (query->bo != NULL) - brw_queryobj_get_results(query); + brw_query_get_result( &brw->base, + (struct pipe_query *)query, + FALSE, + &tmp ); + brw->sws->bo_reference(brw->query.bo); query->bo = brw->query.bo; query->first_index = brw->query.index; @@ -235,12 +245,18 @@ brw_emit_query_end(struct brw_context *brw) brw->query.index++; } -void brw_init_queryobj_functions(struct dd_function_table *functions) +void brw_pipe_query_init( struct brw_context *brw ) { - functions->NewQueryObject = brw_new_query_object; - functions->DeleteQuery = brw_delete_query; - functions->BeginQuery = brw_begin_query; - functions->EndQuery = brw_end_query; - functions->CheckQuery = brw_check_query; - functions->WaitQuery = brw_wait_query; + brw->base.create_query = brw_query_create; + brw->base.destroy_query = brw_query_destroy; + brw->base.begin_query = brw_query_begin; + brw->base.end_query = brw_query_end; + brw->base.get_query_result = brw_query_get_result; +} + + +void brw_pipe_query_cleanup( struct brw_context *brw ) +{ + /* Unreference brw->query.bo ?? 
+ */ } diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index b3069f08c0..bc20eef6fb 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -14,6 +14,87 @@ static void *brw_create_sampler_state( struct pipe_context *pipe, { struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state); + switch (key->minfilter) { + case GL_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + case GL_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + case GL_NEAREST_MIPMAP_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case GL_LINEAR_MIPMAP_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + case GL_LINEAR_MIPMAP_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + default: + break; + } + + /* Set Anisotropy: + */ + if (key->max_aniso > 1.0) { + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; + + if (key->max_aniso > 2.0) { + sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, + BRW_ANISORATIO_16); + } + } + else { + switch (key->magfilter) { + case GL_NEAREST: + sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case GL_LINEAR: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; + } + } + + sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); + + /* Set LOD bias: 
+ */ + sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6); + + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ + sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + + /* Set shadow function: + */ + if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) { + /* Shadowing is "enabled" by emitting a particular sampler + * message (sample_c). So need to recompile WM program when + * shadow comparison is enabled on each/any texture unit. + */ + sampler->ss0.shadow_function = + intel_translate_shadow_compare_func(key->comparefunc); + } + + /* Set BaseMipLevel, MaxLOD, MinLOD: + */ + sampler->ss0.base_level = U_FIXED(0, 1); + + sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6); + sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6); return (void *)sampler; } diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c index 544be6a089..e0df6cc629 100644 --- a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -1,27 +1,131 @@ - /* _NEW_BUFFERS */ - if (IS_965(brw->brw_screen->pci_id) && - !IS_G4X(brw->brw_screen->pci_id)) { - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - - /* The original gen4 hardware couldn't set up WM surfaces pointing - * at an offset within a tile, which can happen when rendering to - * anything but the base level of a texture or the +X face/0 depth. - * This was fixed with the 4 Series hardware. - * - * For these original chips, you would have to make the depth and - * color destination surfaces include information on the texture - * type, LOD, face, and various limits to use them as a destination. 
- * I would have done this, but there's also a nasty requirement that - * the depth and the color surfaces all be of the same LOD, which - * may be a worse requirement than this alignment. (Also, we may - * want to just demote the texture to untiled, instead). - */ - if (irb->region && - irb->region->tiling != I915_TILING_NONE && - (irb->region->draw_offset & 4095)) { - DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); - return GL_TRUE; - } + +#include "pipe/p_screen.h" +#include "brw_screen.h" + +struct brw_surface_id { + unsigned face:3; + unsigned zslice:13; + unsigned level:16; +}; + +static boolean need_linear_view( struct brw_screen *brw_screen, + struct brw_texture *brw_texture, + unsigned face, + unsigned level, + unsigned zslice ) +{ +#if 0 + /* XXX: what about IDGNG? + */ + if (!BRW_IS_G4X(brw->brw_screen->pci_id)) + { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + + /* The original gen4 hardware couldn't set up WM surfaces pointing + * at an offset within a tile, which can happen when rendering to + * anything but the base level of a texture or the +X face/0 depth. + * This was fixed with the 4 Series hardware. + * + * For these original chips, you would have to make the depth and + * color destination surfaces include information on the texture + * type, LOD, face, and various limits to use them as a destination. + * + * This is easy in Gallium as surfaces are all backed by + * textures, but there's also a nasty requirement that the depth + * and the color surfaces all be of the same LOD, which is + * harder to get around as we can't look at a surface in + * isolation and decide if it's legal. + * + * Instead, end up being pessimistic and say that for i965, + * ... ?? 
+ */ + if (brw_tex->tiling != I915_TILING_NONE && + (brw_tex_image_offset(brw_tex, face, level, zslize) & 4095)) { + if (BRW_DEBUG & DEBUG_VIEW) + debug_printf("%s: need surface view for non-aligned tex image\n", + __FUNCTION__); + return GL_TRUE; } + } +#endif + + /* Tiled 3d textures don't have subsets that look like 2d surfaces: + */ + + /* Everything else should be fine to render to in-place: + */ + return GL_FALSE; +} + +/* Look at all texture views and figure out if any of them need to be + * back-copied into the texture for sampling + */ +void brw_update_texture( struct pipe_screen *screen, + struct pipe_texture *texture ) +{ + /* currently nothing to do */ +} + + +static struct pipe_surface *create_linear_view( struct brw_screen *brw_screen, + struct brw_texture *brw_tex, + struct brw_surface_id id ) +{ + +} + +static struct pipe_surface *create_in_place_view( struct brw_screen *brw_screen, + struct brw_texture *brw_tex, + struct brw_surface_id id ) +{ + struct brw_surface *surface = CALLOC_STRUCT(brw_surface); + surface->id = id; + +} + +/* Get a surface which is view into a texture + */ +struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, + unsigned zslice, + unsigned usage ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_surface_id id; + + id.face = face; + id.level = level; + id.zslice = zslice; + + if (need_linear_view(brw_screen, brw_tex, id)) + type = BRW_VIEW_LINEAR; + else + type = BRW_VIEW_IN_PLACE; + + + foreach (surface, texture->views[type]) { + if (id.value == surface->id.value) + return surface; + } + + switch (type) { + case BRW_VIEW_LINEAR: + surface = create_linear_view( texture, id, type ); + break; + case BRW_VIEW_IN_PLACE: + surface = create_in_place_view( texture, id, type ); + break; + default: + return NULL; + } + + insert_at_head( texture->views[type], surface ); + return surface; +} + + +void brw_tex_surface_destroy( struct 
pipe_surface *surface ) +{ +} diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c new file mode 100644 index 0000000000..50c30878c6 --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -0,0 +1,218 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +/* Code to layout images in a mipmap tree for i965. 
+ */ + +#include "brw_tex_layout.h" + +#define FILE_DEBUG_FLAG DEBUG_MIPTREE + +GLboolean brw_miptree_layout(struct brw_context *brw, + struct intel_mipmap_tree *mt, + uint32_t tiling) +{ + /* XXX: these vary depending on image format: */ + /* GLint align_w = 4; */ + + switch (mt->target) { + case GL_TEXTURE_CUBE_MAP: + if (IS_IGDNG(brw->brw_screen->pci_id)) { + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = mt->width0; + GLuint height = mt->height0; + GLuint qpitch = 0; + GLuint y_pitch = 0; + + mt->pitch = mt->width0; + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + y_pitch = ALIGN(height, align_h); + + if (mt->compressed) { + mt->pitch = ALIGN(mt->width0, align_w); + } + + if (mt->last_level != 0) { + GLuint mip1_width; + + if (mt->compressed) { + mip1_width = ALIGN(minify(mt->width0), align_w) + + ALIGN(minify(minify(mt->width0)), align_w); + } else { + mip1_width = ALIGN(minify(mt->width0), align_w) + + minify(minify(mt->width0)); + } + + if (mip1_width > mt->pitch) { + mt->pitch = mip1_width; + } + } + + mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); + + if (mt->compressed) { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; + } else { + qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; + mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; + } + + for (level = 0; level <= mt->last_level; level++) { + GLuint img_height; + GLuint nr_images = 6; + GLuint q = 0; + + intel_miptree_set_level_info(mt, level, nr_images, x, y, width, + height, 1); + + for (q = 0; q < nr_images; q++) + intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch); + + if (mt->compressed) + img_height = MAX2(1, height/4); + else + img_height = ALIGN(height, align_h); + + if 
(level == 1) { + x += ALIGN(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } + + break; + } + + case GL_TEXTURE_3D: { + GLuint width = mt->width0; + GLuint height = mt->height0; + GLuint depth = mt->depth0; + GLuint pack_x_pitch, pack_x_nr; + GLuint pack_y_pitch; + GLuint level; + GLuint align_h = 2; + GLuint align_w = 4; + + mt->total_height = 0; + intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); + + if (mt->compressed) { + mt->pitch = ALIGN(width, align_w); + pack_y_pitch = (height + 3) / 4; + } else { + mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); + pack_y_pitch = ALIGN(mt->height0, align_h); + } + + pack_x_pitch = width; + pack_x_nr = 1; + + for (level = 0 ; level <= mt->last_level ; level++) { + GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; + GLint x = 0; + GLint y = 0; + GLint q, j; + + intel_miptree_set_level_info(mt, level, nr_images, + 0, mt->total_height, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + intel_miptree_set_image_offset(mt, level, q, x, y); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + mt->total_height += y; + width = minify(width); + height = minify(height); + depth = minify(depth); + + if (mt->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > ALIGN(width, align_w)) { + pack_x_pitch = ALIGN(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= mt->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = ALIGN(pack_y_pitch, align_h); + } + } + + } + /* The 965's sampler lays cachelines out according to how accesses + * in the texture surfaces run, so they may be "vertical" through + * memory. 
As a result, the docs say in Surface Padding Requirements: + * Sampling Engine Surfaces that two extra rows of padding are required. + * We don't know of similar requirements for pre-965, but given that + * those docs are silent on padding requirements in general, let's play + * it safe. + */ + if (mt->target == GL_TEXTURE_CUBE_MAP) + mt->total_height += 2; + break; + } + + default: + i945_miptree_layout_2d(intel, mt, tiling); + break; + } + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + mt->pitch, + mt->total_height, + mt->cpp, + mt->pitch * mt->total_height * mt->cpp ); + + return GL_TRUE; +} + diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 1b73b3fd51..013d839e37 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -29,11 +29,12 @@ * Keith Whitwell */ +#include "pipe/p_state.h" #include "brw_batchbuffer.h" - #include "brw_defines.h" #include "brw_context.h" +#include "brw_pipe_rast.h" #include "brw_eu.h" #include "brw_util.h" #include "brw_sf.h" @@ -45,7 +46,6 @@ static void compile_sf_prog( struct brw_context *brw, struct brw_sf_compile c; const GLuint *program; GLuint program_size; - GLuint i, idx; memset(&c, 0, sizeof(c)); @@ -54,7 +54,7 @@ static void compile_sf_prog( struct brw_context *brw, brw_init_compile(brw, &c.func); c.key = *key; - c.nr_attrs = util_count_bits(c.key.attrs); + c.nr_attrs = c.key.nr_attrs; c.nr_attr_regs = (c.nr_attrs+1)/2; c.nr_setup_attrs = c.key.nr_attrs; c.nr_setup_regs = (c.nr_setup_attrs+1)/2; @@ -62,21 +62,6 @@ static void compile_sf_prog( struct brw_context *brw, c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; - /* Construct map from attribute number to position in the vertex. 
- */ - for (i = idx = 0; i < VERT_RESULT_MAX; i++) - if (c.key.attrs & (1<= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) { - c.point_attrs[i].CoordReplace = - ctx->Point.CoordReplace[i - VERT_RESULT_TEX0]; - } - else { - c.point_attrs[i].CoordReplace = GL_FALSE; - } - idx++; - } /* Which primitive? Or all three? */ @@ -122,7 +107,7 @@ static void compile_sf_prog( struct brw_context *brw, /* Calculate interpolants for triangle and line rasterization. */ -static void upload_sf_prog(struct brw_context *brw) +static int upload_sf_prog(struct brw_context *brw) { struct brw_sf_prog_key key; @@ -131,46 +116,49 @@ static void upload_sf_prog(struct brw_context *brw) /* Populate the key, noting state dependencies: */ /* CACHE_NEW_VS_PROG */ - key.attrs = brw->vs.prog_data->nr_outputs_written; + key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_OUTPUT] + 1; + + + /* XXX: this is probably where the mapping between vertex shader + * outputs and fragment shader inputs should be handled. Assume + * for now 1:1 correspondance. + * + * XXX: scan frag shader inputs to work out linear vs. perspective + * interpolation below. + * + * XXX: as long as we're hard-wiring, is eg. position required to + * be linear? + */ + key.linear_attrs = 0; + key.persp_attrs = (1 << key.nr_attrs) - 1; /* BRW_NEW_REDUCED_PRIMITIVE */ switch (brw->reduced_primitive) { - case GL_TRIANGLES: - /* NOTE: We just use the edgeflag attribute as an indicator that - * unfilled triangles are active. We don't actually do the - * edgeflag testing here, it is already done in the clip - * program. 
+ case PIPE_PRIM_TRIANGLES: + /* PIPE_NEW_RAST */ - if (key.attrs & (1<curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL || + brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL) key.primitive = SF_UNFILLED_TRIS; else key.primitive = SF_TRIANGLES; break; - case GL_LINES: + case PIPE_PRIM_LINES: key.primitive = SF_LINES; break; - case GL_POINTS: + case PIPE_PRIM_POINTS: key.primitive = SF_POINTS; break; } - key.do_point_sprite = ctx->Point.PointSprite; - key.SpriteOrigin = ctx->Point.SpriteOrigin; - /* _NEW_LIGHT */ - key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); - key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); + key.do_point_sprite = brw->curr.rast->templ.point_sprite; + key.sprite_origin_lower_left = 0; /* XXX: ctx->Point.SpriteOrigin - fix rast state */ + key.do_flat_shading = brw->curr.rast->templ.flatshade; + key.do_twoside_color = brw->curr.rast->templ.light_twoside; - /* _NEW_HINT */ - key.linear_color = 0; - - /* _NEW_POLYGON */ if (key.do_twoside_color) { - /* If we're rendering to a FBO, we have to invert the polygon - * face orientation, just as we invert the viewport in - * sf_unit_create_from_key(). ctx->DrawBuffer->Name will be - * nonzero if we're rendering to such an FBO. 
- */ - key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0); + key.frontface_ccw = (brw->curr.rast->templ.front_winding == + PIPE_WINDING_CCW); } brw->sws->bo_unreference(brw->sf.prog_bo); @@ -180,14 +168,16 @@ static void upload_sf_prog(struct brw_context *brw) &brw->sf.prog_data); if (brw->sf.prog_bo == NULL) compile_sf_prog( brw, &key ); + + return 0; } const struct brw_tracked_state brw_sf_prog = { .dirty = { - .mesa = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT), + .mesa = (PIPE_NEW_RAST | PIPE_NEW_VERTEX_SHADER), .brw = (BRW_NEW_REDUCED_PRIMITIVE), - .cache = CACHE_NEW_VS_PROG + .cache = 0 }, .prepare = upload_sf_prog }; diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h index c99116b8b1..0b7003dc5e 100644 --- a/src/gallium/drivers/i965/brw_sf.h +++ b/src/gallium/drivers/i965/brw_sf.h @@ -49,14 +49,21 @@ struct brw_sf_prog_key { */ GLuint persp_attrs:32; GLuint linear_attrs:32; + GLuint point_coord_replace_attrs:32; + GLuint nr_attrs:8; GLuint primitive:2; GLuint do_twoside_color:1; GLuint do_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; GLuint sprite_origin_lower_left:1; - GLuint pad:25; + GLuint pad:17; + + GLuint attr_col0:8; + GLuint attr_col1:8; + GLuint attr_bfc0:8; + GLuint attr_bfc1:8; }; struct brw_sf_point_tex { @@ -101,9 +108,7 @@ struct brw_sf_compile { GLuint nr_setup_attrs; GLuint nr_setup_regs; - GLubyte attr_to_idx[VERT_RESULT_MAX]; - GLubyte idx_to_attr[VERT_RESULT_MAX]; - struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX]; + GLuint point_coord_replace_mask; }; diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index 4acb2b7d72..db52c9553e 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -43,17 +43,12 @@ static struct brw_reg get_vert_attr(struct brw_sf_compile *c, struct brw_reg vert, GLuint attr) { - GLuint off = c->attr_to_idx[attr] / 2; - GLuint sub = 
c->attr_to_idx[attr] % 2; + GLuint off = attr / 2; + GLuint sub = attr % 2; return brw_vec4_grf(vert.nr + off, sub * 4); } -static GLboolean have_attr(struct brw_sf_compile *c, - GLuint attr) -{ - return (c->key.attrs & (1<func; - GLuint i; - for (i = 0; i < 2; i++) { - if (have_attr(c, VERT_RESULT_COL0+i) && - have_attr(c, VERT_RESULT_BFC0+i)) - brw_MOV(p, - get_vert_attr(c, vert, VERT_RESULT_COL0+i), - get_vert_attr(c, vert, VERT_RESULT_BFC0+i)); - } + if (c->key.attr_col0 && c->key.attr_bfc0) + brw_MOV(p, + get_vert_attr(c, vert, c->key.attr_col0), + get_vert_attr(c, vert, c->key.attr_bfc0)); + + if (c->key.attr_col1 && c->key.attr_bfc1) + brw_MOV(p, + get_vert_attr(c, vert, c->key.attr_col1), + get_vert_attr(c, vert, c->key.attr_bfc1)); } @@ -89,8 +85,8 @@ static void do_twoside_color( struct brw_sf_compile *c ) * for user-supplied vertex programs, as t_vp_build.c always does * the right thing. */ - if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) && - !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1))) + if (!(c->key.attr_col0 && c->key.attr_bfc0) && + !(c->key.attr_col1 && c->key.attr_bfc1)) return; /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order @@ -126,14 +122,17 @@ static void copy_colors( struct brw_sf_compile *c, struct brw_reg src) { struct brw_compile *p = &c->func; - GLuint i; - for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) { - if (have_attr(c,i)) - brw_MOV(p, - get_vert_attr(c, dst, i), - get_vert_attr(c, src, i)); - } + if (c->key.attr_col0) + brw_MOV(p, + get_vert_attr(c, dst, c->key.attr_col0), + get_vert_attr(c, src, c->key.attr_col0)); + + if (c->key.attr_col1) + brw_MOV(p, + get_vert_attr(c, dst, c->key.attr_col1), + get_vert_attr(c, src, c->key.attr_col1)); + } @@ -146,10 +145,16 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); - GLuint nr = util_count_bits(c->key.attrs & 
VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; + GLuint nr = 0; - if (!nr) + if (c->key.attr_col0) + nr++; + + if (c->key.attr_col1) + nr++; + + if (nr == 0) return; /* Already done in clip program: @@ -184,10 +189,16 @@ static void do_flatshade_line( struct brw_sf_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg ip = brw_ip_reg(); - GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS); GLuint jmpi = 1; + GLuint nr = 0; + + if (c->key.attr_col0) + nr++; + + if (c->key.attr_col1) + nr++; - if (!nr) + if (nr == 0) return; /* Already done in clip program: @@ -319,10 +330,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, *pc_linear = 0; *pc = 0xf; - if (persp_mask & (1 << c->idx_to_attr[reg*2])) + if (persp_mask & (1 << (reg*2))) *pc_persp = 0xf; - if (linear_mask & (1 << c->idx_to_attr[reg*2])) + if (linear_mask & (1 << (reg*2))) *pc_linear = 0xf; /* Maybe only processs one attribute on the final round: @@ -330,10 +341,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, if (reg*2+1 < c->nr_setup_attrs) { *pc |= 0xf0; - if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) + if (persp_mask & (1 << (reg*2+1))) *pc_persp |= 0xf0; - if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) + if (linear_mask & (1 << (reg*2+1))) *pc_linear |= 0xf0; } @@ -513,24 +524,28 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) alloc_regs(c); copy_z_inv_w(c); + for (i = 0; i < c->nr_setup_regs; i++) { - struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]]; + /* XXX: only seems to check point_coord_replace_attrs for every + * second attribute?!? 
+ */ + boolean coord_replace = !!(c->key.point_coord_replace_attrs & (1<<(2*i))); struct brw_reg a0 = offset(c->vert[0], i); GLushort pc, pc_persp, pc_linear; GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); if (pc_persp) { - if (!tex->CoordReplace) { - brw_set_predicate_control_flag_value(p, pc_persp); - brw_MUL(p, a0, a0, c->inv_w[0]); - } + if (coord_replace) { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } } - if (tex->CoordReplace) { - /* Caculate 1.0/PointWidth */ - brw_math(&c->func, + if (coord_replace) { + /* Caculate 1.0/PointWidth */ + brw_math(&c->func, c->tmp, BRW_MATH_FUNCTION_INV, BRW_MATH_SATURATE_NONE, @@ -539,33 +554,37 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) BRW_MATH_DATA_SCALAR, BRW_MATH_PRECISION_FULL); - if (c->key.SpriteOrigin == GL_LOWER_LEFT) { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } else { - brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); - brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); - brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); - } - } else { - brw_MOV(p, c->m1Cx, brw_imm_ud(0)); - brw_MOV(p, c->m2Cy, brw_imm_ud(0)); + if (c->key.sprite_origin_lower_left) { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0])); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } + else { + brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]); + brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0)); + } + } + else { + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); } { 
brw_set_predicate_control_flag_value(p, pc); - if (tex->CoordReplace) { - if (c->key.sprite_origin_lower_left) { - brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); - brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); - } - else - brw_MOV(p, c->m3C0, brw_imm_f(0.0)); - } else { - brw_MOV(p, c->m3C0, a0); /* constant value */ + if (coord_replace) { + if (c->key.sprite_origin_lower_left) { + brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0)); + brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0)); + } + else { + brw_MOV(p, c->m3C0, brw_imm_f(0.0)); + } + } + else { + brw_MOV(p, c->m3C0, a0); /* constant value */ } /* Copy m0..m3 to URB. diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index 648a16a038..fbc9f15eb4 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -29,58 +29,48 @@ * Keith Whitwell */ +#include "util/u_math.h" +#include "pipe/p_state.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_debug.h" +#include "brw_pipe_rast.h" -static void upload_sf_vp(struct brw_context *brw) +static int upload_sf_vp(struct brw_context *brw) { - const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF; + const struct pipe_viewport_state *vp = &brw->curr.vp; + const struct pipe_scissor_state *scissor = &brw->curr.scissor; struct brw_sf_viewport sfv; - GLfloat y_scale, y_bias; - const GLfloat *v = ctx->Viewport._WindowMap.m; memset(&sfv, 0, sizeof(sfv)); - y_scale = 1.0; - y_bias = 0; + /* PIPE_NEW_VIEWPORT, PIPE_NEW_SCISSOR */ - /* _NEW_VIEWPORT */ + sfv.viewport.m00 = vp->scale[0]; + sfv.viewport.m11 = vp->scale[1]; + sfv.viewport.m22 = vp->scale[2]; + sfv.viewport.m30 = vp->translate[0]; + sfv.viewport.m31 = vp->translate[1]; + sfv.viewport.m32 = vp->translate[2]; - sfv.viewport.m00 = v[MAT_SX]; - sfv.viewport.m11 = v[MAT_SY] * y_scale; - sfv.viewport.m22 = v[MAT_SZ] * depth_scale; - sfv.viewport.m30 = v[MAT_TX]; - 
sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias; - sfv.viewport.m32 = v[MAT_TZ] * depth_scale; - - /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT - * for DrawBuffer->_[XY]{min,max} - */ - - /* The scissor only needs to handle the intersection of drawable and - * scissor rect. - * - * Note that the hardware's coordinates are inclusive, while Mesa's min is - * inclusive but max is exclusive. - */ - /* Y=0=bottom */ - sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; - sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; - sfv.scissor.ymin = ctx->DrawBuffer->_Ymin; - sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1; + sfv.scissor.xmin = scissor->minx; + sfv.scissor.xmax = scissor->maxx; /* -1 ?? */ + sfv.scissor.ymin = scissor->miny; + sfv.scissor.ymax = scissor->maxy; /* -1 ?? */ brw->sws->bo_unreference(brw->sf.vp_bo); brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); + + return 0; } const struct brw_tracked_state brw_sf_vp = { .dirty = { - .mesa = (_NEW_VIEWPORT | - _NEW_SCISSOR | - _NEW_BUFFERS), + .mesa = (PIPE_NEW_VIEWPORT | + PIPE_NEW_SCISSOR), .brw = 0, .cache = 0 }, @@ -90,15 +80,17 @@ const struct brw_tracked_state brw_sf_vp = { struct brw_sf_unit_key { unsigned int total_grf; unsigned int urb_entry_read_length; - unsigned int nr_urb_entries, urb_size, sfsize; - - GLenum front_face, cull_face, provoking_vertex; + unsigned scissor:1; unsigned line_smooth:1; unsigned point_sprite:1; unsigned point_attenuated:1; - unsigned render_to_fbo:1; + unsigned front_face:2; + unsigned cull_mode:2; + unsigned flatshade_first:1; + unsigned gl_rasterization_rules:1; + unsigned line_last_pixel_enable:1; float line_width; float point_size; }; @@ -106,6 +98,7 @@ struct brw_sf_unit_key { static void sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) { + const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ; memset(key, 0, sizeof(*key)); /* CACHE_NEW_SF_PROG */ @@ -117,25 +110,22 @@ sf_unit_populate_key(struct brw_context *brw, struct 
brw_sf_unit_key *key) key->urb_size = brw->urb.vsize; key->sfsize = brw->urb.sfsize; - key->scissor = ctx->Scissor.Enabled; - key->front_face = ctx->Polygon.FrontFace; - - if (ctx->Polygon.CullFlag) - key->cull_face = ctx->Polygon.CullFaceMode; - else - key->cull_face = GL_NONE; - - key->line_width = ctx->Line.Width; - key->line_smooth = ctx->Line.SmoothFlag; - - key->point_sprite = ctx->Point.PointSprite; - key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); - key->point_attenuated = ctx->Point._Attenuated; - - /* _NEW_LIGHT */ - key->provoking_vertex = ctx->Light.ProvokingVertex; - - key->render_to_fbo = 1; + /* PIPE_NEW_RAST */ + key->scissor = rast->scissor; + key->front_face = rast->front_winding; + key->cull_mode = rast->cull_mode; + key->line_smooth = rast->line_smooth; + key->line_width = rast->line_width; + key->flatshade_first = rast->flatshade_first; + key->line_last_pixel_enable = rast->line_last_pixel; + key->gl_rasterization_rules = rast->gl_rasterization_rules; + + key->point_sprite = rast->point_sprite; + key->point_attenuated = rast->point_size_per_vertex; + + key->point_size = CLAMP(rast->point_size, + rast->point_size_min, + rast->point_size_max); } static struct brw_winsys_buffer * @@ -147,7 +137,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, int chipset_max_threads; memset(&sf, 0, sizeof(sf)); - sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */ sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -174,10 +164,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1; - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) sf.thread4.max_threads = 0; - if (INTEL_DEBUG & DEBUG_STATS) + if 
(BRW_DEBUG & DEBUG_STATS) sf.thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ @@ -185,31 +175,30 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.sf5.viewport_transform = 1; - /* _NEW_SCISSOR */ if (key->scissor) sf.sf6.scissor = 1; - /* _NEW_POLYGON */ - if (key->front_face == GL_CCW) + if (key->front_face == PIPE_WINDING_CCW) sf.sf5.front_winding = BRW_FRONTWINDING_CCW; else sf.sf5.front_winding = BRW_FRONTWINDING_CW; - switch (key->cull_face) { - case GL_FRONT: - sf.sf6.cull_mode = BRW_CULLMODE_FRONT; + switch (key->cull_mode) { + case PIPE_WINDING_CCW: + case PIPE_WINDING_CW: + sf.sf6.cull_mode = (key->front_face == key->cull_mode ? + BRW_CULLMODE_FRONT : + BRW_CULLMODE_BACK); break; - case GL_BACK: - sf.sf6.cull_mode = BRW_CULLMODE_BACK; - break; - case GL_FRONT_AND_BACK: + case PIPE_WINDING_BOTH: sf.sf6.cull_mode = BRW_CULLMODE_BOTH; break; - case GL_NONE: + case PIPE_WINDING_NONE: sf.sf6.cull_mode = BRW_CULLMODE_NONE; break; default: assert(0); + sf.sf6.cull_mode = BRW_CULLMODE_NONE; break; } @@ -223,9 +212,9 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else if (sf.sf6.line_width <= 0x2) sf.sf6.line_width = 0; - /* _NEW_BUFFERS */ - key->render_to_fbo = 1; - if (!key->render_to_fbo) { + /* XXX: gl_rasterization_rules? something else? 
+ */ + if (0) { /* Rendering to an OpenGL window */ sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; } @@ -261,7 +250,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ - if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) { + if (!key->flatshade_first) { sf.sf7.trifan_pv = 2; sf.sf7.linestrip_pv = 1; sf.sf7.tristrip_pv = 2; @@ -270,12 +259,19 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.sf7.linestrip_pv = 0; sf.sf7.tristrip_pv = 0; } - sf.sf7.line_last_pixel_enable = 0; + + sf.sf7.line_last_pixel_enable = key->line_last_pixel_enable; /* Set bias for OpenGL rasterization rules: */ - sf.sf6.dest_org_vbias = 0x8; - sf.sf6.dest_org_hbias = 0x8; + if (key->gl_rasterization_rules) { + sf.sf6.dest_org_vbias = 0x8; + sf.sf6.dest_org_hbias = 0x8; + } + else { + sf.sf6.dest_org_vbias = 0x0; + sf.sf6.dest_org_hbias = 0x0; + } bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, key, sizeof(*key), @@ -287,23 +283,23 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. 
*/ /* Emit SF program relocation */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - sf.thread0.grf_reg_count << 1, - offsetof(struct brw_sf_unit_state, thread0), - brw->sf.prog_bo); + brw->sws->bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + sf.thread0.grf_reg_count << 1, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); /* Emit SF viewport relocation */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), - offsetof(struct brw_sf_unit_state, sf5), - brw->sf.vp_bo); + brw->sws->bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); return bo; } -static void upload_sf_unit( struct brw_context *brw ) +static int upload_sf_unit( struct brw_context *brw ) { struct brw_sf_unit_key key; struct brw_winsys_buffer *reloc_bufs[2]; @@ -321,16 +317,12 @@ static void upload_sf_unit( struct brw_context *brw ) if (brw->sf.state_bo == NULL) { brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs); } + return 0; } const struct brw_tracked_state brw_sf_unit = { .dirty = { - .mesa = (_NEW_POLYGON | - _NEW_LIGHT | - _NEW_LINE | - _NEW_POINT | - _NEW_SCISSOR | - _NEW_BUFFERS), + .mesa = (PIPE_NEW_RAST), .brw = BRW_NEW_URB_FENCE, .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index 663fc839df..2275e9ad69 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -168,9 +168,20 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, void brw_destroy_batch_cache( struct brw_context *brw ); void brw_clear_batch_cache( struct brw_context *brw ); -/* brw_wm_surface_state.c */ +/*********************************************************************** + * brw_wm_surface_state.c + */ struct brw_winsys_buffer * brw_create_constant_surface( 
struct brw_context *brw, struct brw_surface_key *key ); +/*********************************************************************** + * brw_state_debug.c + */ +void brw_update_dirty_counts( unsigned mesa, + unsigned brw, + unsigned cache ); + + + #endif diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c index 324fce5163..7d212e5c24 100644 --- a/src/gallium/drivers/i965/brw_state_batch.c +++ b/src/gallium/drivers/i965/brw_state_batch.c @@ -46,7 +46,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, struct brw_cached_batch_item *item = brw->cached_batch_items; struct header *newheader = (struct header *)data; - if (brw->emit_state_always) { + if (brw->flags.always_emit_state) { brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS); return GL_TRUE; } @@ -56,8 +56,8 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) return GL_FALSE; if (item->sz != sz) { - _mesa_free(item->header); - item->header = _mesa_malloc(sz); + FREE(item->header); + item->header = MALLOC(sz); item->sz = sz; } goto emit; @@ -67,7 +67,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw, assert(!item); item = CALLOC_STRUCT(brw_cached_batch_item); - item->header = _mesa_malloc(sz); + item->header = MALLOC(sz); item->sz = sz; item->next = brw->cached_batch_items; brw->cached_batch_items = item; diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index 97f88b3ab3..4310d01ba2 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -55,7 +55,9 @@ * only one of the two buffers referenced gets put into the offset, and the * incorrect program is run for the other instance. 
*/ +#include "util/u_memory.h" +#include "brw_debug.h" #include "brw_state.h" #include "brw_batchbuffer.h" @@ -107,9 +109,9 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, if (bo == cache->last_bo[cache_id]) return; /* no change */ - brw->sws->bo_unreference(cache->last_bo[cache_id]); + cache->sws->bo_unreference(cache->last_bo[cache_id]); cache->last_bo[cache_id] = bo; - brw->sws->bo_reference(cache->last_bo[cache_id]); + cache->sws->bo_reference(cache->last_bo[cache_id]); cache->brw->state.dirty.cache |= 1 << cache_id; } @@ -127,7 +129,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, for (c = cache->items[hash % cache->size]; c; c = c->next) bucketcount++; - fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size, + debug_printf("bucket %d/%d = %d/%d items\n", hash % cache->size, cache->size, bucketcount, cache->n_items); #endif @@ -154,7 +156,7 @@ rehash(struct brw_cache *cache) GLuint size, i; size = cache->size * 3; - items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items)); + items = (struct brw_cache_item**) CALLOC(size, sizeof(*items)); for (i = 0; i < cache->size; i++) for (c = cache->items[i]; c; c = next) { @@ -194,7 +196,7 @@ brw_search_cache(struct brw_cache *cache, update_cache_last(cache, cache_id, item->bo); - brw->sws->bo_reference(item->bo); + cache->sws->bo_reference(item->bo); return item->bo; } @@ -219,20 +221,25 @@ brw_upload_cache( struct brw_cache *cache, struct brw_winsys_buffer *bo; int i; - /* Create the buffer object to contain the data */ - bo = brw->sws->bo_alloc(cache->sws, - cache->buffer_type[cache_id], data_size, 1 << 6); + /* Create the buffer object to contain the data. For now, use a + * single buffer type to describe all cached state atoms. Later, + * may want to take advantage of hardware distinctions between + * these various entities. 
+ */ + bo = cache->sws->bo_alloc(cache->sws, + BRW_BUFFER_TYPE_STATE_CACHE, + data_size, 1 << 6); /* Set up the memory containing the key, aux_data, and reloc_bufs */ - tmp = _mesa_malloc(key_size + aux_size + relocs_size); + tmp = MALLOC(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); memcpy(tmp + key_size, aux, cache->aux_size[cache_id]); memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); for (i = 0; i < nr_reloc_bufs; i++) { if (reloc_bufs[i] != NULL) - brw->sws->bo_reference(reloc_bufs[i]); + cache->sws->bo_reference(reloc_bufs[i]); } item->cache_id = cache_id; @@ -243,7 +250,7 @@ brw_upload_cache( struct brw_cache *cache, item->nr_reloc_bufs = nr_reloc_bufs; item->bo = bo; - brw->sws->bo_reference(bo); + cache->sws->bo_reference(bo); item->data_size = data_size; if (cache->n_items > cache->size * 1.5) @@ -259,13 +266,13 @@ brw_upload_cache( struct brw_cache *cache, *(void **)aux_return = (void *)((char *)item->key + item->key_size); } - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("upload %s: %d bytes to cache id %d\n", + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("upload %s: %d bytes to cache id %d\n", cache->name[cache_id], data_size, cache_id); /* Copy data to the buffer */ - dri_bo_subdata(bo, 0, data_size, data); + cache->sws->bo_subdata(bo, 0, data_size, data); update_cache_last(cache, cache_id, bo); @@ -292,7 +299,7 @@ brw_cache_data_sz(struct brw_cache *cache, reloc_bufs, nr_reloc_bufs); if (item) { update_cache_last(cache, cache_id, item->bo); - brw->sws->bo_reference(item->bo); + cache->sws->bo_reference(item->bo); return item->bo; } @@ -349,11 +356,12 @@ brw_init_non_surface_cache(struct brw_context *brw) struct brw_cache *cache = &brw->cache; cache->brw = brw; + cache->sws = brw->sws; cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + CALLOC(cache->size, sizeof(struct brw_cache_item)); brw_init_cache_id(cache, "CC_VP", @@ -457,7 
+465,7 @@ brw_init_surface_cache(struct brw_context *brw) cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) - _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); + CALLOC(cache->size, sizeof(struct brw_cache_item)); brw_init_cache_id(cache, "SS_SURFACE", @@ -487,8 +495,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) struct brw_cache_item *c, *next; GLuint i; - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { @@ -507,7 +515,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) cache->n_items = 0; if (brw->curbe.last_buf) { - _mesa_free(brw->curbe.last_buf); + FREE(brw->curbe.last_buf); brw->curbe.last_buf = NULL; } @@ -527,8 +535,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo) struct brw_cache_item **prev; GLuint i; - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); for (i = 0; i < cache->size; i++) { for (prev = &cache->items[i]; *prev;) { @@ -540,8 +548,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo) *prev = c->next; for (j = 0; j < c->nr_reloc_bufs; j++) - brw->sws->bo_unreference(c->reloc_bufs[j]); - brw->sws->bo_unreference(c->bo); + cache->sws->bo_unreference(c->reloc_bufs[j]); + cache->sws->bo_unreference(c->bo); free((void *)c->key); free(c); cache->n_items--; @@ -555,8 +563,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo) void brw_state_cache_check_size(struct brw_context *brw) { - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); /* un-tuned guess. 
We've got around 20 state objects for a total of around * 32k, so 1000 of them is around 1.5MB. @@ -574,8 +582,8 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) { GLuint i; - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("%s\n", __FUNCTION__); + if (BRW_DEBUG & DEBUG_STATE) + debug_printf("%s\n", __FUNCTION__); brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c index 22cea4b7d8..cc4744dc16 100644 --- a/src/gallium/drivers/i965/brw_state_debug.c +++ b/src/gallium/drivers/i965/brw_state_debug.c @@ -109,8 +109,25 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits) if (bit_map[i].bit == 0) return; - fprintf(stderr, "0x%08x: %12d (%s)\n", + debug_printf("0x%08x: %12d (%s)\n", bit_map[i].bit, bit_map[i].count, bit_map[i].name); } } +void +brw_update_dirty_counts( unsigned mesa, + unsigned brw, + unsigned cache ) +{ + static int dirty_count = 0; + + brw_update_dirty_count(mesa_bits, mesa); + brw_update_dirty_count(brw_bits, brw); + brw_update_dirty_count(cache_bits, cache); + if (dirty_count++ % 1000 == 0) { + brw_print_dirty_count(mesa_bits, mesa); + brw_print_dirty_count(brw_bits, brw); + brw_print_dirty_count(cache_bits, cache); + debug_printf("\n"); + } +} diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c index 1bc83fb9c1..72604304d4 100644 --- a/src/gallium/drivers/i965/brw_state_dump.c +++ b/src/gallium/drivers/i965/brw_state_dump.c @@ -28,6 +28,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_winsys.h" /** * Prints out a header, the contents, and the message associated with @@ -44,28 +45,32 @@ state_out(const char *name, void *data, uint32_t hw_offset, int index, { va_list va; - fprintf(stderr, "%8s: 0x%08x: 0x%08x: ", - name, hw_offset + index * 4, ((uint32_t *)data)[index]); + debug_printf("%8s: 0x%08x: 0x%08x: ", + 
name, hw_offset + index * 4, ((uint32_t *)data)[index]); va_start(va, fmt); - vfprintf(stderr, fmt, va); + debug_vprintf(fmt, va); va_end(va); } /** Generic, undecoded state buffer debug printout */ static void -state_struct_out(const char *name, struct brw_winsys_buffer *buffer, unsigned int state_size) +state_struct_out(struct brw_winsys_screen *sws, + const char *name, + struct brw_winsys_buffer *buffer, + unsigned int state_size) { int i; + void *data; if (buffer == NULL) return; - dri_bo_map(buffer, GL_FALSE); + data = sws->bo_map(buffer, GL_FALSE); for (i = 0; i < state_size / 4; i++) { - state_out(name, buffer->virtual, buffer->offset, i, + state_out(name, data, buffer->offset, i, "dword %d\n", i); } - dri_bo_unmap(buffer); + sws->bo_unmap(buffer); } static const char * @@ -106,12 +111,11 @@ static void dump_wm_surface_state(struct brw_context *brw) char name[20]; if (surf_bo == NULL) { - fprintf(stderr, " WM SS%d: NULL\n", i); + debug_printf(" WM SS%d: NULL\n", i); continue; } - dri_bo_map(surf_bo, GL_FALSE); + surf = (struct brw_surface_state *)brw->sws->bo_map(surf_bo, GL_FALSE); surfoff = surf_bo->offset; - surf = (struct brw_surface_state *)(surf_bo->virtual); sprintf(name, "WM SS%d", i); state_out(name, surf, surfoff, 0, "%s %s\n", @@ -127,7 +131,7 @@ static void dump_wm_surface_state(struct brw_context *brw) state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", surf->ss5.x_offset, surf->ss5.y_offset); - dri_bo_unmap(surf_bo); + brw->sws->bo_unmap(surf_bo); } } @@ -140,9 +144,7 @@ static void dump_sf_viewport_state(struct brw_context *brw) if (brw->sf.vp_bo == NULL) return; - dri_bo_map(brw->sf.vp_bo, GL_FALSE); - - vp = brw->sf.vp_bo->virtual; + vp = (struct brw_sf_viewport *)brw->sws->bo_map(brw->sf.vp_bo, GL_FALSE); vp_off = brw->sf.vp_bo->offset; state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); @@ -157,10 +159,12 @@ static void dump_sf_viewport_state(struct brw_context *brw) state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", 
vp->scissor.xmax, vp->scissor.ymax); - dri_bo_unmap(brw->sf.vp_bo); + brw->sws->bo_unmap(brw->sf.vp_bo); } -static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog) +static void brw_debug_prog(struct brw_winsys_screen *sws, + const char *name, + struct brw_winsys_buffer *prog) { unsigned int i; uint32_t *data; @@ -168,12 +172,10 @@ static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog) if (prog == NULL) return; - dri_bo_map(prog, GL_FALSE); - - data = prog->virtual; + data = (uint32_t *)sws->bo_map(prog, GL_FALSE); for (i = 0; i < prog->size / 4 / 4; i++) { - fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", + debug_printf("%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", name, (unsigned int)prog->offset + i * 4 * 4, data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); /* Stop at the end of the program. It'd be nice to keep track of the actual @@ -186,7 +188,7 @@ static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog) break; } - dri_bo_unmap(prog); + sws->bo_unmap(prog); } @@ -202,19 +204,21 @@ static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog) */ void brw_debug_batch(struct brw_context *brw) { - state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); + struct brw_winsys_screen *sws = brw->sws; + + state_struct_out(sws, "WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); dump_wm_surface_state(brw); - state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state)); - brw_debug_prog("VS prog", brw->vs.prog_bo); + state_struct_out(sws, "VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state)); + brw_debug_prog(sws, "VS prog", brw->vs.prog_bo); - state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state)); - brw_debug_prog("GS prog", brw->gs.prog_bo); + state_struct_out(sws, "GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state)); + brw_debug_prog(sws, "GS prog", brw->gs.prog_bo); - state_struct_out("SF", 
brw->sf.state_bo, sizeof(struct brw_sf_unit_state)); + state_struct_out(sws, "SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state)); dump_sf_viewport_state(brw); - brw_debug_prog("SF prog", brw->sf.prog_bo); + brw_debug_prog(sws, "SF prog", brw->sf.prog_bo); - state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state)); - brw_debug_prog("WM prog", brw->wm.prog_bo); + state_struct_out(sws, "WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state)); + brw_debug_prog(sws, "WM prog", brw->wm.prog_bo); } diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index 8659e35289..eff3a40a46 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -34,6 +34,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_batchbuffer.h" +#include "brw_debug.h" /* This is used to initialize brw->state.atoms[]. We could use this * list directly except for a single atom, brw_constant_buffer, which @@ -83,12 +84,8 @@ const struct brw_tracked_state *atoms[] = &brw_blend_constant_color, &brw_depthbuffer, - &brw_polygon_stipple, - &brw_polygon_stipple_offset, - &brw_line_stipple, - &brw_aa_line_parameters, &brw_psp_urb_cbs, @@ -163,11 +160,12 @@ enum pipe_error brw_validate_state( struct brw_context *brw ) { struct brw_state_flags *state = &brw->state.dirty; GLuint i; + int ret; brw_clear_validated_bos(brw); - brw_add_validated_bo(brw, intel->batch->buf); + brw_add_validated_bo(brw, brw->batch->buf); - if (brw->emit_state_always) { + if (brw->flags.always_emit_state) { state->mesa |= ~0; state->brw |= ~0; state->cache |= ~0; @@ -199,10 +197,10 @@ enum pipe_error brw_validate_state( struct brw_context *brw ) * If this fails, we can experience GPU lock-ups. 
*/ { - const struct brw_fragment_program *fp = brw->fragment_program; + const struct brw_fragment_shader *fp = brw->curr.fragment_shader; if (fp) { - assert(fp->info.max_sampler <= brw->nr_samplers && - fp->info.max_texture <= brw->nr_textures); + assert(fp->info.file_max[TGSI_FILE_SAMPLER] < brw->curr.num_samplers && + fp->info.texture_max < brw->curr.num_textures); } } @@ -213,18 +211,18 @@ enum pipe_error brw_validate_state( struct brw_context *brw ) enum pipe_error brw_upload_state(struct brw_context *brw) { struct brw_state_flags *state = &brw->state.dirty; + int ret; int i; - static int dirty_count = 0; brw_clear_validated_bos(brw); - if (INTEL_DEBUG) { + if (BRW_DEBUG) { /* Debug version which enforces various sanity checks on the * state flags which are generated and checked to help ensure * state atoms are ordered correctly in the list. */ struct brw_state_flags examined, prev; - _mesa_memset(&examined, 0, sizeof(examined)); + memset(&examined, 0, sizeof(examined)); prev = *state; for (i = 0; i < Elements(atoms); i++) { @@ -268,19 +266,14 @@ enum pipe_error brw_upload_state(struct brw_context *brw) } } - if (INTEL_DEBUG & DEBUG_STATE) { - brw_update_dirty_count(mesa_bits, state->mesa); - brw_update_dirty_count(brw_bits, state->brw); - brw_update_dirty_count(cache_bits, state->cache); - if (dirty_count++ % 1000 == 0) { - brw_print_dirty_count(mesa_bits, state->mesa); - brw_print_dirty_count(brw_bits, state->brw); - brw_print_dirty_count(cache_bits, state->cache); - debug_printf("\n"); - } + if (BRW_DEBUG & DEBUG_STATE) { + brw_update_dirty_counts( state->mesa, + state->brw, + state->cache ); } /* Clear dirty flags: */ memset(state, 0, sizeof(*state)); + return 0; } diff --git a/src/gallium/drivers/i965/brw_tex.c b/src/gallium/drivers/i965/brw_tex.c deleted file mode 100644 index 6f7adb6393..0000000000 --- a/src/gallium/drivers/i965/brw_tex.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. 
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_context.h" - -/** - * Finalizes all textures, completing any rendering that needs to be done - * to prepare them. - */ -void brw_validate_textures( struct brw_context *brw ) -{ - int i; - - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - - if (texUnit->_ReallyEnabled) { - intel_finalize_mipmap_tree(intel, i); - } - } -} diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c deleted file mode 100644 index 50c30878c6..0000000000 --- a/src/gallium/drivers/i965/brw_tex_layout.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. 
All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -/* Code to layout images in a mipmap tree for i965. 
- */ - -#include "brw_tex_layout.h" - -#define FILE_DEBUG_FLAG DEBUG_MIPTREE - -GLboolean brw_miptree_layout(struct brw_context *brw, - struct intel_mipmap_tree *mt, - uint32_t tiling) -{ - /* XXX: these vary depending on image format: */ - /* GLint align_w = 4; */ - - switch (mt->target) { - case GL_TEXTURE_CUBE_MAP: - if (IS_IGDNG(brw->brw_screen->pci_id)) { - GLuint align_h = 2, align_w = 4; - GLuint level; - GLuint x = 0; - GLuint y = 0; - GLuint width = mt->width0; - GLuint height = mt->height0; - GLuint qpitch = 0; - GLuint y_pitch = 0; - - mt->pitch = mt->width0; - intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); - y_pitch = ALIGN(height, align_h); - - if (mt->compressed) { - mt->pitch = ALIGN(mt->width0, align_w); - } - - if (mt->last_level != 0) { - GLuint mip1_width; - - if (mt->compressed) { - mip1_width = ALIGN(minify(mt->width0), align_w) - + ALIGN(minify(minify(mt->width0)), align_w); - } else { - mip1_width = ALIGN(minify(mt->width0), align_w) - + minify(minify(mt->width0)); - } - - if (mip1_width > mt->pitch) { - mt->pitch = mip1_width; - } - } - - mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); - - if (mt->compressed) { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; - mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; - } else { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; - mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; - } - - for (level = 0; level <= mt->last_level; level++) { - GLuint img_height; - GLuint nr_images = 6; - GLuint q = 0; - - intel_miptree_set_level_info(mt, level, nr_images, x, y, width, - height, 1); - - for (q = 0; q < nr_images; q++) - intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch); - - if (mt->compressed) - img_height = MAX2(1, height/4); - else - img_height = ALIGN(height, align_h); - - if 
(level == 1) { - x += ALIGN(width, align_w); - } - else { - y += img_height; - } - - width = minify(width); - height = minify(height); - } - - break; - } - - case GL_TEXTURE_3D: { - GLuint width = mt->width0; - GLuint height = mt->height0; - GLuint depth = mt->depth0; - GLuint pack_x_pitch, pack_x_nr; - GLuint pack_y_pitch; - GLuint level; - GLuint align_h = 2; - GLuint align_w = 4; - - mt->total_height = 0; - intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); - - if (mt->compressed) { - mt->pitch = ALIGN(width, align_w); - pack_y_pitch = (height + 3) / 4; - } else { - mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); - pack_y_pitch = ALIGN(mt->height0, align_h); - } - - pack_x_pitch = width; - pack_x_nr = 1; - - for (level = 0 ; level <= mt->last_level ; level++) { - GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6; - GLint x = 0; - GLint y = 0; - GLint q, j; - - intel_miptree_set_level_info(mt, level, nr_images, - 0, mt->total_height, - width, height, depth); - - for (q = 0; q < nr_images;) { - for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { - intel_miptree_set_image_offset(mt, level, q, x, y); - x += pack_x_pitch; - } - - x = 0; - y += pack_y_pitch; - } - - - mt->total_height += y; - width = minify(width); - height = minify(height); - depth = minify(depth); - - if (mt->compressed) { - pack_y_pitch = (height + 3) / 4; - - if (pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); - pack_x_nr <<= 1; - } - } else { - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); - } - } - - } - /* The 965's sampler lays cachelines out according to how accesses - * in the texture surfaces run, so they may be "vertical" through - * memory. 
As a result, the docs say in Surface Padding Requirements: - * Sampling Engine Surfaces that two extra rows of padding are required. - * We don't know of similar requirements for pre-965, but given that - * those docs are silent on padding requirements in general, let's play - * it safe. - */ - if (mt->target == GL_TEXTURE_CUBE_MAP) - mt->total_height += 2; - break; - } - - default: - i945_miptree_layout_2d(intel, mt, tiling); - break; - } - DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - mt->pitch, - mt->total_height, - mt->cpp, - mt->pitch * mt->total_height * mt->cpp ); - - return GL_TRUE; -} - diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c index a2277519ad..ff2466528d 100644 --- a/src/gallium/drivers/i965/brw_urb.c +++ b/src/gallium/drivers/i965/brw_urb.c @@ -184,17 +184,17 @@ static void recalculate_urb_fence( struct brw_context *brw ) * entries and the values for minimum nr of entries * provided above. */ - _mesa_printf("couldn't calculate URB layout!\n"); + debug_printf("couldn't calculate URB layout!\n"); exit(1); } - if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) - _mesa_printf("URB CONSTRAINED\n"); + if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) + debug_printf("URB CONSTRAINED\n"); } done: - if (INTEL_DEBUG & DEBUG_URB) - _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", + if (BRW_DEBUG & DEBUG_URB) + debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. 
%d\n", brw->urb.vs_start, brw->urb.gs_start, brw->urb.clip_start, diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index 54f7d7d7c4..e33fa2f0aa 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -64,7 +64,7 @@ struct brw_vs_compile { struct brw_reg r0; struct brw_reg r1; - struct brw_reg regs[PROGRAM_ADDRESS+1][128]; + struct brw_reg regs[TGSI_FILE_COUNT][128]; struct brw_reg tmp; struct brw_reg stack; diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 086f54799e..04132a167b 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -242,10 +242,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->prog_data.total_grf = reg; - if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); - _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); - _mesa_printf("%s reg = %d\n", __FUNCTION__, reg); + if (BRW_DEBUG & DEBUG_VS) { + debug_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); + debug_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); + debug_printf("%s reg = %d\n", __FUNCTION__, reg); } } @@ -1248,10 +1248,10 @@ void brw_vs_emit(struct brw_vs_compile *c ) GLuint index; GLuint file; - if (INTEL_DEBUG & DEBUG_VS) { - _mesa_printf("vs-mesa:\n"); + if (BRW_DEBUG & DEBUG_VS) { + debug_printf("vs-mesa:\n"); _mesa_print_program(&c->vp->program.Base); - _mesa_printf("\n"); + debug_printf("\n"); } brw_set_compression_control(p, BRW_COMPRESSION_NONE); @@ -1526,12 +1526,12 @@ void brw_vs_emit(struct brw_vs_compile *c ) post_vs_emit(c, end_inst, last_inst); - if (INTEL_DEBUG & DEBUG_VS) { + if (BRW_DEBUG & DEBUG_VS) { int i; - _mesa_printf("vs-native:\n"); + debug_printf("vs-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - 
_mesa_printf("\n"); + debug_printf("\n"); } } diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index 1717223e49..05a91f2de4 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -122,7 +122,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2, 1, chipset_max_threads) - 1; - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) vs.thread4.max_threads = 0; /* No samplers for ARB_vp programs: @@ -131,7 +131,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) */ vs.vs5.sampler_count = 0; - if (INTEL_DEBUG & DEBUG_STATS) + if (BRW_DEBUG & DEBUG_STATS) vs.thread4.stats_enable = 1; /* Vertex program always enabled: diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index 51e23b9640..33032276bc 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -69,6 +69,7 @@ enum brw_buffer_type BRW_BUFFER_TYPE_SHADER_CONSTANTS, BRW_BUFFER_TYPE_WM_SCRATCH, BRW_BUFFER_TYPE_BATCH, + BRW_BUFFER_TYPE_STATE_CACHE, }; @@ -156,11 +157,15 @@ struct brw_winsys_screen { unsigned offset, struct brw_winsys_buffer *b2); - void (*bo_subdata)(struct brw_winsys_buffer *dst, + void (*bo_subdata)(struct brw_winsys_buffer *buffer, size_t offset, size_t size, const void *data); + boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer); + boolean (*bo_references)(struct brw_winsys_buffer *a, + struct brw_winsys_buffer *b); + /* XXX: couldn't this be handled by returning true/false on * bo_emit_reloc? */ @@ -171,18 +176,13 @@ struct brw_winsys_screen { /** * Map a buffer. */ - void *(*buffer_map)(struct brw_winsys *iws, - struct brw_winsys_buffer *buffer, - boolean write); + void *(*bo_map)(struct brw_winsys_buffer *buffer, + boolean write); /** * Unmap a buffer. 
*/ - void (*buffer_unmap)(struct brw_winsys *iws, - struct brw_winsys_buffer *buffer); - - void (*buffer_destroy)(struct brw_winsys *iws, - struct brw_winsys_buffer *buffer); + void (*bo_unmap)(struct brw_winsys_buffer *buffer); /*@}*/ diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 764708f7df..3d889699f8 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -178,8 +178,8 @@ static void do_wm_prog( struct brw_context *brw, brw_wm_non_glsl_emit(brw, c); } - if (INTEL_DEBUG & DEBUG_WM) - fprintf(stderr, "\n"); + if (BRW_DEBUG & DEBUG_WM) + debug_printf("\n"); /* get the program */ diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index bf241f5fa4..5bc2a49c1f 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -33,6 +33,7 @@ #ifndef BRW_WM_H #define BRW_WM_H +#include "tgsi/tgsi_ureg.h" #include "brw_context.h" #include "brw_eu.h" @@ -57,17 +58,18 @@ #define AA_ALWAYS 2 struct brw_wm_prog_key { + unsigned proj_attrib_mask; /**< one bit per fragment program attribute */ + unsigned linear_attrib_mask:1; /**< linear interpolation vs perspective interp */ + GLuint source_depth_reg:3; GLuint aa_dest_stencil_reg:3; GLuint dest_depth_reg:3; GLuint nr_depth_regs:3; - GLuint computes_depth:1; /* could be derived from program string */ + GLuint computes_depth:1; GLuint source_depth_to_render_target:1; GLuint flat_shade:1; - GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint runtime_check_aads_emit:1; - - GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ + GLuint shadowtex_mask:16; GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ @@ -75,7 +77,7 @@ struct brw_wm_prog_key { GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; GLuint program_string_id:32; - GLuint drawable_height; + GLuint vp_nr_outputs_written; }; @@ -151,7 +153,7 @@ struct brw_wm_instruction { }; -#define 
BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3) +#define BRW_WM_MAX_INSN 2048 #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) #define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) @@ -161,11 +163,19 @@ struct brw_wm_instruction { #define BRW_WM_MAX_SUBROUTINE 16 +struct ureg_instruction { + unsigned opcode:8; + unsigned tex_target:3; + struct ureg_dst dst; + struct ureg_src src[3]; +}; + /* New opcodes to track internal operations required for WM unit. * These are added early so that the registers used can be tracked, * freed and reused like those of other instructions. */ +#define MAX_OPCODE TGSI_OPCODE_LAST #define WM_PIXELXY (MAX_OPCODE) #define WM_DELTAXY (MAX_OPCODE + 1) #define WM_PIXELW (MAX_OPCODE + 2) @@ -177,7 +187,7 @@ struct brw_wm_instruction { #define WM_FRONTFACING (MAX_OPCODE + 8) #define MAX_WM_OPCODE (MAX_OPCODE + 9) -#define PROGRAM_PAYLOAD (PROGRAM_FILE_MAX) +#define PROGRAM_PAYLOAD (TGSI_FILE_COUNT) #define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) struct brw_wm_compile { @@ -198,15 +208,15 @@ struct brw_wm_compile { * simplifying and adding instructions for interpolation and * framebuffer writes. 
*/ - struct prog_instruction prog_instructions[BRW_WM_MAX_INSN]; + struct ureg_instruction prog_instructions[BRW_WM_MAX_INSN]; GLuint nr_fp_insns; GLuint fp_temp; GLuint fp_interp_emitted; GLuint fp_fragcolor_emitted; - struct prog_src_register pixel_xy; - struct prog_src_register delta_xy; - struct prog_src_register pixel_w; + struct ureg_src pixel_xy; + struct ureg_src delta_xy; + struct ureg_src pixel_w; struct brw_wm_value vreg[BRW_WM_MAX_VREG]; @@ -217,7 +227,7 @@ struct brw_wm_compile { struct { struct brw_wm_value depth[4]; /* includes r0/r1 */ - struct brw_wm_value input_interp[FRAG_ATTRIB_MAX]; + struct brw_wm_value input_interp[PIPE_MAX_SHADER_INPUTS]; } payload; @@ -295,7 +305,7 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); -GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); +//GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); void emit_ddxy(struct brw_compile *p, diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c index c6659646f2..04dec5ba39 100644 --- a/src/gallium/drivers/i965/brw_wm_debug.c +++ b/src/gallium/drivers/i965/brw_wm_debug.c @@ -41,21 +41,21 @@ void brw_wm_print_value( struct brw_wm_compile *c, if (c->state >= PASS2_DONE) brw_print_reg(value->hw_reg); else if( value == &c->undef_value ) - _mesa_printf("undef"); + debug_printf("undef"); else if( value - c->vreg >= 0 && value - c->vreg < BRW_WM_MAX_VREG) - _mesa_printf("r%d", value - c->vreg); + debug_printf("r%d", value - c->vreg); else if (value - c->creg >= 0 && value - c->creg < BRW_WM_MAX_PARAM) - _mesa_printf("c%d", value - c->creg); + debug_printf("c%d", value - c->creg); else if (value - c->payload.input_interp >= 0 && value - c->payload.input_interp < FRAG_ATTRIB_MAX) - _mesa_printf("i%d", value - c->payload.input_interp); + debug_printf("i%d", value - c->payload.input_interp); else if 
(value - c->payload.depth >= 0 && value - c->payload.depth < FRAG_ATTRIB_MAX) - _mesa_printf("d%d", value - c->payload.depth); + debug_printf("d%d", value - c->payload.depth); else - _mesa_printf("?"); + debug_printf("?"); } void brw_wm_print_ref( struct brw_wm_compile *c, @@ -64,16 +64,16 @@ void brw_wm_print_ref( struct brw_wm_compile *c, struct brw_reg hw_reg = ref->hw_reg; if (ref->unspill_reg) - _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot); + debug_printf("UNSPILL(%x)/", ref->value->spill_slot); if (c->state >= PASS2_DONE) brw_print_reg(ref->hw_reg); else { - _mesa_printf("%s", hw_reg.negate ? "-" : ""); - _mesa_printf("%s", hw_reg.abs ? "abs/" : ""); + debug_printf("%s", hw_reg.negate ? "-" : ""); + debug_printf("%s", hw_reg.abs ? "abs/" : ""); brw_wm_print_value(c, ref->value); if ((hw_reg.nr&1) || hw_reg.subnr) { - _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); + debug_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr); } } } @@ -84,22 +84,22 @@ void brw_wm_print_insn( struct brw_wm_compile *c, GLuint i, arg; GLuint nr_args = brw_wm_nr_args(inst->opcode); - _mesa_printf("["); + debug_printf("["); for (i = 0; i < 4; i++) { if (inst->dst[i]) { brw_wm_print_value(c, inst->dst[i]); if (inst->dst[i]->spill_slot) - _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot); + debug_printf("/SPILL(%x)",inst->dst[i]->spill_slot); } else - _mesa_printf("#"); + debug_printf("#"); if (i < 3) - _mesa_printf(","); + debug_printf(","); } - _mesa_printf("]"); + debug_printf("]"); if (inst->writemask != BRW_WRITEMASK_XYZW) - _mesa_printf(".%s%s%s%s", + debug_printf(".%s%s%s%s", GET_BIT(inst->writemask, 0) ? "x" : "", GET_BIT(inst->writemask, 1) ? "y" : "", GET_BIT(inst->writemask, 2) ? 
"z" : "", @@ -107,58 +107,58 @@ void brw_wm_print_insn( struct brw_wm_compile *c, switch (inst->opcode) { case WM_PIXELXY: - _mesa_printf(" = PIXELXY"); + debug_printf(" = PIXELXY"); break; case WM_DELTAXY: - _mesa_printf(" = DELTAXY"); + debug_printf(" = DELTAXY"); break; case WM_PIXELW: - _mesa_printf(" = PIXELW"); + debug_printf(" = PIXELW"); break; case WM_WPOSXY: - _mesa_printf(" = WPOSXY"); + debug_printf(" = WPOSXY"); break; case WM_PINTERP: - _mesa_printf(" = PINTERP"); + debug_printf(" = PINTERP"); break; case WM_LINTERP: - _mesa_printf(" = LINTERP"); + debug_printf(" = LINTERP"); break; case WM_CINTERP: - _mesa_printf(" = CINTERP"); + debug_printf(" = CINTERP"); break; case WM_FB_WRITE: - _mesa_printf(" = FB_WRITE"); + debug_printf(" = FB_WRITE"); break; case WM_FRONTFACING: - _mesa_printf(" = FRONTFACING"); + debug_printf(" = FRONTFACING"); break; default: - _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode)); + debug_printf(" = %s", _mesa_opcode_string(inst->opcode)); break; } if (inst->saturate) - _mesa_printf("_SAT"); + debug_printf("_SAT"); for (arg = 0; arg < nr_args; arg++) { - _mesa_printf(" ["); + debug_printf(" ["); for (i = 0; i < 4; i++) { if (inst->src[arg][i]) { brw_wm_print_ref(c, inst->src[arg][i]); } else - _mesa_printf("%%"); + debug_printf("%%"); if (i < 3) - _mesa_printf(","); + debug_printf(","); else - _mesa_printf("]"); + debug_printf("]"); } } - _mesa_printf("\n"); + debug_printf("\n"); } void brw_wm_print_program( struct brw_wm_compile *c, @@ -166,9 +166,9 @@ void brw_wm_print_program( struct brw_wm_compile *c, { GLuint insn; - _mesa_printf("%s:\n", stage); + debug_printf("%s:\n", stage); for (insn = 0; insn < c->nr_insns; insn++) brw_wm_print_insn(c, &c->instruction[insn]); - _mesa_printf("\n"); + debug_printf("\n"); } diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 7df9b79d7a..5f7ae6592c 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ 
b/src/gallium/drivers/i965/brw_wm_emit.c @@ -1481,7 +1481,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) break; default: - _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n", + debug_printf("Unsupported opcode %i (%s) in fragment shader\n", inst->opcode, inst->opcode < MAX_OPCODE ? _mesa_opcode_string(inst->opcode) : "unknown"); @@ -1494,12 +1494,12 @@ void brw_wm_emit( struct brw_wm_compile *c ) inst->dst[i]->spill_slot); } - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { int i; - _mesa_printf("wm-native:\n"); + debug_printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + debug_printf("\n"); } } diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index be240031c7..d594730730 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -142,7 +142,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c ) int bit = _mesa_ffs( ~c->fp_temp ); if (!bit) { - _mesa_printf("%s: out of temporaries\n", __FILE__); + debug_printf("%s: out of temporaries\n", __FILE__); exit(1); } @@ -977,7 +977,7 @@ static void print_insns( const struct prog_instruction *insn, { GLuint i; for (i = 0; i < nr; i++, insn++) { - _mesa_printf("%3d: ", i); + debug_printf("%3d: ", i); if (insn->Opcode < MAX_OPCODE) _mesa_print_instruction(insn); else if (insn->Opcode < MAX_WM_OPCODE) { @@ -988,7 +988,7 @@ static void print_insns( const struct prog_instruction *insn, 3); } else - _mesa_printf("965 Opcode %d\n", insn->Opcode); + debug_printf("965 Opcode %d\n", insn->Opcode); } } @@ -1002,10 +1002,10 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) struct brw_fragment_program *fp = c->fp; GLuint insn; - if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pre-fp:\n"); + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("pre-fp:\n"); _mesa_print_program(&fp->program.Base); - _mesa_printf("\n"); + debug_printf("\n"); } c->pixel_xy = 
src_undef(); @@ -1103,10 +1103,10 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) } } - if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("pass_fp:\n"); + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("pass_fp:\n"); print_insns( c->prog_instructions, c->nr_fp_insns ); - _mesa_printf("\n"); + debug_printf("\n"); } } diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index a8de5fdd0b..3118e615f9 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -1694,7 +1694,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) c->cur_inst = i; #if 0 - _mesa_printf("Inst %d: ", i); + debug_printf("Inst %d: ", i); _mesa_print_instruction(inst); #endif @@ -1920,7 +1920,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } break; default: - _mesa_printf("unsupported IR in fragment shader %d\n", + debug_printf("unsupported IR in fragment shader %d\n", inst->Opcode); } @@ -1931,11 +1931,11 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) } post_wm_emit(c); - if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("wm-native:\n"); + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("wm-native:\n"); for (i = 0; i < p->nr_insn; i++) brw_disasm(stderr, &p->store[i]); - _mesa_printf("\n"); + debug_printf("\n"); } } @@ -1945,8 +1945,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) */ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) { - if (INTEL_DEBUG & DEBUG_WM) { - _mesa_printf("brw_wm_glsl_emit:\n"); + if (BRW_DEBUG & DEBUG_WM) { + debug_printf("brw_wm_glsl_emit:\n"); } /* initial instruction translation/simplification */ @@ -1955,7 +1955,7 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) /* actual code generation */ brw_wm_emit_glsl(brw, c); - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, 
"brw_wm_glsl_emit done"); } diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 31b0270e84..71e4c56835 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -101,7 +101,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, GLuint i = c->prog_data.nr_params++; if (i >= BRW_WM_MAX_PARAM) { - _mesa_printf("%s: out of params\n", __FUNCTION__); + debug_printf("%s: out of params\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } @@ -150,7 +150,7 @@ static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c, return c->imm_ref[i].ref; } else { - _mesa_printf("%s: out of imm_refs\n", __FUNCTION__); + debug_printf("%s: out of imm_refs\n", __FUNCTION__); c->prog_data.error = 1; return NULL; } @@ -434,7 +434,7 @@ void brw_wm_pass0( struct brw_wm_compile *c ) } } - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, "pass0"); } } diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index f2ae3a958f..85a3a55ca4 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -284,7 +284,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) track_arg(c, inst, 2, read2); } - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, "pass1"); } } diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c index 6faea018fb..a19ca62328 100644 --- a/src/gallium/drivers/i965/brw_wm_pass2.c +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -331,13 +331,13 @@ void brw_wm_pass2( struct brw_wm_compile *c ) } } - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, "pass2"); } c->state = PASS2_DONE; - if (INTEL_DEBUG & DEBUG_WM) { + if (BRW_DEBUG & DEBUG_WM) { brw_wm_print_program(c, "pass2/done"); } } diff --git 
a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c index a8993f9312..32692d533c 100644 --- a/src/gallium/drivers/i965/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -76,8 +76,9 @@ static GLint S_FIXED(GLfloat value, GLuint frac_bits) } -static struct brw_winsys_buffer *upload_default_color( struct brw_context *brw, - const GLfloat *color ) +static struct brw_winsys_buffer * +upload_default_color( struct brw_context *brw, + const GLfloat *color ) { struct brw_sampler_default_color sdc; @@ -117,63 +118,6 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, { _mesa_memset(sampler, 0, sizeof(*sampler)); - switch (key->minfilter) { - case GL_NEAREST: - sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; - sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; - break; - case GL_LINEAR: - sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; - sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; - break; - case GL_NEAREST_MIPMAP_NEAREST: - sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; - sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; - break; - case GL_LINEAR_MIPMAP_NEAREST: - sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; - sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; - break; - case GL_NEAREST_MIPMAP_LINEAR: - sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; - sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; - break; - case GL_LINEAR_MIPMAP_LINEAR: - sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; - sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; - break; - default: - break; - } - - /* Set Anisotropy: - */ - if (key->max_aniso > 1.0) { - sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; - sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; - - if (key->max_aniso > 2.0) { - sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, - BRW_ANISORATIO_16); - } - } - else { - switch (key->magfilter) { - case GL_NEAREST: - sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; - break; - case 
GL_LINEAR: - sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - break; - default: - break; - } - } - - sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); - sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); - sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); - /* Cube-maps on 965 and later must use the same wrap mode for all 3 * coordinate dimensions. Futher, only CUBE and CLAMP are valid. */ @@ -198,36 +142,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key, } - /* Set shadow function: - */ - if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) { - /* Shadowing is "enabled" by emitting a particular sampler - * message (sample_c). So need to recompile WM program when - * shadow comparison is enabled on each/any texture unit. - */ - sampler->ss0.shadow_function = - intel_translate_shadow_compare_func(key->comparefunc); - } - - /* Set LOD bias: - */ - sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6); - - sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ - sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ - - /* Set BaseMipLevel, MaxLOD, MinLOD: - * - * XXX: I don't think that using firstLevel, lastLevel works, - * because we always setup the surface state as if firstLevel == - * level zero. 
Probably have to subtract firstLevel from each of - * these: - */ - sampler->ss0.base_level = U_FIXED(0, 1); - sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6); - sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6); - sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } @@ -237,57 +152,42 @@ static void brw_wm_sampler_populate_key(struct brw_context *brw, struct wm_sampler_key *key) { - int unit; + int nr = MIN2(brw->curr.number_textures, + brw->curr.number_samplers); + int i; memset(key, 0, sizeof(*key)); - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { - if (ctx->Texture.Unit[unit]._ReallyEnabled) { - struct wm_sampler_entry *entry = &key->sampler[unit]; - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; - struct gl_texture_object *texObj = texUnit->_Current; - struct intel_texture_object *intelObj = intel_texture_object(texObj); - struct gl_texture_image *firstImage = - texObj->Image[0][intelObj->firstLevel]; - - entry->tex_target = texObj->Target; - - entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP) - ? ctx->Texture.CubeMapSeamless : GL_FALSE; - - entry->wrap_r = texObj->WrapR; - entry->wrap_s = texObj->WrapS; - entry->wrap_t = texObj->WrapT; - - entry->maxlod = texObj->MaxLod; - entry->minlod = texObj->MinLod; - entry->lod_bias = texUnit->LodBias + texObj->LodBias; - entry->max_aniso = texObj->MaxAnisotropy; - entry->minfilter = texObj->MinFilter; - entry->magfilter = texObj->MagFilter; - entry->comparemode = texObj->CompareMode; - entry->comparefunc = texObj->CompareFunc; - - brw->sws->bo_unreference(brw->wm.sdc_bo[unit]); - if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { - float bordercolor[4] = { - texObj->BorderColor[0], - texObj->BorderColor[0], - texObj->BorderColor[0], - texObj->BorderColor[0] - }; - /* GL specs that border color for depth textures is taken from the - * R channel, while the hardware uses A. Spam R into all the - * channels for safety. 
- */ - brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor); - } else { - brw->wm.sdc_bo[unit] = upload_default_color(brw, - texObj->BorderColor); - } - key->sampler_count = unit + 1; + for (i = 0; i < nr; i++) { + const struct brw_texture *tex = brw->curr.texture[i]; + const struct brw_sampler *sampler = brw->curr.sampler[i]; + struct wm_sampler_entry *entry = &key->sampler[i]; + + entry->tex_target = texObj->Target; + entry->seamless_cube_map = FALSE; /* XXX: add this to gallium */ + entry->ss0 = sampler->ss0; + entry->ss1 = sampler->ss1; + entry->ss3 = sampler->ss3; + + brw->sws->bo_unreference(brw->wm.sdc_bo[i]); + if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { + float bordercolor[4] = { + texObj->BorderColor[0], + texObj->BorderColor[0], + texObj->BorderColor[0], + texObj->BorderColor[0] + }; + /* GL specs that border color for depth textures is taken from the + * R channel, while the hardware uses A. Spam R into all the + * channels for safety. + */ + brw->wm.sdc_bo[i] = upload_default_color(brw, bordercolor); + } else { + brw->wm.sdc_bo[i] = upload_default_color(brw, texObj->BorderColor); } } + + key->sampler_count = nr; } /* All samplers must be uploaded in a single contiguous array, which @@ -354,7 +254,7 @@ static void upload_wm_samplers( struct brw_context *brw ) const struct brw_tracked_state brw_wm_samplers = { .dirty = { - .mesa = _NEW_TEXTURE, + .mesa = PIPE_NEW_BOUND_TEXTURES | PIPE_NEW_SAMPLER, .brw = 0, .cache = 0 }, diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index 4989aae830..edabf6ceb6 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -65,7 +65,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) memset(key, 0, sizeof(*key)); - if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) key->max_threads = 1; else { /* WM maximum threads is number of EUs times number of threads per EU. 
*/ @@ -120,7 +120,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); /* _NEW_QUERY */ - key->stats_wm = intel->stats_wm; + key->stats_wm = (brw->query.stats_wm != 0); /* _NEW_LINE */ key->line_stipple = ctx->Line.StippleFlag; @@ -215,7 +215,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.wm5.line_stipple = key->line_stipple; - if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm) + if (BRW_DEBUG & DEBUG_STATS || key->stats_wm) wm.wm4.stats_enable = 1; bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, -- cgit v1.2.3 From 5a304995e09d8dbfd40a2dfab32eacb7e85798e3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 Oct 2009 01:11:36 +0000 Subject: i965g: still working on compilation --- src/gallium/drivers/i965/brw_context.h | 15 +- src/gallium/drivers/i965/brw_gs.c | 2 +- src/gallium/drivers/i965/brw_swtnl.c | 144 ++--- src/gallium/drivers/i965/brw_urb.c | 5 +- src/gallium/drivers/i965/brw_vs.c | 31 +- src/gallium/drivers/i965/brw_vs.h | 14 +- src/gallium/drivers/i965/brw_vs_emit.c | 733 ++++++++++++------------ src/gallium/drivers/i965/brw_wm_glsl.c | 4 +- src/gallium/drivers/i965/brw_wm_surface_state.c | 7 +- 9 files changed, 485 insertions(+), 470 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 10c1cf6f33..8aaf895d20 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -154,6 +154,7 @@ struct brw_vertex_shader { const struct tgsi_token *tokens; struct tgsi_shader_info info; + unsigned id; struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; }; @@ -165,6 +166,7 @@ struct brw_fragment_shader { GLboolean isGLSL; + unsigned id; struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; }; @@ -280,10 +282,13 @@ 
struct brw_vs_prog_data { GLuint curb_read_length; GLuint urb_read_length; GLuint total_grf; - GLuint nr_outputs_written; - GLuint nr_params; /**< number of float params/constants */ - GLuint inputs_read; + GLuint nr_outputs; + GLuint nr_inputs; + + GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */ + + GLboolean copy_edgeflag; /* Used for calculating urb partitions: */ @@ -475,8 +480,8 @@ struct brw_context /* Active state from the state tracker: */ struct { - const struct brw_vertex_shader *vertex_shader; - const struct brw_fragment_shader *fragment_shader; + struct brw_vertex_shader *vertex_shader; + struct brw_fragment_shader *fragment_shader; const struct brw_blend_state *blend; const struct brw_rasterizer_state *rast; const struct brw_depth_stencil_state *zstencil; diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 3ecaa74e4f..693d8bfdf8 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -148,7 +148,7 @@ static void populate_key( struct brw_context *brw, memset(key, 0, sizeof(*key)); /* CACHE_NEW_VS_PROG */ - key->nr_attrs = brw->vs.prog_data->nr_outputs_written; + key->nr_attrs = brw->vs.prog_data->nr_outputs; /* BRW_NEW_PRIMITIVE */ key->primitive = gs_prim[brw->primitive]; diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c index d2df8af9f4..464013e7c4 100644 --- a/src/gallium/drivers/i965/brw_swtnl.c +++ b/src/gallium/drivers/i965/brw_swtnl.c @@ -1,111 +1,93 @@ -/* XXX: could split the primitive list to fallback only on the - * non-conformant primitives. - */ -static GLboolean check_fallbacks( struct brw_context *brw, - const struct _mesa_prim *prim, - GLuint nr_prims ) +#include "brw_context.h" +#include "brw_pipe_rast.h" + + +static GLboolean need_swtnl( struct brw_context *brw ) { - GLuint i; + const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ; /* If we don't require strict OpenGL conformance, never * use fallbacks. 
If we're forcing fallbacks, always * use fallfacks. */ if (brw->flags.no_swtnl) - return GL_FALSE; + return FALSE; if (brw->flags.force_swtnl) - return GL_TRUE; + return TRUE; - if (brw->curr.rast->tmpl.smooth_polys) { - for (i = 0; i < nr_prims; i++) - if (reduced_prim[prim[i].mode] == GL_TRIANGLES) - return GL_TRUE; + /* Exceeding hw limits on number of VS inputs? + */ + if (brw->curr.num_vertex_elements == 0 || + brw->curr.num_vertex_elements >= BRW_VEP_MAX) { + return TRUE; } - /* BRW hardware will do AA lines, but they are non-conformant it - * seems. TBD whether we keep this fallback: + /* Position array with zero stride? + * + * XXX: position isn't always at zero... + * XXX: eliminate zero-stride arrays */ - if (ctx->Line.SmoothFlag) { - for (i = 0; i < nr_prims; i++) - if (reduced_prim[prim[i].mode] == GL_LINES) - return GL_TRUE; + { + int ve0_vb = brw->curr.vertex_element[0].vertex_buffer_index; + + if (brw->curr.vertex_buffer[ve0_vb].stride == 0) + return TRUE; } - /* Stipple -- these fallbacks could be resolved with a little - * bit of work? + /* XXX: short-circuit */ - if (ctx->Line.StippleFlag) { - for (i = 0; i < nr_prims; i++) { - /* GS doesn't get enough information to know when to reset - * the stipple counter?!? - */ - if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) - return GL_TRUE; - - if (prim[i].mode == GL_POLYGON && - (ctx->Polygon.FrontMode == GL_LINE || - ctx->Polygon.BackMode == GL_LINE)) - return GL_TRUE; - } - } + return FALSE; - if (ctx->Point.SmoothFlag) { - for (i = 0; i < nr_prims; i++) - if (prim[i].mode == GL_POINTS) - return GL_TRUE; - } + if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) { + if (rast->poly_smooth) + return TRUE; - /* BRW hardware doesn't handle GL_CLAMP texturing correctly; - * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP - * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and - * we want strict conformance, force the fallback. 
- * Right now, we only do this for 2D textures. - */ + } + + if (brw->reduced_primitive == PIPE_PRIM_LINES || + (brw->reduced_primitive == PIPE_PRIM_TRIANGLES && + (rast->fill_cw == PIPE_POLYGON_MODE_LINE || + rast->fill_ccw == PIPE_POLYGON_MODE_LINE))) { - int u; - for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) { - struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u]; - if (texUnit->Enabled) { - if (texUnit->Enabled & TEXTURE_1D_BIT) { - if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) { - return GL_TRUE; - } - } - if (texUnit->Enabled & TEXTURE_2D_BIT) { - if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) { - return GL_TRUE; - } - } - if (texUnit->Enabled & TEXTURE_3D_BIT) { - if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP || - texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) { - return GL_TRUE; - } - } - } - } + /* BRW hardware will do AA lines, but they are non-conformant it + * seems. TBD whether we keep this fallback: + */ + if (rast->line_smooth) + return TRUE; + + /* XXX: was a fallback in mesa (gs doesn't get enough + * information to know when to reset stipple counter), but there + * must be a way around it. + */ + if (rast->line_stipple_enable && + (brw->reduced_primitive == PIPE_PRIM_TRIANGLES || + brw->primitive == PIPE_PRIM_LINE_LOOP || + brw->primitive == PIPE_PRIM_LINE_STRIP)) + return TRUE; } - /* Exceeding hw limits on number of VS inputs? - */ - if (brw->nr_ve == 0 || - brw->nr_ve >= BRW_VEP_MAX) { - return TRUE; + + if (brw->reduced_primitive == PIPE_PRIM_POINTS || + (brw->reduced_primitive == PIPE_PRIM_TRIANGLES && + (rast->fill_cw == PIPE_POLYGON_MODE_POINT || + rast->fill_ccw == PIPE_POLYGON_MODE_POINT))) + { + if (rast->point_smooth) + return TRUE; } - /* Position array with zero stride? 
+ /* BRW hardware doesn't handle CLAMP texturing correctly; + * brw_wm_sampler_state:translate_wrap_mode() treats CLAMP + * as CLAMP_TO_EDGE instead. If we're using CLAMP, and + * we want strict conformance, force the fallback. + * + * XXX: need a workaround for this. */ - if (brw->vs[brw->ve[0]]->stride == 0) - return TRUE; - - /* Nothing stopping us from the fast path now */ - return GL_FALSE; + return FALSE; } diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c index ff2466528d..57fd8f20b2 100644 --- a/src/gallium/drivers/i965/brw_urb.c +++ b/src/gallium/drivers/i965/brw_urb.c @@ -35,6 +35,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_debug.h" #define VS 0 #define GS 1 @@ -111,7 +112,7 @@ static GLboolean check_urb_layout( struct brw_context *brw ) /* Most minimal update, forces re-emit of URB fence packet after GS * unit turned on/off. */ -static void recalculate_urb_fence( struct brw_context *brw ) +static int recalculate_urb_fence( struct brw_context *brw ) { GLuint csize = brw->curbe.total_size; GLuint vsize = brw->vs.prog_data->urb_entry_size; @@ -204,6 +205,8 @@ done: brw->state.dirty.brw |= BRW_NEW_URB_FENCE; } + + return 0; } diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 010ac115d3..3965ca6c54 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -28,17 +28,19 @@ * Authors: * Keith Whitwell */ - + +#include "tgsi/tgsi_dump.h" #include "brw_context.h" #include "brw_vs.h" #include "brw_util.h" #include "brw_state.h" +#include "brw_pipe_rast.h" static void do_vs_prog( struct brw_context *brw, - struct brw_vertex_program *vp, + struct brw_vertex_shader *vp, struct brw_vs_prog_key *key ) { GLuint program_size; @@ -51,16 +53,12 @@ static void do_vs_prog( struct brw_context *brw, brw_init_compile(brw, &c.func); c.vp = vp; - c.prog_data.nr_outputs_written = vp->program.Base.OutputsWritten; - 
c.prog_data.inputs_read = vp->program.Base.InputsRead; - - if (c.key.copy_edgeflag) { - c.prog_data.nr_outputs_written |= 1<info.num_outputs; + c.prog_data.nr_inputs = vp->info.num_inputs; + c.prog_data.copy_edgeflag = c.key.copy_edgeflag; if (0) - tgsi_dump(&c.vp->tokens, 0); + tgsi_dump(c.vp->tokens, 0); /* Emit GEN4 code. */ @@ -80,11 +78,10 @@ static void do_vs_prog( struct brw_context *brw, } -static void brw_upload_vs_prog(struct brw_context *brw) +static int brw_upload_vs_prog(struct brw_context *brw) { struct brw_vs_prog_key key; - struct brw_vertex_program *vp = - (struct brw_vertex_program *)brw->vertex_program; + struct brw_vertex_shader *vp = brw->curr.vertex_shader; memset(&key, 0, sizeof(key)); @@ -92,9 +89,9 @@ static void brw_upload_vs_prog(struct brw_context *brw) * the inputs it asks for, whether they are varying or not. */ key.program_string_id = vp->id; - key.nr_userclip = brw->nr_userclip; - key.copy_edgeflag = (brw->rast->fill_ccw != PIPE_POLYGON_MODE_FILL || - brw->rast->fill_cw != PIPE_POLYGON_MODE_FILL); + key.nr_userclip = brw->curr.ucp.nr; + key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL || + brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL); /* Make an early check for the key. 
*/ @@ -105,6 +102,8 @@ static void brw_upload_vs_prog(struct brw_context *brw) &brw->vs.prog_data); if (brw->vs.prog_bo == NULL) do_vs_prog(brw, vp, &key); + + return 0; } diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index e33fa2f0aa..58119567dc 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -46,17 +46,22 @@ struct brw_vs_prog_key { }; + +#define MAX_IF_DEPTH 32 +#define MAX_LOOP_DEPTH 32 + struct brw_vs_compile { struct brw_compile func; struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; - struct brw_vertex_program *vp; + struct brw_vertex_shader *vp; GLuint nr_inputs; + GLuint nr_outputs; + GLboolean copy_edgeflag; GLuint first_output; - GLuint nr_outputs; GLuint first_overflow_output; /**< VERT_ATTRIB_x */ GLuint first_tmp; @@ -80,8 +85,13 @@ struct brw_vs_compile { GLint index; struct brw_reg reg; } current_const[3]; + + struct brw_instruction *if_inst[MAX_IF_DEPTH]; + struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; + }; + void brw_vs_emit( struct brw_vs_compile *c ); #endif diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 04132a167b..4daa98b29e 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -28,11 +28,25 @@ * Authors: * Keith Whitwell */ - #include "pipe/p_shader_tokens.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "tgsi/tgsi_ureg.h" + #include "brw_context.h" #include "brw_vs.h" +#include "brw_debug.h" + + +struct ureg_instruction { + unsigned opcode:8; + unsigned tex_target:3; + struct ureg_dst dst; + struct ureg_src src[3]; +}; static struct brw_reg get_tmp( struct brw_vs_compile *c ) @@ -72,8 +86,8 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * works if everything fits in the GRF. * XXX this heuristic/check may need some fine tuning... 
*/ - if (c->vp->program.Base.Parameters->NumParameters + - c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF) + if (c->vp->info.file_max[TGSI_FILE_CONSTANT] + + c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF) c->vp->use_const_buffer = GL_TRUE; else c->vp->use_const_buffer = GL_FALSE; @@ -106,25 +120,21 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } else { /* use a section of the GRF for constants */ - GLuint nr_params = c->vp->program.Base.Parameters->NumParameters; + GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1; for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + c->regs[TGSI_FILE_CONSTANT][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); } reg += (nr_params + 1) / 2; c->prog_data.curb_read_length = reg - 1; - c->prog_data.nr_params = nr_params * 4; } /* Allocate input regs: */ - c->nr_inputs = 0; - for (i = 0; i < VERT_ATTRIB_MAX; i++) { - if (c->prog_data.inputs_read & (1 << i)) { - c->nr_inputs++; - c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } + c->nr_inputs = c->vp->info.num_inputs; + for (i = 0; i < c->nr_inputs; i++) { + c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0); + reg++; } /* If there are no inputs, we'll still be reading one attribute's worth @@ -144,45 +154,51 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) else mrf = 4; - for (i = 0; i < c->prog_data.nr_outputs_written; i++) { - c->nr_outputs++; - assert(i < Elements(c->regs[PROGRAM_OUTPUT])); - if (i == VERT_RESULT_HPOS) { - c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + /* XXX: need to access vertex output semantics here: + */ + c->nr_outputs = c->prog_data.nr_outputs; + for (i = 0; i < c->prog_data.nr_outputs; i++) { + assert(i < Elements(c->regs[TGSI_FILE_OUTPUT])); + + /* XXX: Hardwire position to zero: + */ + if (i == 0) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; } - else if (i == VERT_RESULT_PSIZ) { - 
c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); + /* XXX: disable psiz: + */ + else if (0) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); reg++; mrf++; /* just a placeholder? XXX fix later stages & remove this */ } + else if (mrf < 16) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } else { - if (mrf < 16) { - c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf); - mrf++; - } - else { - /* too many vertex results to fit in MRF, use GRF for overflow */ - if (!c->first_overflow_output) - c->first_overflow_output = i; - c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } + /* too many vertex results to fit in MRF, use GRF for overflow */ + if (!c->first_overflow_output) + c->first_overflow_output = i; + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; } } /* Allocate program temporaries: */ - for (i = 0; i < c->vp->program.Base.NumTemporaries; i++) { - c->regs[PROGRAM_TEMPORARY][i] = brw_vec8_grf(reg, 0); + + for (i = 0; i < c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1; i++) { + c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0); reg++; } /* Address reg(s). Don't try to use the internal address reg until * deref time. 
*/ - for (i = 0; i < c->vp->program.Base.NumAddressRegs; i++) { - c->regs[PROGRAM_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE, + for (i = 0; i < c->vp->info.file_max[TGSI_FILE_ADDRESS]+1; i++) { + c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE, reg, 0, BRW_REGISTER_TYPE_D, @@ -243,8 +259,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->prog_data.total_grf = reg; if (BRW_DEBUG & DEBUG_VS) { - debug_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); - debug_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); + debug_printf("%s NumAddrRegs %d\n", __FUNCTION__, + c->vp->info.file_max[TGSI_FILE_ADDRESS]+1); + debug_printf("%s NumTemps %d\n", __FUNCTION__, + c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1); debug_printf("%s reg = %d\n", __FUNCTION__, reg); } } @@ -740,25 +758,25 @@ static void emit_nrm( struct brw_vs_compile *c, static struct brw_reg get_constant(struct brw_vs_compile *c, - const struct prog_instruction *inst, + const struct ureg_instruction *inst, GLuint argIndex) { - const struct prog_src_register *src = &inst->SrcReg[argIndex]; + const struct ureg_src src = inst->src[argIndex]; struct brw_compile *p = &c->func; struct brw_reg const_reg; struct brw_reg const2_reg; - const GLboolean relAddr = src->RelAddr; + const GLboolean relAddr = src.Indirect; assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index || relAddr) { - struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; + if (c->current_const[argIndex].index != src.Index || relAddr) { + struct brw_reg addrReg = c->regs[TGSI_FILE_ADDRESS][0]; - c->current_const[argIndex].index = src->Index; + c->current_const[argIndex].index = src.Index; #if 0 printf(" fetch const[%d] for arg %d into reg %d\n", - src->Index, argIndex, c->current_const[argIndex].reg.nr); + src.Index, argIndex, c->current_const[argIndex].reg.nr); #endif /* need to fetch the constant now */ brw_dp_READ_4_vs(p, @@ -766,7 +784,7 @@ 
get_constant(struct brw_vs_compile *c, 0, /* oword */ relAddr, /* relative indexing? */ addrReg, /* address register */ - 16 * src->Index, /* byte offset */ + 16 * src.Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); @@ -783,7 +801,7 @@ get_constant(struct brw_vs_compile *c, 1, /* oword */ relAddr, /* relative indexing? */ addrReg, /* address register */ - 16 * src->Index, /* byte offset */ + 16 * src.Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER ); } @@ -813,30 +831,24 @@ get_constant(struct brw_vs_compile *c, /* TODO: relative addressing! */ static struct brw_reg get_reg( struct brw_vs_compile *c, - gl_register_file file, + enum tgsi_file_type file, GLuint index ) { switch (file) { - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - case PROGRAM_OUTPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + case TGSI_FILE_CONSTANT: assert(c->regs[file][index].nr != 0); return c->regs[file][index]; - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); - return c->regs[PROGRAM_STATE_VAR][index]; - case PROGRAM_ADDRESS: + + case TGSI_FILE_ADDRESS: assert(index == 0); return c->regs[file][index]; - case PROGRAM_UNDEFINED: /* undef values */ + case TGSI_FILE_NULL: /* undef values */ return brw_null_reg(); - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_WRITE_ONLY: default: assert(0); return brw_null_reg(); @@ -853,7 +865,7 @@ static struct brw_reg deref( struct brw_vs_compile *c, { struct brw_compile *p = &c->func; struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + struct brw_reg addr_reg = c->regs[TGSI_FILE_ADDRESS][0]; struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; struct brw_reg indirect = brw_vec4_indirect(0,0); @@ -886,17 +898,17 @@ static struct brw_reg deref( struct 
brw_vs_compile *c, */ static struct brw_reg get_src_reg( struct brw_vs_compile *c, - const struct prog_instruction *inst, + const struct ureg_instruction *inst, GLuint argIndex ) { - const GLuint file = inst->SrcReg[argIndex].File; - const GLint index = inst->SrcReg[argIndex].Index; - const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr; + const GLuint file = inst->src[argIndex].File; + const GLint index = inst->src[argIndex].Index; + const GLboolean relAddr = inst->src[argIndex].Indirect; switch (file) { - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - case PROGRAM_OUTPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: if (relAddr) { return deref(c, c->regs[file][0], index); } @@ -905,30 +917,25 @@ get_src_reg( struct brw_vs_compile *c, return c->regs[file][index]; } - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - case PROGRAM_ENV_PARAM: + case TGSI_FILE_CONSTANT: if (c->vp->use_const_buffer) { return get_constant(c, inst, argIndex); } else if (relAddr) { - return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); + return deref(c, c->regs[TGSI_FILE_CONSTANT][0], index); } else { - assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); - return c->regs[PROGRAM_STATE_VAR][index]; + assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0); + return c->regs[TGSI_FILE_CONSTANT][index]; } - case PROGRAM_ADDRESS: + case TGSI_FILE_ADDRESS: assert(index == 0); return c->regs[file][index]; - case PROGRAM_UNDEFINED: + case TGSI_FILE_NULL: /* this is a normal case since we loop over all three src args */ return brw_null_reg(); - case PROGRAM_LOCAL_PARAM: - case PROGRAM_WRITE_ONLY: default: assert(0); return brw_null_reg(); @@ -959,27 +966,27 @@ static void emit_arl( struct brw_vs_compile *c, * Return the brw reg for the given instruction's src argument. 
*/ static struct brw_reg get_arg( struct brw_vs_compile *c, - const struct prog_instruction *inst, + const struct ureg_instruction *inst, GLuint argIndex ) { - const struct prog_src_register *src = &inst->SrcReg[argIndex]; + const struct ureg_src src = inst->src[argIndex]; struct brw_reg reg; - if (src->File == PROGRAM_UNDEFINED) + if (src.File == TGSI_FILE_NULL) return brw_null_reg(); reg = get_src_reg(c, inst, argIndex); /* Convert 3-bit swizzle to 2-bit. */ - reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0), - GET_SWZ(src->Swizzle, 1), - GET_SWZ(src->Swizzle, 2), - GET_SWZ(src->Swizzle, 3)); + reg.dw1.bits.swizzle = BRW_SWIZZLE4(src.SwizzleX, + src.SwizzleY, + src.SwizzleZ, + src.SwizzleW); /* Note this is ok for non-swizzle instructions: */ - reg.negate = src->Negate ? 1 : 0; + reg.negate = src.Negate ? 1 : 0; return reg; } @@ -989,21 +996,21 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, * Get brw register for the given program dest register. */ static struct brw_reg get_dst( struct brw_vs_compile *c, - struct prog_dst_register dst ) + struct ureg_dst dst ) { struct brw_reg reg; switch (dst.File) { - case PROGRAM_TEMPORARY: - case PROGRAM_OUTPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_OUTPUT: assert(c->regs[dst.File][dst.Index].nr != 0); reg = c->regs[dst.File][dst.Index]; break; - case PROGRAM_ADDRESS: + case TGSI_FILE_ADDRESS: assert(dst.Index == 0); reg = c->regs[dst.File][dst.Index]; break; - case PROGRAM_UNDEFINED: + case TGSI_FILE_NULL: /* we may hit this for OPCODE_END, OPCODE_KIL, etc */ reg = brw_null_reg(); break; @@ -1027,15 +1034,16 @@ static void emit_vertex_write( struct brw_vs_compile *c) { struct brw_compile *p = &c->func; struct brw_reg m0 = brw_message_reg(0); - struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS]; + struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; int eot; GLuint len_vertext_header = 2; if (c->key.copy_edgeflag) { + assert(0); brw_MOV(p, - 
get_reg(c, PROGRAM_OUTPUT, VERT_RESULT_EDGE), - get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG)); + get_reg(c, TGSI_FILE_OUTPUT, 0), + get_reg(c, TGSI_FILE_INPUT, 0)); } /* Build ndc coords */ @@ -1060,7 +1068,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); if (c->prog_data.writes_psiz) { - struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ]; + struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_PSIZ]; brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8)); } @@ -1138,7 +1146,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) eot, /* writes complete */ 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); -! + if (c->first_overflow_output > 0) { /* Not all of the vertex outputs/results fit into the MRF. * Move the overflowed attributes from the GRF to the MRF and @@ -1148,9 +1156,9 @@ static void emit_vertex_write( struct brw_vs_compile *c) * at mrf[4] atm... */ GLuint i, mrf = 0; - for (i = c->first_overflow_output; i < c->prog_data.nr_outputs_written; i++) { + for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) { /* move from GRF to MRF */ - brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]); + brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]); mrf++; } @@ -1195,9 +1203,9 @@ post_vs_emit( struct brw_vs_compile *c, } static uint32_t -get_predicate(const struct prog_instruction *inst) +get_predicate(const struct ureg_instruction *inst) { - if (inst->DstReg.CondMask == COND_TR) + if (inst->dst.CondMask == COND_TR) return BRW_PREDICATE_NONE; /* All of GLSL only produces predicates for COND_NE and one channel per @@ -1213,9 +1221,9 @@ get_predicate(const struct prog_instruction *inst) * predicate on that. We can probably support this, but it won't * necessarily be easy. 
*/ - assert(inst->DstReg.CondMask == COND_NE); +/* assert(inst->dst.CondMask == COND_NE); */ - switch (inst->DstReg.CondSwizzle) { + switch (inst->dst.CondSwizzle) { case SWIZZLE_XXXX: return BRW_PREDICATE_ALIGN16_REPLICATE_X; case SWIZZLE_YYYY: @@ -1225,26 +1233,281 @@ get_predicate(const struct prog_instruction *inst) case SWIZZLE_WWWW: return BRW_PREDICATE_ALIGN16_REPLICATE_W; default: - _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n", - inst->DstReg.CondMask); + debug_printf("Unexpected predicate: 0x%08x\n", + inst->dst.CondMask); return BRW_PREDICATE_NORMAL; } } +static void emit_insn(struct brw_vs_compile *c, + const struct tgsi_full_instruction *insn) +{ + struct brw_reg args[3], dst; + GLuint i; + +#if 0 + printf("%d: ", insn); + _mesa_print_instruction(inst); +#endif + + /* Get argument regs. + */ + for (i = 0; i < 3; i++) { + const struct ureg_src src = inst->src[i]; + index = src.Index; + file = src.File; + args[i] = get_arg(c, inst, i); + } + + /* Get dest regs. Note that it is possible for a reg to be both + * dst and arg, given the static allocation of registers. So + * care needs to be taken emitting multi-operation instructions. 
+ */ + index = inst->dst.Index; + file = inst->dst.File; + dst = get_dst(c, inst->dst); + + if (inst->SaturateMode != SATURATE_OFF) { + debug_printf("Unsupported saturate %d in vertex shader", + inst->SaturateMode); + } + + switch (inst->Opcode) { + case TGSI_OPCODE_ABS: + brw_MOV(p, dst, brw_abs(args[0])); + break; + case TGSI_OPCODE_ADD: + brw_ADD(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_COS: + emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_DP3: + brw_DP3(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DP4: + brw_DP4(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DPH: + brw_DPH(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_NRM3: + emit_nrm(c, dst, args[0], 3); + break; + case TGSI_OPCODE_NRM4: + emit_nrm(c, dst, args[0], 4); + break; + case TGSI_OPCODE_DST: + unalias2(c, dst, args[0], args[1], emit_dst_noalias); + break; + case TGSI_OPCODE_EXP: + unalias1(c, dst, args[0], emit_exp_noalias); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_ARL: + emit_arl(c, dst, args[0]); + break; + case TGSI_OPCODE_FLR: + brw_RNDD(p, dst, args[0]); + break; + case TGSI_OPCODE_FRC: + brw_FRC(p, dst, args[0]); + break; + case TGSI_OPCODE_LOG: + unalias1(c, dst, args[0], emit_log_noalias); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_LIT: + unalias1(c, dst, args[0], emit_lit_noalias); + break; + case TGSI_OPCODE_LRP: + unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias); + break; + case TGSI_OPCODE_MAD: + brw_MOV(p, brw_acc_reg(), args[2]); + brw_MAC(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MAX: + emit_max(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MIN: + emit_min(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MOV: + brw_MOV(p, dst, args[0]); + break; + case 
TGSI_OPCODE_MUL: + brw_MUL(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_POW: + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_SEQ: + emit_seq(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SIN: + emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_SNE: + emit_sne(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGE: + emit_sge(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGT: + emit_sgt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLT: + emit_slt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLE: + emit_sle(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SUB: + brw_ADD(p, dst, args[0], negate(args[1])); + break; + case TGSI_OPCODE_TRUNC: + /* round toward zero */ + brw_RNDZ(p, dst, args[0]); + break; + case TGSI_OPCODE_XPD: + emit_xpd(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_IF: + assert(if_depth < MAX_IF_DEPTH); + if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); + /* Note that brw_IF smashes the predicate_control field. 
*/ + if_inst[if_depth]->header.predicate_control = get_predicate(inst); + if_depth++; + break; + case TGSI_OPCODE_ELSE: + if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(if_depth > 0); + brw_ENDIF(p, if_inst[--if_depth]); + break; + case TGSI_OPCODE_BGNLOOP: + loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_BRK: + brw_set_predicate_control(p, get_predicate(inst)); + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_CONT: + brw_set_predicate_control(p, get_predicate(inst)); + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_ENDLOOP: + { + struct brw_instruction *inst0, *inst1; + GLuint br = 1; + + loop_depth--; + + if (BRW_IS_IGDNG(brw)) + br = 2; + + inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + /* patch all the BREAK/CONT instructions from last BEGINLOOP */ + while (inst0 > loop_inst[loop_depth]) { + inst0--; + if (inst0->header.opcode == TGSI_OPCODE_BRK) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); + inst0->bits3.if_else.pop_count = 0; + } + else if (inst0->header.opcode == TGSI_OPCODE_CONT) { + inst0->bits3.if_else.jump_count = br * (inst1 - inst0); + inst0->bits3.if_else.pop_count = 0; + } + } + } + break; + case TGSI_OPCODE_BRA: + brw_set_predicate_control(p, get_predicate(inst)); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_CAL: + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + brw_save_call(p, inst->Comment, p->nr_insn); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case TGSI_OPCODE_RET: + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), 
brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + break; + case TGSI_OPCODE_END: + end_offset = p->nr_insn; + /* this instruction will get patched later to jump past subroutine + * code, etc. + */ + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case TGSI_OPCODE_PRINT: + /* no-op */ + break; + case TGSI_OPCODE_BGNSUB: + brw_save_label(p, inst->Comment, p->nr_insn); + break; + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + default: + debug_printf("Unsupported opcode %i (%s) in vertex shader", + inst->Opcode, inst->Opcode < MAX_OPCODE ? + _mesa_opcode_string(inst->Opcode) : + "unknown"); + } + + /* Set the predication update on the last instruction of the native + * instruction sequence. + * + * This would be problematic if it was set on a math instruction, + * but that shouldn't be the case with the current GLSL compiler. + */ + if (inst->CondUpdate) { + struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; + + assert(hw_insn->header.destreg__conditionalmod == 0); + hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; + } + + release_tmps(c); +} + + /* Emit the vertex program instructions here. 
*/ void brw_vs_emit(struct brw_vs_compile *c ) { -#define MAX_IF_DEPTH 32 -#define MAX_LOOP_DEPTH 32 struct brw_compile *p = &c->func; struct brw_context *brw = p->brw; - const GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn, if_depth = 0, loop_depth = 0; GLuint end_offset = 0; struct brw_instruction *end_inst, *last_inst; - struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH]; const struct brw_indirect stack_index = brw_indirect(0, 0); + struct tgsi_parse_context parse; + struct tgsi_full_declaration *decl; GLuint index; GLuint file; @@ -1264,258 +1527,8 @@ void brw_vs_emit(struct brw_vs_compile *c ) for (insn = 0; insn < nr_insns; insn++) { - const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn]; - struct brw_reg args[3], dst; - GLuint i; + const struct ureg_instruction *inst = &c->vp->program.Base.Instructions[insn]; -#if 0 - printf("%d: ", insn); - _mesa_print_instruction(inst); -#endif - - /* Get argument regs. - */ - for (i = 0; i < 3; i++) { - const struct prog_src_register *src = &inst->SrcReg[i]; - index = src->Index; - file = src->File; - args[i] = get_arg(c, inst, i); - } - - /* Get dest regs. Note that it is possible for a reg to be both - * dst and arg, given the static allocation of registers. So - * care needs to be taken emitting multi-operation instructions. 
- */ - index = inst->DstReg.Index; - file = inst->DstReg.File; - dst = get_dst(c, inst->DstReg); - - if (inst->SaturateMode != SATURATE_OFF) { - _mesa_problem(NULL, "Unsupported saturate %d in vertex shader", - inst->SaturateMode); - } - - switch (inst->Opcode) { - case TGSI_OPCODE_ABS: - brw_MOV(p, dst, brw_abs(args[0])); - break; - case TGSI_OPCODE_ADD: - brw_ADD(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_COS: - emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_DP3: - brw_DP3(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_DP4: - brw_DP4(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_DPH: - brw_DPH(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_NRM3: - emit_nrm(c, dst, args[0], 3); - break; - case TGSI_OPCODE_NRM4: - emit_nrm(c, dst, args[0], 4); - break; - case TGSI_OPCODE_DST: - unalias2(c, dst, args[0], args[1], emit_dst_noalias); - break; - case TGSI_OPCODE_EXP: - unalias1(c, dst, args[0], emit_exp_noalias); - break; - case TGSI_OPCODE_EX2: - emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_ARL: - emit_arl(c, dst, args[0]); - break; - case TGSI_OPCODE_FLR: - brw_RNDD(p, dst, args[0]); - break; - case TGSI_OPCODE_FRC: - brw_FRC(p, dst, args[0]); - break; - case TGSI_OPCODE_LOG: - unalias1(c, dst, args[0], emit_log_noalias); - break; - case TGSI_OPCODE_LG2: - emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_LIT: - unalias1(c, dst, args[0], emit_lit_noalias); - break; - case TGSI_OPCODE_LRP: - unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias); - break; - case TGSI_OPCODE_MAD: - brw_MOV(p, brw_acc_reg(), args[2]); - brw_MAC(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_MAX: - emit_max(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_MIN: - emit_min(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_MOV: - brw_MOV(p, dst, args[0]); - 
break; - case TGSI_OPCODE_MUL: - brw_MUL(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_POW: - emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_RCP: - emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_SEQ: - emit_seq(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SIN: - emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL); - break; - case TGSI_OPCODE_SNE: - emit_sne(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SGE: - emit_sge(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SGT: - emit_sgt(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SLT: - emit_slt(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SLE: - emit_sle(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_SUB: - brw_ADD(p, dst, args[0], negate(args[1])); - break; - case TGSI_OPCODE_TRUNC: - /* round toward zero */ - brw_RNDZ(p, dst, args[0]); - break; - case TGSI_OPCODE_XPD: - emit_xpd(p, dst, args[0], args[1]); - break; - case TGSI_OPCODE_IF: - assert(if_depth < MAX_IF_DEPTH); - if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); - /* Note that brw_IF smashes the predicate_control field. 
*/ - if_inst[if_depth]->header.predicate_control = get_predicate(inst); - if_depth++; - break; - case TGSI_OPCODE_ELSE: - if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); - break; - case TGSI_OPCODE_ENDIF: - assert(if_depth > 0); - brw_ENDIF(p, if_inst[--if_depth]); - break; - case TGSI_OPCODE_BGNLOOP: - loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); - break; - case TGSI_OPCODE_BRK: - brw_set_predicate_control(p, get_predicate(inst)); - brw_BREAK(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case TGSI_OPCODE_CONT: - brw_set_predicate_control(p, get_predicate(inst)); - brw_CONT(p); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case TGSI_OPCODE_ENDLOOP: - { - struct brw_instruction *inst0, *inst1; - GLuint br = 1; - - loop_depth--; - - if (BRW_IS_IGDNG(brw)) - br = 2; - - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); - /* patch all the BREAK/CONT instructions from last BEGINLOOP */ - while (inst0 > loop_inst[loop_depth]) { - inst0--; - if (inst0->header.opcode == BRW_TGSI_OPCODE_BREAK) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); - inst0->bits3.if_else.pop_count = 0; - } - else if (inst0->header.opcode == BRW_TGSI_OPCODE_CONTINUE) { - inst0->bits3.if_else.jump_count = br * (inst1 - inst0); - inst0->bits3.if_else.pop_count = 0; - } - } - } - break; - case TGSI_OPCODE_BRA: - brw_set_predicate_control(p, get_predicate(inst)); - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - break; - case TGSI_OPCODE_CAL: - brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); - brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(4)); - brw_save_call(p, inst->Comment, p->nr_insn); - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - break; - case TGSI_OPCODE_RET: - brw_ADD(p, get_addr_reg(stack_index), - 
get_addr_reg(stack_index), brw_imm_d(-4)); - brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); - brw_set_access_mode(p, BRW_ALIGN_16); - break; - case TGSI_OPCODE_END: - end_offset = p->nr_insn; - /* this instruction will get patched later to jump past subroutine - * code, etc. - */ - brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); - break; - case TGSI_OPCODE_PRINT: - /* no-op */ - break; - case TGSI_OPCODE_BGNSUB: - brw_save_label(p, inst->Comment, p->nr_insn); - break; - case TGSI_OPCODE_ENDSUB: - /* no-op */ - break; - default: - _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader", - inst->Opcode, inst->Opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->Opcode) : - "unknown"); - } - - /* Set the predication update on the last instruction of the native - * instruction sequence. - * - * This would be problematic if it was set on a math instruction, - * but that shouldn't be the case with the current GLSL compiler. - */ - if (inst->CondUpdate) { - struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; - - assert(hw_insn->header.destreg__conditionalmod == 0); - hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; - } - - release_tmps(c); } end_inst = &p->store[end_offset]; diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index 3118e615f9..23f7ba16fd 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -167,8 +167,8 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, case PROGRAM_PAYLOAD: break; default: - _mesa_problem(NULL, "Unexpected file in get_reg()"); - return brw_null_reg(); + debug_printf("Unexpected file in get_reg()"); + return brw_null_reg(); } assert(index < 256); diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index e1ed6438dc..7157feb6f3 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ 
b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -516,8 +516,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw, key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; break; default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->texformat->MesaFormat); + debug_printf("Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + assert(0); + key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + return; } key.tiling = region->tiling; if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) { -- cgit v1.2.3 From 7ba2fe40fa092551f1c493d754c80ca93564d32b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 Oct 2009 00:29:21 +0000 Subject: i965g: still working on compilation --- src/gallium/drivers/i965/brw_context.h | 1 + src/gallium/drivers/i965/brw_eu.c | 18 ++--- src/gallium/drivers/i965/brw_eu.h | 4 +- src/gallium/drivers/i965/brw_vs.h | 6 ++ src/gallium/drivers/i965/brw_vs_emit.c | 131 ++++++++++++++++----------------- src/gallium/drivers/i965/brw_wm.h | 9 +-- src/gallium/drivers/i965/brw_wm_glsl.c | 2 +- 7 files changed, 83 insertions(+), 88 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 8aaf895d20..7b85363e9f 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -289,6 +289,7 @@ struct brw_vs_prog_data { GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */ GLboolean copy_edgeflag; + GLboolean writes_psiz; /* Used for calculating urb partitions: */ diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c index df49d4b72f..1189a35b6f 100644 --- a/src/gallium/drivers/i965/brw_eu.c +++ b/src/gallium/drivers/i965/brw_eu.c @@ -152,7 +152,7 @@ const GLuint *brw_get_program( struct brw_compile *p, */ struct brw_glsl_label { - const char *name; /**< the label string */ + GLuint label; /**< the label number */ GLuint position; /**< the position of the 
brw instruction for this label */ struct brw_glsl_label *next; /**< next in linked list */ }; @@ -164,7 +164,7 @@ struct brw_glsl_label struct brw_glsl_call { GLuint call_inst_pos; /**< location of the CAL instruction */ - const char *sub_name; /**< name of subroutine to call */ + GLuint label; struct brw_glsl_call *next; /**< next in linked list */ }; @@ -173,10 +173,10 @@ struct brw_glsl_call * Called for each OPCODE_BGNSUB. */ void -brw_save_label(struct brw_compile *c, const char *name, GLuint position) +brw_save_label(struct brw_compile *c, unsigned l, GLuint position) { struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label); - label->name = name; + label->label = l; label->position = position; label->next = c->first_label; c->first_label = label; @@ -187,11 +187,11 @@ brw_save_label(struct brw_compile *c, const char *name, GLuint position) * Called for each OPCODE_CAL. */ void -brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos) +brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos) { struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call); call->call_inst_pos = call_pos; - call->sub_name = name; + call->label = label; call->next = c->first_call; c->first_call = call; } @@ -201,11 +201,11 @@ brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos) * Lookup a label, return label's position/offset. 
*/ static GLuint -brw_lookup_label(struct brw_compile *c, const char *name) +brw_lookup_label(struct brw_compile *c, unsigned l) { const struct brw_glsl_label *label; for (label = c->first_label; label; label = label->next) { - if (strcmp(name, label->name) == 0) { + if (l == label->label) { return label->position; } } @@ -224,7 +224,7 @@ brw_resolve_cals(struct brw_compile *c) const struct brw_glsl_call *call; for (call = c->first_call; call; call = call->next) { - const GLuint sub_loc = brw_lookup_label(c, call->sub_name); + const GLuint sub_loc = brw_lookup_label(c, call->label); struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos]; struct brw_instruction *brw_sub_inst = &c->store[sub_loc]; GLint offset = brw_sub_inst - brw_call_inst; diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h index ac5a623cac..3379522104 100644 --- a/src/gallium/drivers/i965/brw_eu.h +++ b/src/gallium/drivers/i965/brw_eu.h @@ -136,10 +136,10 @@ struct brw_compile { void -brw_save_label(struct brw_compile *c, const char *name, GLuint position); +brw_save_label(struct brw_compile *c, unsigned label, GLuint position); void -brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos); +brw_save_call(struct brw_compile *c, unsigned label, GLuint call_pos); void brw_resolve_cals(struct brw_compile *c); diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index 58119567dc..2a2dbb3457 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -54,6 +54,7 @@ struct brw_vs_compile { struct brw_compile func; struct brw_vs_prog_key key; struct brw_vs_prog_data prog_data; + struct brw_chipset chipset; struct brw_vertex_shader *vp; @@ -88,7 +89,12 @@ struct brw_vs_compile { struct brw_instruction *if_inst[MAX_IF_DEPTH]; struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; + GLuint insn; + GLuint if_depth; + GLuint loop_depth; + GLuint end_offset; + struct brw_indirect stack_index; 
}; diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 4daa98b29e..5366ab8514 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -35,19 +35,15 @@ #include "util/u_math.h" #include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_ureg_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" #include "brw_context.h" #include "brw_vs.h" #include "brw_debug.h" -struct ureg_instruction { - unsigned opcode:8; - unsigned tex_target:3; - struct ureg_dst dst; - struct ureg_src src[3]; -}; - static struct brw_reg get_tmp( struct brw_vs_compile *c ) { @@ -149,7 +145,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->first_output = reg; c->first_overflow_output = 0; - if (BRW_IS_IGDNG(c->func.brw)) + if (c->chipset.is_igdng) mrf = 8; else mrf = 4; @@ -251,7 +247,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs); - if (BRW_IS_IGDNG(c->func.brw)) + if (c->chipset.is_igdng) c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4; else c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4; @@ -1058,7 +1054,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) */ if (c->prog_data.writes_psiz || c->key.nr_userclip || - BRW_IS_965(p->brw)) + c->chipset.is_965) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -1089,7 +1085,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. 
*/ - if (BRW_IS_965(p->brw)) { + if (c->chipset.is_965) { brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_L, @@ -1117,7 +1113,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) brw_set_access_mode(p, BRW_ALIGN_1); brw_MOV(p, offset(m0, 2), ndc); - if (BRW_IS_IGDNG(p->brw)) { + if (c->chipset.is_igdng) { /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */ brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */ /* m4, m5 contain the distances from vertex to the user clip planeXXX. @@ -1205,6 +1201,9 @@ post_vs_emit( struct brw_vs_compile *c, static uint32_t get_predicate(const struct ureg_instruction *inst) { + /* XXX: disabling for now + */ +#if 0 if (inst->dst.CondMask == COND_TR) return BRW_PREDICATE_NONE; @@ -1237,11 +1236,15 @@ get_predicate(const struct ureg_instruction *inst) inst->dst.CondMask); return BRW_PREDICATE_NORMAL; } +#else + return BRW_PREDICATE_NORMAL; +#endif } static void emit_insn(struct brw_vs_compile *c, - const struct tgsi_full_instruction *insn) + const struct ureg_instruction *inst) { + struct brw_compile *p = &c->func; struct brw_reg args[3], dst; GLuint i; @@ -1253,9 +1256,6 @@ static void emit_insn(struct brw_vs_compile *c, /* Get argument regs. */ for (i = 0; i < 3; i++) { - const struct ureg_src src = inst->src[i]; - index = src.Index; - file = src.File; args[i] = get_arg(c, inst, i); } @@ -1263,16 +1263,13 @@ static void emit_insn(struct brw_vs_compile *c, * dst and arg, given the static allocation of registers. So * care needs to be taken emitting multi-operation instructions. 
*/ - index = inst->dst.Index; - file = inst->dst.File; dst = get_dst(c, inst->dst); - if (inst->SaturateMode != SATURATE_OFF) { - debug_printf("Unsupported saturate %d in vertex shader", - inst->SaturateMode); + if (inst->dst.Saturate) { + debug_printf("Unsupported saturate in vertex shader"); } - switch (inst->Opcode) { + switch (inst->opcode) { case TGSI_OPCODE_ABS: brw_MOV(p, dst, brw_abs(args[0])); break; @@ -1291,7 +1288,7 @@ static void emit_insn(struct brw_vs_compile *c, case TGSI_OPCODE_DPH: brw_DPH(p, dst, args[0], args[1]); break; - case TGSI_OPCODE_NRM3: + case TGSI_OPCODE_NRM: emit_nrm(c, dst, args[0], 3); break; case TGSI_OPCODE_NRM4: @@ -1384,21 +1381,21 @@ static void emit_insn(struct brw_vs_compile *c, emit_xpd(p, dst, args[0], args[1]); break; case TGSI_OPCODE_IF: - assert(if_depth < MAX_IF_DEPTH); - if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8); + assert(c->if_depth < MAX_IF_DEPTH); + c->if_inst[c->if_depth] = brw_IF(p, BRW_EXECUTE_8); /* Note that brw_IF smashes the predicate_control field. 
*/ - if_inst[if_depth]->header.predicate_control = get_predicate(inst); - if_depth++; + c->if_inst[c->if_depth]->header.predicate_control = get_predicate(inst); + c->if_depth++; break; case TGSI_OPCODE_ELSE: - if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]); + c->if_inst[c->if_depth-1] = brw_ELSE(p, c->if_inst[c->if_depth-1]); break; case TGSI_OPCODE_ENDIF: - assert(if_depth > 0); - brw_ENDIF(p, if_inst[--if_depth]); + assert(c->if_depth > 0); + brw_ENDIF(p, c->if_inst[--c->if_depth]); break; case TGSI_OPCODE_BGNLOOP: - loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8); + c->loop_inst[c->loop_depth++] = brw_DO(p, BRW_EXECUTE_8); break; case TGSI_OPCODE_BRK: brw_set_predicate_control(p, get_predicate(inst)); @@ -1415,14 +1412,14 @@ static void emit_insn(struct brw_vs_compile *c, struct brw_instruction *inst0, *inst1; GLuint br = 1; - loop_depth--; + c->loop_depth--; - if (BRW_IS_IGDNG(brw)) + if (c->chipset.is_igdng) br = 2; - inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]); + inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]); /* patch all the BREAK/CONT instructions from last BEGINLOOP */ - while (inst0 > loop_inst[loop_depth]) { + while (inst0 > c->loop_inst[c->loop_depth]) { inst0--; if (inst0->header.opcode == TGSI_OPCODE_BRK) { inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1); @@ -1442,41 +1439,37 @@ static void emit_insn(struct brw_vs_compile *c, break; case TGSI_OPCODE_CAL: brw_set_access_mode(p, BRW_ALIGN_1); - brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_ADD(p, deref_1d(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); brw_set_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(4)); - brw_save_call(p, inst->Comment, p->nr_insn); + brw_ADD(p, get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(4)); + brw_save_call(p, inst->label, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; case 
TGSI_OPCODE_RET: - brw_ADD(p, get_addr_reg(stack_index), - get_addr_reg(stack_index), brw_imm_d(-4)); + brw_ADD(p, get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(-4)); brw_set_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0)); + brw_MOV(p, brw_ip_reg(), deref_1d(c->stack_index, 0)); brw_set_access_mode(p, BRW_ALIGN_16); break; case TGSI_OPCODE_END: - end_offset = p->nr_insn; + c->end_offset = p->nr_insn; /* this instruction will get patched later to jump past subroutine * code, etc. */ brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; - case TGSI_OPCODE_PRINT: - /* no-op */ - break; case TGSI_OPCODE_BGNSUB: - brw_save_label(p, inst->Comment, p->nr_insn); + brw_save_label(p, p->nr_insn, p->nr_insn); break; case TGSI_OPCODE_ENDSUB: /* no-op */ break; default: debug_printf("Unsupported opcode %i (%s) in vertex shader", - inst->Opcode, inst->Opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->Opcode) : - "unknown"); + inst->opcode, + tgsi_get_opcode_name(inst->opcode)); } /* Set the predication update on the last instruction of the native @@ -1485,12 +1478,16 @@ static void emit_insn(struct brw_vs_compile *c, * This would be problematic if it was set on a math instruction, * but that shouldn't be the case with the current GLSL compiler. */ +#if 0 + /* XXX: disabled + */ if (inst->CondUpdate) { struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1]; assert(hw_insn->header.destreg__conditionalmod == 0); hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ; } +#endif release_tmps(c); } @@ -1498,24 +1495,19 @@ static void emit_insn(struct brw_vs_compile *c, /* Emit the vertex program instructions here. 
*/ -void brw_vs_emit(struct brw_vs_compile *c ) +void brw_vs_emit(struct brw_vs_compile *c) { struct brw_compile *p = &c->func; - struct brw_context *brw = p->brw; - GLuint insn, if_depth = 0, loop_depth = 0; - GLuint end_offset = 0; struct brw_instruction *end_inst, *last_inst; - const struct brw_indirect stack_index = brw_indirect(0, 0); - struct tgsi_parse_context parse; - struct tgsi_full_declaration *decl; - GLuint index; - GLuint file; + struct ureg_parse_context parse; + struct ureg_declaration *decl; + struct ureg_declaration *imm; + struct ureg_declaration *insn; - if (BRW_DEBUG & DEBUG_VS) { - debug_printf("vs-mesa:\n"); - _mesa_print_program(&c->vp->program.Base); - debug_printf("\n"); - } + if (BRW_DEBUG & DEBUG_VS) + tgsi_dump(c->vp->tokens, 0); + + c->stack_index = brw_indirect(0, 0); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); @@ -1523,12 +1515,15 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* Static register allocation */ brw_vs_alloc_regs(c); - brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); - for (insn = 0; insn < nr_insns; insn++) { + while (ureg_next_decl(&parse, &decl)) { + } - const struct ureg_instruction *inst = &c->vp->program.Base.Instructions[insn]; - + while (ureg_next_immediate(&parse, &imm)) { + } + + while (ureg_next_instruction(&parse, &insn)) { } end_inst = &p->store[end_offset]; diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 5bc2a49c1f..084430cf28 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -34,6 +34,7 @@ #define BRW_WM_H #include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_ureg_parse.h" #include "brw_context.h" #include "brw_eu.h" @@ -163,14 +164,6 @@ struct brw_wm_instruction { #define BRW_WM_MAX_SUBROUTINE 16 -struct ureg_instruction { - unsigned opcode:8; - unsigned tex_target:3; - struct ureg_dst dst; - struct 
ureg_src src[3]; -}; - - /* New opcodes to track internal operations required for WM unit. * These are added early so that the registers used can be tracked, * freed and reused like those of other instructions. diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index 23f7ba16fd..59bc4ef701 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -1867,7 +1867,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(stack_index), get_addr_reg(stack_index), brw_imm_d(4)); - brw_save_call(&c->func, inst->Comment, p->nr_insn); + brw_save_call(&c->func, inst->label, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); brw_pop_insn_state(p); break; -- cgit v1.2.3 From 81b8589f064204d9ddcd7d1f9d43d2dcf5676235 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 28 Oct 2009 21:24:03 +0000 Subject: i965g: still working on compilation --- src/gallium/drivers/i965/brw_vs.c | 2 +- src/gallium/drivers/i965/brw_vs.h | 3 + src/gallium/drivers/i965/brw_vs_emit.c | 199 +++++++++++++++++------- src/gallium/drivers/i965/brw_vs_state.c | 25 +-- src/gallium/drivers/i965/brw_vs_surface_state.c | 33 +++- src/gallium/drivers/i965/brw_wm.c | 36 +++-- 6 files changed, 208 insertions(+), 90 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 3965ca6c54..26a28114d9 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -57,7 +57,7 @@ static void do_vs_prog( struct brw_context *brw, c.prog_data.nr_inputs = vp->info.num_inputs; c.prog_data.copy_edgeflag = c.key.copy_edgeflag; - if (0) + if (1) tgsi_dump(c.vp->tokens, 0); /* Emit GEN4 code. 
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index 2a2dbb3457..b4e450d89b 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -60,6 +60,9 @@ struct brw_vs_compile { GLuint nr_inputs; GLuint nr_outputs; + GLuint nr_immediates; + GLfloat immediate[128][4]; + GLboolean copy_edgeflag; GLuint first_output; diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 5366ab8514..6809bccdec 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -34,8 +34,7 @@ #include "util/u_memory.h" #include "util/u_math.h" -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_ureg_parse.h" +#include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_info.h" @@ -67,6 +66,7 @@ static void release_tmps( struct brw_vs_compile *c ) } + /** * Preallocate GRF register before code emit. * Do things as simply as possible. Allocate and populate all regs @@ -83,10 +83,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * XXX this heuristic/check may need some fine tuning... */ if (c->vp->info.file_max[TGSI_FILE_CONSTANT] + + c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF) c->vp->use_const_buffer = GL_TRUE; - else + else { + /* XXX: immediates can go elsewhere if necessary: + */ + assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + + c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF); + c->vp->use_const_buffer = GL_FALSE; + } /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/ @@ -139,6 +146,29 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) if (c->nr_inputs == 0) reg++; + /* Allocate a GRF and load immediate values by hand with 4 MOVs!!! + * + * XXX: Try to encode float immediates as brw immediates + * XXX: Put immediates into the CURBE. + * XXX: Make sure ureg sets minimal immediate size and respect it + * here. 
+ */ + for (i = 0; i < c->nr_immediates; i++) { + struct brw_reg r; + int j; + + r = brw_vec8_grf(reg, 0); + + for (j = 0; j < 4; j++) { + brw_MOV(&c->func, + brw_writemask(r, (1<immediate[i][j])); + } + + reg++; + } + + /* Allocate outputs. The non-position outputs go straight into message regs. */ c->nr_outputs = 0; @@ -754,21 +784,20 @@ static void emit_nrm( struct brw_vs_compile *c, static struct brw_reg get_constant(struct brw_vs_compile *c, - const struct ureg_instruction *inst, - GLuint argIndex) + GLuint argIndex, + GLuint index, + GLboolean relAddr) { - const struct ureg_src src = inst->src[argIndex]; struct brw_compile *p = &c->func; struct brw_reg const_reg; struct brw_reg const2_reg; - const GLboolean relAddr = src.Indirect; assert(argIndex < 3); - if (c->current_const[argIndex].index != src.Index || relAddr) { + if (c->current_const[argIndex].index != index || relAddr) { struct brw_reg addrReg = c->regs[TGSI_FILE_ADDRESS][0]; - c->current_const[argIndex].index = src.Index; + c->current_const[argIndex].index = index; #if 0 printf(" fetch const[%d] for arg %d into reg %d\n", @@ -780,7 +809,7 @@ get_constant(struct brw_vs_compile *c, 0, /* oword */ relAddr, /* relative indexing? */ addrReg, /* address register */ - 16 * src.Index, /* byte offset */ + 16 * index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); @@ -797,7 +826,7 @@ get_constant(struct brw_vs_compile *c, 1, /* oword */ relAddr, /* relative indexing? 
*/ addrReg, /* address register */ - 16 * src.Index, /* byte offset */ + 16 * index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER ); } @@ -894,12 +923,11 @@ static struct brw_reg deref( struct brw_vs_compile *c, */ static struct brw_reg get_src_reg( struct brw_vs_compile *c, - const struct ureg_instruction *inst, - GLuint argIndex ) + GLuint argIndex, + GLuint file, + GLint index, + GLboolean relAddr ) { - const GLuint file = inst->src[argIndex].File; - const GLint index = inst->src[argIndex].Index; - const GLboolean relAddr = inst->src[argIndex].Indirect; switch (file) { case TGSI_FILE_TEMPORARY: @@ -913,9 +941,12 @@ get_src_reg( struct brw_vs_compile *c, return c->regs[file][index]; } + case TGSI_FILE_IMMEDIATE: + return c->regs[file][index]; + case TGSI_FILE_CONSTANT: if (c->vp->use_const_buffer) { - return get_constant(c, inst, argIndex); + return get_constant(c, argIndex, index, relAddr); } else if (relAddr) { return deref(c, c->regs[TGSI_FILE_CONSTANT][0], index); @@ -962,27 +993,32 @@ static void emit_arl( struct brw_vs_compile *c, * Return the brw reg for the given instruction's src argument. */ static struct brw_reg get_arg( struct brw_vs_compile *c, - const struct ureg_instruction *inst, + const struct tgsi_full_src_register *src, GLuint argIndex ) { - const struct ureg_src src = inst->src[argIndex]; struct brw_reg reg; - if (src.File == TGSI_FILE_NULL) + if (src->SrcRegister.File == TGSI_FILE_NULL) return brw_null_reg(); - reg = get_src_reg(c, inst, argIndex); + reg = get_src_reg(c, argIndex, + src->SrcRegister.File, + src->SrcRegister.Index, + src->SrcRegister.Indirect); /* Convert 3-bit swizzle to 2-bit. */ - reg.dw1.bits.swizzle = BRW_SWIZZLE4(src.SwizzleX, - src.SwizzleY, - src.SwizzleZ, - src.SwizzleW); + reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SrcRegister.SwizzleX, + src->SrcRegister.SwizzleY, + src->SrcRegister.SwizzleZ, + src->SrcRegister.SwizzleW); /* Note this is ok for non-swizzle instructions: */ - reg.negate = src.Negate ? 
1 : 0; + reg.negate = src->SrcRegister.Negate ? 1 : 0; + + /* XXX: abs, absneg + */ return reg; } @@ -992,19 +1028,21 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, * Get brw register for the given program dest register. */ static struct brw_reg get_dst( struct brw_vs_compile *c, - struct ureg_dst dst ) + unsigned file, + unsigned index, + unsigned writemask ) { struct brw_reg reg; - switch (dst.File) { + switch (file) { case TGSI_FILE_TEMPORARY: case TGSI_FILE_OUTPUT: - assert(c->regs[dst.File][dst.Index].nr != 0); - reg = c->regs[dst.File][dst.Index]; + assert(c->regs[file][index].nr != 0); + reg = c->regs[file][index]; break; case TGSI_FILE_ADDRESS: - assert(dst.Index == 0); - reg = c->regs[dst.File][dst.Index]; + assert(index == 0); + reg = c->regs[file][index]; break; case TGSI_FILE_NULL: /* we may hit this for OPCODE_END, OPCODE_KIL, etc */ @@ -1015,7 +1053,7 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, reg = brw_null_reg(); } - reg.dw1.bits.writemask = dst.WriteMask; + reg.dw1.bits.writemask = writemask; return reg; } @@ -1199,7 +1237,7 @@ post_vs_emit( struct brw_vs_compile *c, } static uint32_t -get_predicate(const struct ureg_instruction *inst) +get_predicate(const struct tgsi_full_instruction *inst) { /* XXX: disabling for now */ @@ -1242,8 +1280,10 @@ get_predicate(const struct ureg_instruction *inst) } static void emit_insn(struct brw_vs_compile *c, - const struct ureg_instruction *inst) + const struct tgsi_full_instruction *inst) { + unsigned opcode = inst->Instruction.Opcode; + unsigned label = inst->InstructionExtLabel.Label; struct brw_compile *p = &c->func; struct brw_reg args[3], dst; GLuint i; @@ -1256,20 +1296,25 @@ static void emit_insn(struct brw_vs_compile *c, /* Get argument regs. */ for (i = 0; i < 3; i++) { - args[i] = get_arg(c, inst, i); + args[i] = get_arg(c, &inst->FullSrcRegisters[i], i); } /* Get dest regs. 
Note that it is possible for a reg to be both * dst and arg, given the static allocation of registers. So * care needs to be taken emitting multi-operation instructions. */ - dst = get_dst(c, inst->dst); + dst = get_dst(c, + inst->FullDstRegisters[0].DstRegister.File, + inst->FullDstRegisters[0].DstRegister.Index, + inst->FullDstRegisters[0].DstRegister.WriteMask); - if (inst->dst.Saturate) { + /* XXX: saturate + */ + if (inst->Instruction.Saturate != TGSI_SAT_NONE) { debug_printf("Unsupported saturate in vertex shader"); } - switch (inst->opcode) { + switch (opcode) { case TGSI_OPCODE_ABS: brw_MOV(p, dst, brw_abs(args[0])); break; @@ -1443,7 +1488,7 @@ static void emit_insn(struct brw_vs_compile *c, brw_set_access_mode(p, BRW_ALIGN_16); brw_ADD(p, get_addr_reg(c->stack_index), get_addr_reg(c->stack_index), brw_imm_d(4)); - brw_save_call(p, inst->label, p->nr_insn); + brw_save_call(p, label, p->nr_insn); brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); break; case TGSI_OPCODE_RET: @@ -1468,8 +1513,8 @@ static void emit_insn(struct brw_vs_compile *c, break; default: debug_printf("Unsupported opcode %i (%s) in vertex shader", - inst->opcode, - tgsi_get_opcode_name(inst->opcode)); + opcode, + tgsi_get_opcode_name(opcode)); } /* Set the predication update on the last instruction of the native @@ -1498,11 +1543,12 @@ static void emit_insn(struct brw_vs_compile *c, void brw_vs_emit(struct brw_vs_compile *c) { struct brw_compile *p = &c->func; + const struct tgsi_token *tokens = c->vp->tokens; struct brw_instruction *end_inst, *last_inst; - struct ureg_parse_context parse; - struct ureg_declaration *decl; - struct ureg_declaration *imm; - struct ureg_declaration *insn; + struct tgsi_parse_context parse; + struct tgsi_full_instruction *inst; + boolean done = FALSE; + int i; if (BRW_DEBUG & DEBUG_VS) tgsi_dump(c->vp->tokens, 0); @@ -1512,21 +1558,66 @@ void brw_vs_emit(struct brw_vs_compile *c) brw_set_compression_control(p, BRW_COMPRESSION_NONE); 
brw_set_access_mode(p, BRW_ALIGN_16); + /* Inputs */ + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* Nothing to do -- using info from tgsi_scan(). + */ + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: { + static const float id[4] = {0,0,0,1}; + const float *imm = &parse.FullToken.FullImmediate.u[i].Float; + unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + + for (i = 0; i < size; i++) + c->immediate[c->nr_immediates][i] = imm[i]; + + for ( ; i < 4; i++) + c->immediate[c->nr_immediates][i] = id[i]; + + c->nr_immediates++; + break; + } + + case TGSI_TOKEN_TYPE_INSTRUCTION: + done = 1; + break; + } + } + /* Static register allocation */ brw_vs_alloc_regs(c); brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); - while (ureg_next_decl(&parse, &decl)) { - } - - while (ureg_next_immediate(&parse, &imm)) { - } - - while (ureg_next_instruction(&parse, &insn)) { + /* Instructions + */ + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + case TGSI_TOKEN_TYPE_IMMEDIATE: + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + inst = &parse.FullToken.FullInstruction; + emit_insn( c, inst ); + break; + + default: + assert( 0 ); + } } + tgsi_parse_free( &parse ); - end_inst = &p->store[end_offset]; + end_inst = &p->store[c->end_offset]; last_inst = &p->store[p->nr_insn]; /* The END instruction will be patched to jump to this code */ diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index 05a91f2de4..549696f7ae 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -29,8 +29,10 @@ * Keith Whitwell */ +#include "util/u_math.h" +#include "brw_debug.h" #include "brw_context.h" 
#include "brw_state.h" #include "brw_defines.h" @@ -64,8 +66,8 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) /* BRW_NEW_NR_VS_SURFACES */ key->nr_surfaces = brw->vs.nr_surfaces; - /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ - if (ctx->Transform.ClipPlanesEnabled) { + /* PIPE_NEW_CLIP */ + if (brw->curr.ucp.nr) { /* Note that we read in the userclip planes as well, hence * clip_start: */ @@ -86,7 +88,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) memset(&vs, 0, sizeof(vs)); vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ - vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + vs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, * which will have the channel mask for dwords 4-7 enabled in the thread, @@ -119,6 +121,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) chipset_max_threads = 32; else chipset_max_threads = 16; + vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2, 1, chipset_max_threads) - 1; @@ -145,16 +148,16 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) NULL, NULL); /* Emit VS program relocation */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - vs.thread0.grf_reg_count << 1, - offsetof(struct brw_vs_unit_state, thread0), - brw->vs.prog_bo); + brw->sws->bo_emit_reloc(bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + vs.thread0.grf_reg_count << 1, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); return bo; } -static void prepare_vs_unit(struct brw_context *brw) +static int prepare_vs_unit(struct brw_context *brw) { struct brw_vs_unit_key key; @@ -168,11 +171,13 @@ static void prepare_vs_unit(struct brw_context *brw) if (brw->vs.state_bo == NULL) { brw->vs.state_bo = vs_unit_create_from_key(brw, &key); } + + return 
0; } const struct brw_tracked_state brw_vs_unit = { .dirty = { - .mesa = _NEW_TRANSFORM, + .mesa = (PIPE_NEW_CLIP), .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c index 319e29bfcb..9a9d47a8a3 100644 --- a/src/gallium/drivers/i965/brw_vs_surface_state.c +++ b/src/gallium/drivers/i965/brw_vs_surface_state.c @@ -32,6 +32,11 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_winsys.h" + +/* XXX: disabled true constant buffer functionality + */ + /* Creates a new VS constant buffer reflecting the current VS program's * constants, if needed by the VS program. @@ -39,9 +44,12 @@ * Otherwise, constants go through the CURBEs using the brw_constant_buffer * state atom. */ -static drm_intel_bo * +#if 0 +static struct brw_winsys_buffer * brw_vs_update_constant_buffer(struct brw_context *brw) { + /* XXX: true constant buffers + */ struct brw_vertex_program *vp = (struct brw_vertex_program *) brw->vertex_program; const struct gl_program_parameter_list *params = vp->program.Base.Parameters; @@ -61,21 +69,20 @@ brw_vs_update_constant_buffer(struct brw_context *brw) return const_buffer; } +#endif /** * Update the surface state for a VS constant buffer. * * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer. 
*/ +#if 0 static void brw_update_vs_constant_surface( struct brw_context *brw, GLuint surf) { - struct brw_context *brw = brw_context(ctx); struct brw_surface_key key; - struct brw_vertex_program *vp = - (struct brw_vertex_program *) brw->vertex_program; - const struct gl_program_parameter_list *params = vp->program.Base.Parameters; + struct pipe_buffer *cb = brw->curr.vs_constants; assert(surf == 0); @@ -121,6 +128,7 @@ brw_update_vs_constant_surface( struct brw_context *brw, brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); } } +#endif /** @@ -129,6 +137,7 @@ brw_update_vs_constant_surface( struct brw_context *brw, static struct brw_winsys_buffer * brw_vs_get_binding_table(struct brw_context *brw) { +#if 0 struct brw_winsys_buffer *bind_bo; bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, @@ -169,6 +178,9 @@ brw_vs_get_binding_table(struct brw_context *brw) } return bind_bo; +#else + return NULL; +#endif } /** @@ -178,8 +190,9 @@ brw_vs_get_binding_table(struct brw_context *brw) * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and * CACHE_NEW_SURF_BIND for the binding table upload. */ -static void prepare_vs_surfaces(struct brw_context *brw ) +static int prepare_vs_surfaces(struct brw_context *brw ) { +#if 0 int i; int nr_surfaces = 0; @@ -195,6 +208,7 @@ static void prepare_vs_surfaces(struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; brw->vs.nr_surfaces = nr_surfaces; } +#endif /* Note that we don't end up updating the bind_bo if we don't have a * surface to be pointing at. 
This should be relatively harmless, as it @@ -204,12 +218,15 @@ static void prepare_vs_surfaces(struct brw_context *brw ) brw->sws->bo_unreference(brw->vs.bind_bo); brw->vs.bind_bo = brw_vs_get_binding_table(brw); } + + return 0; } const struct brw_tracked_state brw_vs_surfaces = { .dirty = { - .mesa = (_NEW_PROGRAM_CONSTANTS), - .brw = (BRW_NEW_VERTEX_PROGRAM), + .mesa = (PIPE_NEW_VERTEX_CONSTANTS | + PIPE_NEW_VERTEX_SHADER), + .brw = 0, .cache = 0 }, .prepare = prepare_vs_surfaces, diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 3d889699f8..f0dabfcfd0 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -28,11 +28,14 @@ * Authors: * Keith Whitwell */ - + +#include "tgsi/tgsi_info.h" + #include "brw_context.h" #include "brw_util.h" #include "brw_wm.h" #include "brw_state.h" +#include "brw_debug.h" /** Return number of src args for given instruction */ @@ -54,7 +57,7 @@ GLuint brw_wm_nr_args( GLuint opcode ) return 3; default: assert(opcode < MAX_OPCODE); - return _mesa_num_inst_src_regs(opcode); + return tgsi_get_opcode_info(opcode)->num_src; } } @@ -62,17 +65,17 @@ GLuint brw_wm_nr_args( GLuint opcode ) GLuint brw_wm_is_scalar_result( GLuint opcode ) { switch (opcode) { - case OPCODE_COS: - case OPCODE_EX2: - case OPCODE_LG2: - case OPCODE_POW: - case OPCODE_RCP: - case OPCODE_RSQ: - case OPCODE_SIN: - case OPCODE_DP3: - case OPCODE_DP4: - case OPCODE_DPH: - case OPCODE_DST: + case TGSI_OPCODE_COS: + case TGSI_OPCODE_EX2: + case TGSI_OPCODE_LG2: + case TGSI_OPCODE_POW: + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_SIN: + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + case TGSI_OPCODE_DST: return 1; default: @@ -134,7 +137,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) * we'll use one of two code generators. 
*/ static void do_wm_prog( struct brw_context *brw, - struct brw_fragment_program *fp, + struct brw_fragment_shader *fp, struct brw_wm_prog_key *key) { struct brw_wm_compile *c; @@ -163,7 +166,7 @@ static void do_wm_prog( struct brw_context *brw, brw_init_compile(brw, &c->func); /* temporary sanity check assertion */ - ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + assert(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); /* * Shader which use GLSL features such as flow control are handled @@ -200,8 +203,7 @@ static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct brw_fragment_program *fp = - (struct brw_fragment_program *)brw->fragment_program; + const struct brw_fragment_program *fp = brw->curr.fragment_shader; GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; GLuint lookup = 0; GLuint line_aa; -- cgit v1.2.3 From 99cc0fd67597cbcd6106afcf437a0d5e2431c9df Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 Oct 2009 20:18:01 +0000 Subject: i965g: work in progress on fragment shaders --- src/gallium/drivers/i965/brw_context.h | 10 +- src/gallium/drivers/i965/brw_eu.c | 20 +- src/gallium/drivers/i965/brw_eu.h | 8 +- src/gallium/drivers/i965/brw_pipe_depth.c | 42 +- src/gallium/drivers/i965/brw_pipe_rast.c | 18 + src/gallium/drivers/i965/brw_pipe_rast.h | 1 + src/gallium/drivers/i965/brw_pipe_shader.c | 4 +- src/gallium/drivers/i965/brw_screen.h | 7 + src/gallium/drivers/i965/brw_vs_emit.c | 2 - src/gallium/drivers/i965/brw_wm.c | 167 ++--- src/gallium/drivers/i965/brw_wm.h | 41 +- src/gallium/drivers/i965/brw_wm_debug.c | 17 +- src/gallium/drivers/i965/brw_wm_emit.c | 195 +++--- src/gallium/drivers/i965/brw_wm_fp.c | 1031 ++++++++++------------------ src/gallium/drivers/i965/brw_wm_glsl.c | 12 +- src/gallium/drivers/i965/brw_wm_pass0.c | 73 +- src/gallium/drivers/i965/brw_wm_pass1.c | 26 +- src/gallium/drivers/i965/brw_wm_state.c | 8 +- 18 
files changed, 682 insertions(+), 1000 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 7b85363e9f..e6c3161066 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -132,6 +132,8 @@ struct brw_depth_stencil_state { struct brw_cc2 cc2; struct brw_cc3 cc3; struct brw_cc7 cc7; + + unsigned iz_lookup; }; @@ -164,7 +166,10 @@ struct brw_fragment_shader { const struct tgsi_token *tokens; struct tgsi_shader_info info; - GLboolean isGLSL; + unsigned iz_lookup; + + boolean uses_depth:1; + boolean has_flow_control:1; unsigned id; struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ @@ -194,6 +199,7 @@ struct brw_fragment_shader { #define PIPE_NEW_COLOR_BUFFERS 0x40000 #define PIPE_NEW_QUERY 0x80000 #define PIPE_NEW_SCISSOR 0x100000 +#define PIPE_NEW_BOUND_TEXTURES 0x200000 @@ -487,7 +493,7 @@ struct brw_context const struct brw_rasterizer_state *rast; const struct brw_depth_stencil_state *zstencil; - const struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + const struct brw_texture *texture[PIPE_MAX_SAMPLERS]; const struct pipe_sampler *sampler[PIPE_MAX_SAMPLERS]; unsigned num_textures; unsigned num_samplers; diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c index 1189a35b6f..de43b14512 100644 --- a/src/gallium/drivers/i965/brw_eu.c +++ b/src/gallium/drivers/i965/brw_eu.c @@ -150,22 +150,22 @@ const GLuint *brw_get_program( struct brw_compile *p, /** * For each OPCODE_BGNSUB we create one of these. */ -struct brw_glsl_label +struct brw_eu_label { GLuint label; /**< the label number */ GLuint position; /**< the position of the brw instruction for this label */ - struct brw_glsl_label *next; /**< next in linked list */ + struct brw_eu_label *next; /**< next in linked list */ }; /** * For each OPCODE_CAL we create one of these. 
*/ -struct brw_glsl_call +struct brw_eu_call { GLuint call_inst_pos; /**< location of the CAL instruction */ GLuint label; - struct brw_glsl_call *next; /**< next in linked list */ + struct brw_eu_call *next; /**< next in linked list */ }; @@ -175,7 +175,7 @@ struct brw_glsl_call void brw_save_label(struct brw_compile *c, unsigned l, GLuint position) { - struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label); + struct brw_eu_label *label = CALLOC_STRUCT(brw_eu_label); label->label = l; label->position = position; label->next = c->first_label; @@ -189,7 +189,7 @@ brw_save_label(struct brw_compile *c, unsigned l, GLuint position) void brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos) { - struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call); + struct brw_eu_call *call = CALLOC_STRUCT(brw_eu_call); call->call_inst_pos = call_pos; call->label = label; call->next = c->first_call; @@ -203,7 +203,7 @@ brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos) static GLuint brw_lookup_label(struct brw_compile *c, unsigned l) { - const struct brw_glsl_label *label; + const struct brw_eu_label *label; for (label = c->first_label; label; label = label->next) { if (l == label->label) { return label->position; @@ -221,7 +221,7 @@ brw_lookup_label(struct brw_compile *c, unsigned l) void brw_resolve_cals(struct brw_compile *c) { - const struct brw_glsl_call *call; + const struct brw_eu_call *call; for (call = c->first_call; call; call = call->next) { const GLuint sub_loc = brw_lookup_label(c, call->label); @@ -235,7 +235,7 @@ brw_resolve_cals(struct brw_compile *c) /* free linked list of calls */ { - struct brw_glsl_call *call, *next; + struct brw_eu_call *call, *next; for (call = c->first_call; call; call = next) { next = call->next; FREE(call); @@ -245,7 +245,7 @@ brw_resolve_cals(struct brw_compile *c) /* free linked list of labels */ { - struct brw_glsl_label *label, *next; + struct brw_eu_label *label, *next; for (label = c->first_label; 
label; label = next) { next = label->next; FREE(label); diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h index 3379522104..7bddc3859c 100644 --- a/src/gallium/drivers/i965/brw_eu.h +++ b/src/gallium/drivers/i965/brw_eu.h @@ -109,8 +109,8 @@ struct brw_indirect { }; -struct brw_glsl_label; -struct brw_glsl_call; +struct brw_eu_label; +struct brw_eu_call; @@ -130,8 +130,8 @@ struct brw_compile { GLboolean single_program_flow; struct brw_context *brw; - struct brw_glsl_label *first_label; /**< linked list of labels */ - struct brw_glsl_call *first_call; /**< linked list of CALs */ + struct brw_eu_label *first_label; /**< linked list of labels */ + struct brw_eu_call *first_call; /**< linked list of CALs */ }; diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c index 33fe517e0b..e010d76e0d 100644 --- a/src/gallium/drivers/i965/brw_pipe_depth.c +++ b/src/gallium/drivers/i965/brw_pipe_depth.c @@ -5,6 +5,10 @@ #include "brw_context.h" #include "brw_defines.h" +/* XXX: Fixme - include this to get IZ_ defines + */ +#include "brw_wm.h" + static unsigned brw_translate_compare_func(unsigned func) { switch (func) { @@ -55,13 +59,9 @@ static unsigned translate_stencil_op(unsigned op) } } - -static void * -brw_create_depth_stencil_state( struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *templ ) +static void create_bcc_state( struct brw_depth_stencil_state *zstencil, + const struct pipe_depth_stencil_alpha_state *templ ) { - struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state); - if (templ->stencil[0].enabled) { zstencil->cc0.stencil_enable = 1; zstencil->cc0.stencil_func = @@ -108,6 +108,36 @@ brw_create_depth_stencil_state( struct pipe_context *pipe, zstencil->cc2.depth_test_function = brw_translate_compare_func(templ->depth.func); zstencil->cc2.depth_write_enable = templ->depth.writemask; } +} + +static void create_wm_iz_state( struct 
brw_depth_stencil_state *zstencil ) +{ + if (zstencil->cc3.alpha_test) + zstencil->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (zstencil->cc2.depth_test) + zstencil->iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (zstencil->cc2.depth_write_enable) + zstencil->iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + if (zstencil->cc0.stencil_enable) + zstencil->iz_lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (zstencil->cc0.stencil_write_enable) + zstencil->iz_lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + +} + + +static void * +brw_create_depth_stencil_state( struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *templ ) +{ + struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state); + + create_bcc_state( zstencil, templ ); + create_wm_iz_state( zstencil ); return (void *)zstencil; } diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c index 86822d478a..51159bf147 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.c +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -64,3 +64,21 @@ calculate_line_stipple_rast() bls.bits1.inverse_repeat_count = tmpi; } + + + +static void +calculate_wm_lookup() +{ + if (rast->fill_cw == PIPE_POLYGON_MODE_LINE && + rast->fill_ccw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_ALWAYS; + } + else if (rast->fill_cw == PIPE_POLYGON_MODE_LINE || + rast->fill_ccw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_SOMETIMES; + } + else { + line_aa = AA_NEVER; + } +} diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h index 800a9208a7..9354f01e18 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.h +++ b/src/gallium/drivers/i965/brw_pipe_rast.h @@ -10,6 +10,7 @@ struct brw_rasterizer_state { */ struct brw_clip_prog_key clip_key; struct brw_line_stipple bls; + unsigned unfilled_aa_line; }; #endif diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 8b61da763c..6e37eac634 100644 --- 
a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -39,7 +39,7 @@ * as flow conditionals, loops, subroutines. * Some GLSL shaders may use these features, others might not. */ -GLboolean brw_wm_is_glsl(const struct brw_fragment_shader *fp) +GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp) { return (fp->info.insn_count[TGSI_OPCODE_ARL] > 0 || fp->info.insn_count[TGSI_OPCODE_IF] > 0 || @@ -144,7 +144,7 @@ static void brwProgramStringNotify( struct brw_context *brw, if (newFP == curFP) brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; - newFP->isGLSL = brw_wm_is_glsl(fprog); + newFP->has_flow_control = brw_wm_has_flow_control(fprog); } else if (target == GL_VERTEX_PROGRAM_ARB) { struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index eafd8ddf77..efa27db1e0 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -64,6 +64,13 @@ struct brw_buffer boolean is_user_buffer; }; +struct brw_texture +{ + struct pipe_texture base; + + ubyte shader_swizzle; +}; + /* * Cast wrappers diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 6809bccdec..bcc5c5f713 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -1013,8 +1013,6 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, src->SrcRegister.SwizzleZ, src->SrcRegister.SwizzleW); - /* Note this is ok for non-swizzle instructions: - */ reg.negate = src->SrcRegister.Negate ? 
1 : 0; /* XXX: abs, absneg diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index f0dabfcfd0..33602b59c1 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -28,14 +28,17 @@ * Authors: * Keith Whitwell */ +#include "pipe/p_error.h" #include "tgsi/tgsi_info.h" #include "brw_context.h" +#include "brw_screen.h" #include "brw_util.h" #include "brw_wm.h" #include "brw_state.h" #include "brw_debug.h" +#include "brw_pipe_rast.h" /** Return number of src args for given instruction */ @@ -85,12 +88,12 @@ GLuint brw_wm_is_scalar_result( GLuint opcode ) /** - * Do GPU code generation for non-GLSL shader. non-GLSL shaders have - * no flow control instructions so we can more readily do SSA-style - * optimizations. + * Do GPU code generation for shaders without flow control. Shaders + * without flow control instructions can more readily be analysed for + * SSA-style optimizations. */ static void -brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +brw_wm_linear_shader_emit(struct brw_context *brw, struct brw_wm_compile *c) { /* Augment fragment program. Add instructions for pre- and * post-fragment-program tasks such as interpolation and fogging. @@ -136,7 +139,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) * Depending on the instructions used (i.e. flow control instructions) * we'll use one of two code generators. */ -static void do_wm_prog( struct brw_context *brw, +static int do_wm_prog( struct brw_context *brw, struct brw_fragment_shader *fp, struct brw_wm_prog_key *key) { @@ -153,7 +156,7 @@ static void do_wm_prog( struct brw_context *brw, * without triggering a segfault, no way to signal, * so just return. 
*/ - return; + return PIPE_ERROR_OUT_OF_MEMORY; } } else { memset(c, 0, sizeof(*brw->wm.compile_data)); @@ -166,19 +169,19 @@ static void do_wm_prog( struct brw_context *brw, brw_init_compile(brw, &c->func); /* temporary sanity check assertion */ - assert(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); + assert(fp->has_flow_control == brw_wm_has_flow_control(c->fp)); /* * Shader which use GLSL features such as flow control are handled * differently from "simple" shaders. */ - if (fp->isGLSL) { + if (fp->has_flow_control) { c->dispatch_width = 8; - brw_wm_glsl_emit(brw, c); + brw_wm_branching_shader_emit(brw, c); } else { c->dispatch_width = 16; - brw_wm_non_glsl_emit(brw, c); + brw_wm_linear_shader_emit(brw, c); } if (BRW_DEBUG & DEBUG_WM) @@ -195,6 +198,8 @@ static void do_wm_prog( struct brw_context *brw, program, program_size, &c->prog_data, &brw->wm.prog_data ); + + return 0; } @@ -202,71 +207,36 @@ static void do_wm_prog( struct brw_context *brw, static void brw_wm_populate_key( struct brw_context *brw, struct brw_wm_prog_key *key ) { - /* BRW_NEW_FRAGMENT_PROGRAM */ - const struct brw_fragment_program *fp = brw->curr.fragment_shader; - GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; - GLuint lookup = 0; - GLuint line_aa; - GLuint i; + unsigned lookup, line_aa; + unsigned i; memset(key, 0, sizeof(*key)); - /* Build the index for table lookup + /* PIPE_NEW_FRAGMENT_SHADER + * PIPE_NEW_DEPTH_STENCIL_ALPHA */ - /* _NEW_COLOR */ - if (fp->program.UsesKill || - ctx->Color.AlphaEnabled) - lookup |= IZ_PS_KILL_ALPHATEST_BIT; - - if (fp->program.Base.OutputsWritten & (1<Depth.Test) - lookup |= IZ_DEPTH_TEST_ENABLE_BIT; - - if (ctx->Depth.Test && - ctx->Depth.Mask) /* ?? 
*/ - lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + lookup = (brw->curr.zstencil->iz_lookup | + brw->curr.fragment_shader->iz_lookup); - /* _NEW_STENCIL */ - if (ctx->Stencil._Enabled) { - lookup |= IZ_STENCIL_TEST_ENABLE_BIT; - if (ctx->Stencil.WriteMask[0] || - ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) - lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; - } - - line_aa = AA_NEVER; - - /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ - if (ctx->Line.SmoothFlag) { - if (brw->intel.reduced_primitive == GL_LINES) { - line_aa = AA_ALWAYS; - } - else if (brw->intel.reduced_primitive == GL_TRIANGLES) { - if (ctx->Polygon.FrontMode == GL_LINE) { - line_aa = AA_SOMETIMES; - - if (ctx->Polygon.BackMode == GL_LINE || - (ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_BACK)) - line_aa = AA_ALWAYS; - } - else if (ctx->Polygon.BackMode == GL_LINE) { - line_aa = AA_SOMETIMES; - - if ((ctx->Polygon.CullFlag && - ctx->Polygon.CullFaceMode == GL_FRONT)) - line_aa = AA_ALWAYS; - } - } + /* PIPE_NEW_RAST + * BRW_NEW_REDUCED_PRIMITIVE + */ + switch (brw->reduced_primitive) { + case PIPE_PRIM_POINTS: + line_aa = AA_NEVER; + break; + case PIPE_PRIM_LINES: + line_aa = AA_ALWAYS; + break; + default: + line_aa = brw->curr.rast->unfilled_aa_line; + break; } brw_wm_lookup_iz(line_aa, lookup, - uses_depth, + brw->curr.fragment_shader->uses_depth, key); /* Revisit this, figure out if it's really useful, and either push @@ -276,54 +246,39 @@ static void brw_wm_populate_key( struct brw_context *brw, key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/ /* PIPE_NEW_RAST */ - key->flat_shade = brw->rast.flat_shade; + key->flat_shade = brw->curr.rast->templ.flatshade; /* This can be determined by looking at the INTERP mode each input decl. 
*/ - key->linear_color = 0; - - /* _NEW_TEXTURE */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - if (i < brw->nr_textures) { - const struct gl_texture_unit *unit = &ctx->Texture.Unit[i]; - const struct gl_texture_object *t = unit->_Current; - const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; - - if (img->InternalFormat == GL_YCBCR_MESA) { - key->yuvtex_mask |= 1 << i; - if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR) - key->yuvtex_swap_mask |= 1 << i; - } + key->linear_attrib_mask = 0; - key->tex_swizzles[i] = t->_Swizzle; + /* PIPE_NEW_BOUND_TEXTURES */ + for (i = 0; i < brw->curr.num_textures; i++) { + const struct brw_texture *tex = brw->curr.texture[i]; - if (0) - key->shadowtex_mask |= 1<tex_swizzles[i] = SWIZZLE_NOOP; - } - } + if (tex->base.format == PIPE_FORMAT_YCBCR) + key->yuvtex_mask |= 1 << i; + if (tex->base.format == PIPE_FORMAT_YCBCR_REV) + key->yuvtex_swap_mask |= 1 << i; - /* _NEW_FRAMEBUFFER */ - if (brw->intel.driDrawable != NULL) { - key->drawable_height = brw->fb.cbufs[0].height; + /* XXX: shadow texture + */ + /* key->shadowtex_mask |= 1<vp_nr_outputs_written = brw->vs.prog_data->nr_outputs_written; + key->vp_nr_outputs = brw->vs.prog_data->nr_outputs; /* The unique fragment program ID */ - key->program_string_id = fp->id; + key->program_string_id = brw->curr.fragment_shader->id; } -static void brw_prepare_wm_prog(struct brw_context *brw) +static int brw_prepare_wm_prog(struct brw_context *brw) { struct brw_wm_prog_key key; - struct brw_fragment_program *fp = (struct brw_fragment_program *) - brw->fragment_program; + struct brw_fragment_shader *fs = brw->curr.fragment_shader; brw_wm_populate_key(brw, &key); @@ -335,23 +290,19 @@ static void brw_prepare_wm_prog(struct brw_context *brw) NULL, 0, &brw->wm.prog_data); if (brw->wm.prog_bo == NULL) - do_wm_prog(brw, fp, &key); + return do_wm_prog(brw, fs, &key); + + return 0; } const struct brw_tracked_state brw_wm_prog = { .dirty = { - .mesa = (_NEW_COLOR | - _NEW_DEPTH | - 
_NEW_HINT | - _NEW_STENCIL | - _NEW_POLYGON | - _NEW_LINE | - _NEW_LIGHT | - _NEW_BUFFERS | - _NEW_TEXTURE), - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_WM_INPUT_DIMENSIONS | + .mesa = (PIPE_NEW_FRAGMENT_SHADER | + PIPE_NEW_DEPTH_STENCIL_ALPHA | + PIPE_NEW_RAST | + PIPE_NEW_BOUND_TEXTURES), + .brw = (BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), .cache = CACHE_NEW_VS_PROG, }, diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 084430cf28..2cd5bb7081 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -33,9 +33,6 @@ #ifndef BRW_WM_H #define BRW_WM_H -#include "tgsi/tgsi_ureg.h" -#include "tgsi/tgsi_ureg_parse.h" - #include "brw_context.h" #include "brw_eu.h" @@ -59,8 +56,8 @@ #define AA_ALWAYS 2 struct brw_wm_prog_key { - unsigned proj_attrib_mask; /**< one bit per fragment program attribute */ - unsigned linear_attrib_mask:1; /**< linear interpolation vs perspective interp */ + unsigned proj_attrib_mask; /**< one bit per fragment program attribute */ + unsigned linear_attrib_mask; /**< linear interpolation vs perspective interp */ GLuint source_depth_reg:3; GLuint aa_dest_stencil_reg:3; @@ -75,11 +72,10 @@ struct brw_wm_prog_key { GLuint yuvtex_mask:16; GLuint yuvtex_swap_mask:16; /* UV swaped */ - GLuint tex_swizzles[BRW_MAX_TEX_UNIT]; - - GLuint program_string_id:32; + GLuint vp_nr_outputs:6; + GLuint nr_cbufs:3; - GLuint vp_nr_outputs_written; + GLuint program_string_id; }; @@ -146,9 +142,8 @@ struct brw_wm_instruction { GLuint opcode:8; GLuint saturate:1; GLuint writemask:4; - GLuint tex_unit:4; /* texture unit for TEX, TXD, TXP instructions */ - GLuint tex_idx:3; /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */ - GLuint tex_shadow:1; /* do shadow comparison? 
*/ + GLuint tex_unit:4; /* texture/sampler unit for texture instructions */ + GLuint tex_target:4; /* TGSI_TEXTURE_x for texture instructions*/ GLuint eot:1; /* End of thread indicator for FB_WRITE*/ GLuint target:10; /* target binding table index for FB_WRITE*/ }; @@ -180,15 +175,17 @@ struct brw_wm_instruction { #define WM_FRONTFACING (MAX_OPCODE + 8) #define MAX_WM_OPCODE (MAX_OPCODE + 9) -#define PROGRAM_PAYLOAD (TGSI_FILE_COUNT) -#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) +#define BRW_FILE_PAYLOAD (TGSI_FILE_COUNT) +#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) /* ?? */ + +struct brw_passfp_program; struct brw_wm_compile { struct brw_compile func; struct brw_wm_prog_key key; struct brw_wm_prog_data prog_data; - struct brw_fragment_program *fp; + struct brw_fragment_shader *fp; GLfloat (*env_param)[4]; @@ -201,15 +198,7 @@ struct brw_wm_compile { * simplifying and adding instructions for interpolation and * framebuffer writes. */ - struct ureg_instruction prog_instructions[BRW_WM_MAX_INSN]; - GLuint nr_fp_insns; - GLuint fp_temp; - GLuint fp_interp_emitted; - GLuint fp_fragcolor_emitted; - - struct ureg_src pixel_xy; - struct ureg_src delta_xy; - struct ureg_src pixel_w; + struct brw_passfp_program *pass_fp; struct brw_wm_value vreg[BRW_WM_MAX_VREG]; @@ -298,8 +287,8 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); -//GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); +GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp); +void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c); void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c index 04dec5ba39..65d7626eea 100644 --- a/src/gallium/drivers/i965/brw_wm_debug.c +++ b/src/gallium/drivers/i965/brw_wm_debug.c @@ -28,7 +28,8 @@ * 
Authors: * Keith Whitwell */ - + +#include "tgsi/tgsi_info.h" #include "brw_context.h" #include "brw_wm.h" @@ -49,10 +50,10 @@ void brw_wm_print_value( struct brw_wm_compile *c, value - c->creg < BRW_WM_MAX_PARAM) debug_printf("c%d", value - c->creg); else if (value - c->payload.input_interp >= 0 && - value - c->payload.input_interp < FRAG_ATTRIB_MAX) + value - c->payload.input_interp < PIPE_MAX_SHADER_INPUTS) debug_printf("i%d", value - c->payload.input_interp); else if (value - c->payload.depth >= 0 && - value - c->payload.depth < FRAG_ATTRIB_MAX) + value - c->payload.depth < PIPE_MAX_SHADER_INPUTS) debug_printf("d%d", value - c->payload.depth); else debug_printf("?"); @@ -100,10 +101,10 @@ void brw_wm_print_insn( struct brw_wm_compile *c, if (inst->writemask != BRW_WRITEMASK_XYZW) debug_printf(".%s%s%s%s", - GET_BIT(inst->writemask, 0) ? "x" : "", - GET_BIT(inst->writemask, 1) ? "y" : "", - GET_BIT(inst->writemask, 2) ? "z" : "", - GET_BIT(inst->writemask, 3) ? "w" : ""); + (inst->writemask & BRW_WRITEMASK_X) ? "x" : "", + (inst->writemask & BRW_WRITEMASK_Y) ? "y" : "", + (inst->writemask & BRW_WRITEMASK_Z) ? "z" : "", + (inst->writemask & BRW_WRITEMASK_W) ? "w" : ""); switch (inst->opcode) { case WM_PIXELXY: @@ -134,7 +135,7 @@ void brw_wm_print_insn( struct brw_wm_compile *c, debug_printf(" = FRONTFACING"); break; default: - debug_printf(" = %s", _mesa_opcode_string(inst->opcode)); + debug_printf(" = %s", tgsi_get_opcode_info(inst->opcode)->mnemonic); break; } diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 5f7ae6592c..a705d8b344 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -28,10 +28,13 @@ * Authors: * Keith Whitwell */ - + +#include "util/u_math.h" +#include "tgsi/tgsi_info.h" #include "brw_context.h" #include "brw_wm.h" +#include "brw_debug.h" /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. 
@@ -45,15 +48,15 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg ) /* Payload R0: * - * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles, + * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads, * corresponding to each of the 16 execution channels. * R0.1..8 -- ? * R1.0 -- triangle vertex 0.X * R1.1 -- triangle vertex 0.Y - * R1.2 -- tile 0 x,y coords (2 packed uwords) - * R1.3 -- tile 1 x,y coords (2 packed uwords) - * R1.4 -- tile 2 x,y coords (2 packed uwords) - * R1.5 -- tile 3 x,y coords (2 packed uwords) + * R1.2 -- quad 0 x,y coords (2 packed uwords) + * R1.3 -- quad 1 x,y coords (2 packed uwords) + * R1.4 -- quad 2 x,y coords (2 packed uwords) + * R1.5 -- quad 3 x,y coords (2 packed uwords) * R1.6 -- ? * R1.7 -- ? * R1.8 -- ? @@ -134,11 +137,17 @@ static void emit_wpos_xy(struct brw_wm_compile *c, /* XXX: is this needed any more, or is this a NOOP? */ if (mask & BRW_WRITEMASK_Y) { +#if 0 /* Y' = height - 1 - Y */ brw_ADD(p, dst[1], negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), brw_imm_d(c->key.drawable_height - 1)); +#else + brw_MOV(p, + dst[0], + retype(arg0[0], BRW_REGISTER_TYPE_W)); +#endif } } @@ -279,28 +288,28 @@ static void emit_frontfacing( struct brw_compile *p, /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input * looking like: * - * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br + * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br * * and we're trying to produce: * * DDX DDY - * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) - * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) - * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) - * (ss0.br - ss0.bl) (ss0.tr - ss0.br) - * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) - * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) - * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) - * (ss1.br - ss1.bl) (ss1.tr - ss1.br) + * dst: (q0.tr - q0.tl) (q0.tl - q0.bl) + * (q0.tr - q0.tl) (q0.tr - q0.br) + * (q0.br - q0.bl) (q0.tl - q0.bl) + * (q0.br - q0.bl) (q0.tr - q0.br) + * (q1.tr - q1.tl) (q1.tl - q1.bl) + 
* (q1.tr - q1.tl) (q1.tr - q1.br) + * (q1.br - q1.bl) (q1.tl - q1.bl) + * (q1.br - q1.bl) (q1.tr - q1.br) * - * and add another set of two more subspans if in 16-pixel dispatch mode. + * and add two more quads if in 16-pixel dispatch mode. * * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result * for each pair, and vertstride = 2 jumps us 2 elements after processing a * pair. But for DDY, it's harder, as we want to produce the pairs swizzled * between each other. We could probably do it like ddx and swizzle the right * order later, but bail for now and just produce - * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) + * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4) */ void emit_ddxy(struct brw_compile *p, const struct brw_reg *dst, @@ -611,12 +620,12 @@ static void emit_dp3( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -633,12 +642,12 @@ static void emit_dp4( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -656,12 +665,12 @@ static void emit_dph( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1 ) { - const int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; + const int dst_chan = ffs(mask 
& BRW_WRITEMASK_XYZW) - 1; if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]); brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]); @@ -704,12 +713,12 @@ static void emit_math1( struct brw_compile *p, GLuint mask, const struct brw_reg *arg0 ) { - int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_MOV(p, brw_message_reg(2), arg0[0]); @@ -732,12 +741,12 @@ static void emit_math2( struct brw_compile *p, const struct brw_reg *arg0, const struct brw_reg *arg1) { - int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1; if (!(mask & BRW_WRITEMASK_XYZW)) return; /* Do not emit dead code */ - assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW)); + assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW)); brw_push_insn_state(p); @@ -790,21 +799,32 @@ static void emit_tex( struct brw_wm_compile *c, GLuint i, nr; GLuint emit; GLuint msg_type; + GLboolean shadow = FALSE; /* How many input regs are there? 
*/ - switch (inst->tex_idx) { - case TEXTURE_1D_INDEX: + switch (inst->tex_target) { + case TGSI_TEXTURE_1D: emit = BRW_WRITEMASK_X; nr = 1; break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: + case TGSI_TEXTURE_SHADOW1D: + emit = BRW_WRITEMASK_XW; + nr = 4; + shadow = TRUE; + break; + case TGSI_TEXTURE_2D: emit = BRW_WRITEMASK_XY; nr = 2; break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + emit = BRW_WRITEMASK_XYW; + nr = 4; + shadow = TRUE; + break; + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: emit = BRW_WRITEMASK_XYZ; nr = 3; break; @@ -813,11 +833,6 @@ static void emit_tex( struct brw_wm_compile *c, abort(); } - if (inst->tex_shadow) { - nr = 4; - emit |= BRW_WRITEMASK_W; - } - msgLength = 1; for (i = 0; i < nr; i++) { @@ -832,12 +847,12 @@ static void emit_tex( struct brw_wm_compile *c, responseLength = 8; /* always */ if (BRW_IS_IGDNG(p->brw)) { - if (inst->tex_shadow) + if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG; else msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG; } else { - if (inst->tex_shadow) + if (shadow) msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; else msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; @@ -870,20 +885,23 @@ static void emit_txb( struct brw_wm_compile *c, GLuint msg_type; /* Shadow ignored for txb. 
*/ - switch (inst->tex_idx) { - case TEXTURE_1D_INDEX: + switch (inst->tex_target) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: brw_MOV(p, brw_message_reg(2), arg[0]); brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); break; - case TEXTURE_2D_INDEX: - case TEXTURE_RECT_INDEX: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: brw_MOV(p, brw_message_reg(2), arg[0]); brw_MOV(p, brw_message_reg(4), arg[1]); brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); break; - case TEXTURE_3D_INDEX: - case TEXTURE_CUBE_INDEX: + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: brw_MOV(p, brw_message_reg(2), arg[0]); brw_MOV(p, brw_message_reg(4), arg[1]); brw_MOV(p, brw_message_reg(6), arg[2]); @@ -976,10 +994,10 @@ static void emit_kil( struct brw_wm_compile *c, } } -/* KIL_NV kills the pixels that are currently executing, not based on a test +/* KILLP kills the pixels that are currently executing, not based on a test * of the arguments. 
*/ -static void emit_kil_nv( struct brw_wm_compile *c ) +static void emit_killp( struct brw_wm_compile *c ) { struct brw_compile *p = &c->func; struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); @@ -1259,7 +1277,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) */ spill_values(c, c->payload.depth, 4); spill_values(c, c->creg, c->nr_creg); - spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX); + spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS); for (insn = 0; insn < c->nr_insns; insn++) { @@ -1328,89 +1346,89 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Straightforward arithmetic: */ - case OPCODE_ADD: + case TGSI_OPCODE_ADD: emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]); break; - case OPCODE_FRC: + case TGSI_OPCODE_FRC: emit_alu1(p, brw_FRC, dst, dst_flags, args[0]); break; - case OPCODE_FLR: + case TGSI_OPCODE_FLR: emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]); break; - case OPCODE_DDX: + case TGSI_OPCODE_DDX: emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]); break; - case OPCODE_DDY: + case TGSI_OPCODE_DDY: emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]); break; - case OPCODE_DP3: + case TGSI_OPCODE_DP3: emit_dp3(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_DP4: + case TGSI_OPCODE_DP4: emit_dp4(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_DPH: + case TGSI_OPCODE_DPH: emit_dph(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_TRUNC: + case TGSI_OPCODE_TRUNC: emit_trunc(p, dst, dst_flags, args[0]); break; - case OPCODE_LRP: + case TGSI_OPCODE_LRP: emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]); break; - case OPCODE_MAD: + case TGSI_OPCODE_MAD: emit_mad(p, dst, dst_flags, args[0], args[1], args[2]); break; - case OPCODE_MOV: + case TGSI_OPCODE_MOV: emit_alu1(p, brw_MOV, dst, dst_flags, args[0]); break; - case OPCODE_MUL: + case TGSI_OPCODE_MUL: emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]); break; - case OPCODE_XPD: + case TGSI_OPCODE_XPD: emit_xpd(p, 
dst, dst_flags, args[0], args[1]); break; /* Higher math functions: */ - case OPCODE_RCP: + case TGSI_OPCODE_RCP: emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]); break; - case OPCODE_RSQ: + case TGSI_OPCODE_RSQ: emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]); break; - case OPCODE_SIN: + case TGSI_OPCODE_SIN: emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]); break; - case OPCODE_COS: + case TGSI_OPCODE_COS: emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]); break; - case OPCODE_EX2: + case TGSI_OPCODE_EX2: emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]); break; - case OPCODE_LG2: + case TGSI_OPCODE_LG2: emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]); break; - case OPCODE_SCS: + case TGSI_OPCODE_SCS: /* There is an scs math function, but it would need some * fixup for 16-element execution. */ @@ -1420,71 +1438,70 @@ void brw_wm_emit( struct brw_wm_compile *c ) emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]); break; - case OPCODE_POW: + case TGSI_OPCODE_POW: emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]); break; /* Comparisons: */ - case OPCODE_CMP: + case TGSI_OPCODE_CMP: emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]); break; - case OPCODE_MAX: + case TGSI_OPCODE_MAX: emit_max(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_MIN: + case TGSI_OPCODE_MIN: emit_min(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SLT: + case TGSI_OPCODE_SLT: emit_slt(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SLE: + case TGSI_OPCODE_SLE: emit_sle(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SGT: + case TGSI_OPCODE_SGT: emit_sgt(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SGE: + case TGSI_OPCODE_SGE: emit_sge(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SEQ: + case TGSI_OPCODE_SEQ: emit_seq(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_SNE: 
+ case TGSI_OPCODE_SNE: emit_sne(p, dst, dst_flags, args[0], args[1]); break; - case OPCODE_LIT: + case TGSI_OPCODE_LIT: emit_lit(p, dst, dst_flags, args[0]); break; /* Texturing operations: */ - case OPCODE_TEX: + case TGSI_OPCODE_TEX: emit_tex(c, inst, dst, dst_flags, args[0]); break; - case OPCODE_TXB: + case TGSI_OPCODE_TXB: emit_txb(c, inst, dst, dst_flags, args[0]); break; - case OPCODE_KIL: + case TGSI_OPCODE_KIL: emit_kil(c, args[0]); break; - case OPCODE_KIL_NV: - emit_kil_nv(c); + case TGSI_OPCODE_KILP: + emit_killp(c); break; default: debug_printf("Unsupported opcode %i (%s) in fragment shader\n", - inst->opcode, inst->opcode < MAX_OPCODE ? - _mesa_opcode_string(inst->opcode) : - "unknown"); + inst->opcode, + tgsi_get_opcode_info(inst->opcode)->mnemonic); } for (i = 0; i < 4; i++) diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index d594730730..8ba037cdae 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -30,9 +30,8 @@ */ -#include "pipe/p_shader_constants.h" +#include "pipe/p_shader_tokens.h" -#include "brw_context.h" #include "brw_wm.h" #include "brw_util.h" @@ -43,7 +42,7 @@ #define W 3 -static const char *wm_opcode_strings[] = { +static const char *wm_opcode_strings[] = { "PIXELXY", "DELTAXY", "PIXELW", @@ -57,143 +56,6 @@ static const char *wm_opcode_strings[] = { -/*********************************************************************** - * Source regs - */ - -static struct prog_src_register src_reg(GLuint file, GLuint idx) -{ - struct prog_src_register reg; - reg.File = file; - reg.Index = idx; - reg.Swizzle = SWIZZLE_NOOP; - reg.RelAddr = 0; - reg.Negate = NEGATE_NONE; - reg.Abs = 0; - return reg; -} - -static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst) -{ - return src_reg(dst.File, dst.Index); -} - -static struct prog_src_register src_undef( void ) -{ - return src_reg(PROGRAM_UNDEFINED, 0); -} - -static GLboolean src_is_undef(struct 
prog_src_register src) -{ - return src.File == PROGRAM_UNDEFINED; -} - -static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w ) -{ - reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w); - return reg; -} - -static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x ) -{ - return src_swizzle(reg, x, x, x, x); -} - -static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle ) -{ - reg.Swizzle = swizzle; - return reg; -} - - -/*********************************************************************** - * Dest regs - */ - -static struct prog_dst_register dst_reg(GLuint file, GLuint idx) -{ - struct prog_dst_register reg; - reg.File = file; - reg.Index = idx; - reg.WriteMask = BRW_WRITEMASK_XYZW; - reg.RelAddr = 0; - reg.CondMask = COND_TR; - reg.CondSwizzle = 0; - reg.CondSrc = 0; - reg.pad = 0; - return reg; -} - -static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask ) -{ - reg.WriteMask &= mask; - return reg; -} - -static struct prog_dst_register dst_undef( void ) -{ - return dst_reg(PROGRAM_UNDEFINED, 0); -} - - - -static struct prog_dst_register get_temp( struct brw_wm_compile *c ) -{ - int bit = _mesa_ffs( ~c->fp_temp ); - - if (!bit) { - debug_printf("%s: out of temporaries\n", __FILE__); - exit(1); - } - - c->fp_temp |= 1<<(bit-1); - return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1)); -} - - -static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp ) -{ - c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp)); -} - - -/*********************************************************************** - * Instructions - */ - -static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c) -{ - return &c->prog_instructions[c->nr_fp_insns++]; -} - -static struct prog_instruction *emit_insn(struct brw_wm_compile *c, - const struct prog_instruction *inst0) -{ - struct prog_instruction *inst = get_fp_inst(c); - *inst = 
*inst0; - return inst; -} - -static struct prog_instruction * emit_op(struct brw_wm_compile *c, - GLuint op, - struct prog_dst_register dest, - GLuint saturate, - struct prog_src_register src0, - struct prog_src_register src1, - struct prog_src_register src2 ) -{ - struct prog_instruction *inst = get_fp_inst(c); - - memset(inst, 0, sizeof(*inst)); - - inst->Opcode = op; - inst->DstReg = dest; - inst->SaturateMode = saturate; - inst->SrcReg[0] = src0; - inst->SrcReg[1] = src1; - inst->SrcReg[2] = src2; - return inst; -} /* Many opcodes produce the same value across all the result channels. @@ -202,32 +64,28 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c, * anyway. We can easily get both by emitting the opcode to one channel, and * then MOVing it to the others, which brw_wm_pass*.c already understands. */ -static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, - const struct prog_instruction *inst0) -{ - struct prog_instruction *inst; - unsigned int dst_chan; - unsigned int other_channel_mask; - - if (inst0->DstReg.WriteMask == 0) - return NULL; - - dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1; - inst = get_fp_inst(c); - *inst = *inst0; - inst->DstReg.WriteMask = 1 << dst_chan; - - other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan); - if (other_channel_mask != 0) { - inst = emit_op(c, - TGSI_OPCODE_MOV, - dst_mask(inst0->DstReg, other_channel_mask), - 0, - src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan), - src_undef(), - src_undef()); +static void emit_scalar_insn(struct brw_wm_compile *c, + unsigned opcode, + struct brw_dst dst, + struct brw_src src0, + struct brw_src src1, + struct brw_src src2 ) +{ + unsigned first_chan = ffs(dst.writemask) - 1; + unsigned first_mask = 1 << first_chan; + + if (dst.writemask == 0) + return; + + emit_op( c, opcode, + brw_writemask(dst, first_mask), + src0, src1, src2 ); + + if (dst.writemask != first_mask) { + emit_op1(c, TGSI_OPCODE_MOV, + brw_writemask(dst, 
~first_mask), + src_swizzle1(brw_src(dst), first_chan)); } - return inst; } @@ -235,11 +93,11 @@ static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c, * Special instructions for interpolation and other tasks */ -static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) +static struct ureg_src get_pixel_xy( struct brw_wm_compile *c ) { if (src_is_undef(c->pixel_xy)) { - struct prog_dst_register pixel_xy = get_temp(c); - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); + struct ureg_dst pixel_xy = get_temp(c); + struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); /* Emit the out calculations, and hold onto the results. Use @@ -250,7 +108,6 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) emit_op(c, WM_PIXELXY, dst_mask(pixel_xy, BRW_WRITEMASK_XY), - 0, payload_r0_depth, src_undef(), src_undef()); @@ -261,19 +118,18 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c ) return c->pixel_xy; } -static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) +static struct ureg_src get_delta_xy( struct brw_wm_compile *c ) { if (src_is_undef(c->delta_xy)) { - struct prog_dst_register delta_xy = get_temp(c); - struct prog_src_register pixel_xy = get_pixel_xy(c); - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); + struct ureg_dst delta_xy = get_temp(c); + struct ureg_src pixel_xy = get_pixel_xy(c); + struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); /* deltas.xy = DELTAXY pixel_xy, payload[0] */ emit_op(c, WM_DELTAXY, dst_mask(delta_xy, BRW_WRITEMASK_XY), - 0, pixel_xy, payload_r0_depth, src_undef()); @@ -284,19 +140,18 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c ) return c->delta_xy; } -static struct prog_src_register get_pixel_w( struct brw_wm_compile *c ) +static struct ureg_src get_pixel_w( struct brw_wm_compile *c ) { if 
(src_is_undef(c->pixel_w)) { - struct prog_dst_register pixel_w = get_temp(c); - struct prog_src_register deltas = get_delta_xy(c); - struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS); + struct ureg_dst pixel_w = get_temp(c); + struct ureg_src deltas = get_delta_xy(c); + struct ureg_src interp_wpos = src_reg(TGSI_FILE_PAYLOAD, FRAG_ATTRIB_WPOS); /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ emit_op(c, WM_PIXELW, dst_mask(pixel_w, BRW_WRITEMASK_W), - 0, interp_wpos, deltas, src_undef()); @@ -313,9 +168,9 @@ static void emit_interp( struct brw_wm_compile *c, GLuint semantic_index, GLuint interp_mode ) { - struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); - struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); - struct prog_src_register deltas = get_delta_xy(c); + struct ureg_dst dst = dst_reg(TGSI_FILE_INPUT, idx); + struct ureg_src interp = src_reg(TGSI_FILE_PAYLOAD, idx); + struct ureg_src deltas = get_delta_xy(c); /* Need to use PINTERP on attributes which have been * multiplied by 1/W in the SF program, and LINTERP on those @@ -325,271 +180,197 @@ static void emit_interp( struct brw_wm_compile *c, case FRAG_ATTRIB_WPOS: /* Have to treat wpos.xy specially: */ - emit_op(c, + emit_op1(c, WM_WPOSXY, dst_mask(dst, BRW_WRITEMASK_XY), - 0, - get_pixel_xy(c), - src_undef(), - src_undef()); + get_pixel_xy(c)); - dst = dst_mask(dst, BRW_WRITEMASK_ZW); - - /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw */ - emit_op(c, - WM_LINTERP, - dst, - 0, - interp, - deltas, - src_undef()); + emit_op2(c, + WM_LINTERP, + dst_mask(dst, BRW_WRITEMASK_ZW), + interp, + deltas); break; case TGSI_SEMANTIC_COLOR: if (c->key.flat_shade) { - emit_op(c, + emit_op1(c, WM_CINTERP, dst, - 0, - interp, - src_undef(), - src_undef()); + interp); + } + else if (interp_mode == TGSI_INTERPOLATE_LINEAR) { + emit_op2(c, + WM_LINTERP, + dst, + 
interp, + deltas); } else { - emit_op(c, - translate_interp_mode(interp_mode), - dst, - 0, - interp, - deltas, - src_undef()); + emit_op3(c, + WM_PINTERP, + dst, + interp, + deltas, + get_pixel_w(c)); } + break; case FRAG_ATTRIB_FOGC: /* Interpolate the fog coordinate */ - emit_op(c, + emit_op3(c, WM_PINTERP, dst_mask(dst, BRW_WRITEMASK_X), - 0, interp, deltas, get_pixel_w(c)); - emit_op(c, + emit_op1(c, TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_YZW), - 0, - src_swizzle(interp, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ONE), - src_undef(), - src_undef()); + dst_mask(dst, BRW_WRITEMASK_YZ), + brw_imm1f(0.0)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + brw_imm1f(1.0)); break; case FRAG_ATTRIB_FACE: /* XXX review/test this case */ - emit_op(c, - WM_FRONTFACING, - dst_mask(dst, BRW_WRITEMASK_X), - 0, - src_undef(), - src_undef(), - src_undef()); + emit_op0(c, + WM_FRONTFACING, + dst_mask(dst, BRW_WRITEMASK_X)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_YZ), + brw_imm1f(0.0)); + + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + brw_imm1f(1.0)); break; case FRAG_ATTRIB_PNTC: /* XXX review/test this case */ - emit_op(c, - WM_PINTERP, - dst_mask(dst, BRW_WRITEMASK_XY), - 0, - interp, - deltas, - get_pixel_w(c)); - - emit_op(c, + emit_op3(c, + WM_PINTERP, + dst_mask(dst, BRW_WRITEMASK_XY), + interp, + deltas, + get_pixel_w(c)); + + emit_op1(c, TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_ZW), - 0, - src_swizzle(interp, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ZERO, - SWIZZLE_ONE), - src_undef(), - src_undef()); - break; + dst_mask(dst, BRW_WRITEMASK_Z), + brw_imm1f(c->pass_fp, 0.0f)); - default: - emit_op(c, - translate_interp_mode(interp_mode), - dst, - 0, - interp, - deltas, - get_pixel_w(c)); + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + brw_imm1f(c->pass_fp, 1.0f)); break; - } -} - -/*********************************************************************** 
- * Hacks to extend the program parameter and constant lists. - */ - -/* Add the fog parameters to the parameter list of the original - * program, rather than creating a new list. Doesn't really do any - * harm and it's not as if the parameter handling isn't a big hack - * anyway. - */ -static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, - GLint s0, - GLint s1, - GLint s2, - GLint s3, - GLint s4) -{ - struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - gl_state_index tokens[STATE_LENGTH]; - GLuint idx; - tokens[0] = s0; - tokens[1] = s1; - tokens[2] = s2; - tokens[3] = s3; - tokens[4] = s4; - - for (idx = 0; idx < paramList->NumParameters; idx++) { - if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR && - memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0) - return src_reg(PROGRAM_STATE_VAR, idx); - } - - idx = _mesa_add_state_reference( paramList, tokens ); - - return src_reg(PROGRAM_STATE_VAR, idx); -} + default: + switch (interp_mode) { + case TGSI_INTERPOLATE_CONSTANT: + emit_op1(c, + WM_CINTERP, + dst, + interp); + break; -static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, - GLfloat s0, - GLfloat s1, - GLfloat s2, - GLfloat s3) -{ - struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters; - GLfloat values[4]; - GLuint idx; - GLuint swizzle; - - values[0] = s0; - values[1] = s1; - values[2] = s2; - values[3] = s3; - - /* Have to search, otherwise multiple compilations will each grow - * the parameter list. 
- */ - for (idx = 0; idx < paramList->NumParameters; idx++) { - if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT && - memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0) + case TGSI_INTERPOLATE_LINEAR: + emit_op2(c, + WM_LINTERP, + dst, + interp, + deltas); + break; - /* XXX: this mimics the mesa bug which puts all constants and - * parameters into the "PROGRAM_STATE_VAR" category: - */ - return src_reg(PROGRAM_STATE_VAR, idx); + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_op3(c, + WM_PINTERP, + dst, + interp, + deltas, + get_pixel_w(c)); + break; + } + break; } - - idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle ); - assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */ - return src_reg(PROGRAM_STATE_VAR, idx); } - /*********************************************************************** * Expand various instructions here to simpler forms. */ static void precalc_dst( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + struct brw_dst dst, + struct brw_src src0, + struct brw_src src1 ) { - struct prog_src_register src0 = inst->SrcReg[0]; - struct prog_src_register src1 = inst->SrcReg[1]; - struct prog_dst_register dst = inst->DstReg; - if (dst.WriteMask & BRW_WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ - emit_op(c, - TGSI_OPCODE_MUL, - dst_mask(dst, BRW_WRITEMASK_Y), - inst->SaturateMode, - src0, - src1, - src_undef()); + emit_op2(c, + TGSI_OPCODE_MUL, + dst_mask(dst, BRW_WRITEMASK_Y), + src0, + src1); } if (dst.WriteMask & BRW_WRITEMASK_XZ) { struct prog_instruction *swz; GLuint z = GET_SWZ(src0.Swizzle, Z); - /* dst.xz = swz src0.1zzz + /* dst.z = mov src0.zzzz + */ + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_Z), + src_swizzle1(src0, Z)); + + /* dst.x = immf(1.0) */ - swz = emit_op(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_XZ), - inst->SaturateMode, - src_swizzle(src0, SWIZZLE_ONE, z, z, z), - src_undef(), - src_undef()); - /* Avoid letting negation flag of 
src0 affect our 1 constant. */ - swz->SrcReg[0].Negate &= ~NEGATE_X; + emit_op1(c, + TGSI_OPCODE_MOV, + brw_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0), + src_immf(c, 1.0)); } if (dst.WriteMask & BRW_WRITEMASK_W) { /* dst.w = mov src1.w */ - emit_op(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_W), - inst->SaturateMode, - src1, - src_undef(), - src_undef()); + emit_op1(c, + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src1); } } static void precalc_lit( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + struct ureg_dst dst, + struct ureg_src src0 ) { - struct prog_src_register src0 = inst->SrcReg[0]; - struct prog_dst_register dst = inst->DstReg; - if (dst.WriteMask & BRW_WRITEMASK_XW) { - struct prog_instruction *swz; - - /* dst.xw = swz src0.1111 + /* dst.xw = imm(1.0f) */ - swz = emit_op(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_XW), - 0, - src_swizzle1(src0, SWIZZLE_ONE), - src_undef(), - src_undef()); - /* Avoid letting the negation flag of src0 affect our 1 constant. */ - swz->SrcReg[0].Negate = NEGATE_NONE; + emit_op1(c, + TGSI_OPCODE_MOV, + brw_saturate(brw_writemask(dst, BRW_WRITEMASK_XW), 0), + brw_imm1f(1.0f)); } if (dst.WriteMask & BRW_WRITEMASK_YZ) { - emit_op(c, - TGSI_OPCODE_LIT, - dst_mask(dst, BRW_WRITEMASK_YZ), - inst->SaturateMode, - src0, - src_undef(), - src_undef()); + emit_op1(c, + TGSI_OPCODE_LIT, + brw_writemask(dst, BRW_WRITEMASK_YZ), + src0); } } @@ -601,99 +382,62 @@ static void precalc_lit( struct brw_wm_compile *c, * instruction itself. 
*/ static void precalc_tex( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + struct brw_dst dst, + unsigned unit, + struct brw_src src0 ) { - struct prog_src_register coord; - struct prog_dst_register tmpcoord; - const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + struct ureg_src coord = src_undef(); + struct ureg_dst tmp = dst_undef(); assert(unit < BRW_MAX_TEX_UNIT); + /* Cubemap: find longest component of coord vector and normalize + * it. + */ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { - struct prog_instruction *out; - struct prog_dst_register tmp0 = get_temp(c); - struct prog_src_register tmp0src = src_reg_from_dst(tmp0); - struct prog_dst_register tmp1 = get_temp(c); - struct prog_src_register tmp1src = src_reg_from_dst(tmp1); - struct prog_src_register src0 = inst->SrcReg[0]; - - /* find longest component of coord vector and normalize it */ - tmpcoord = get_temp(c); - coord = src_reg_from_dst(tmpcoord); - - /* tmpcoord = src0 (i.e.: coord = src0) */ - out = emit_op(c, TGSI_OPCODE_MOV, - tmpcoord, - 0, - src0, - src_undef(), - src_undef()); - out->SrcReg[0].Negate = NEGATE_NONE; - out->SrcReg[0].Abs = 1; - - /* tmp0 = MAX(coord.X, coord.Y) */ - emit_op(c, TGSI_OPCODE_MAX, - tmp0, - 0, - src_swizzle1(coord, X), - src_swizzle1(coord, Y), - src_undef()); - - /* tmp1 = MAX(tmp0, coord.Z) */ - emit_op(c, TGSI_OPCODE_MAX, - tmp1, - 0, - tmp0src, - src_swizzle1(coord, Z), - src_undef()); - - /* tmp0 = 1 / tmp1 */ - emit_op(c, TGSI_OPCODE_RCP, - dst_mask(tmp0, BRW_WRITEMASK_X), - 0, - tmp1src, - src_undef(), - src_undef()); - - /* tmpCoord = src0 * tmp0 */ - emit_op(c, TGSI_OPCODE_MUL, - tmpcoord, - 0, - src0, - src_swizzle1(tmp0src, SWIZZLE_X), - src_undef()); - - release_temp(c, tmp0); - release_temp(c, tmp1); + struct ureg_src tmpsrc; + + tmp = get_temp(c); + tmpsrc = brw_src(tmpcoord) + + /* tmp = abs(src0) */ + emit_op1(c, + TGSI_OPCODE_MOV, + tmp, + brw_abs(src0)); + + /* tmp.X = MAX(tmp.X, tmp.Y) */ + emit_op2(c, 
TGSI_OPCODE_MAX, + brw_writemask(tmp, BRW_WRITEMASK_X), + src_swizzle1(tmpsrc, X), + src_swizzle1(tmpsrc, Y)); + + /* tmp.X = MAX(tmp.X, tmp.Z) */ + emit_op2(c, TGSI_OPCODE_MAX, + brw_writemask(tmp, BRW_WRITEMASK_X), + tmpsrc, + src_swizzle1(tmpsrc, Z)); + + /* tmp.X = 1 / tmp.X */ + emit_op1(c, TGSI_OPCODE_RCP, + dst_mask(tmp, BRW_WRITEMASK_X), + tmpsrc); + + /* tmp = src0 * tmp.xxxx */ + emit_op2(c, TGSI_OPCODE_MUL, + tmp, + src0, + src_swizzle1(tmpsrc, SWIZZLE_X)); + + coord = tmpsrc; } else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { - struct prog_src_register scale = - search_or_add_param5( c, - STATE_INTERNAL, - STATE_TEXRECT_SCALE, - unit, - 0,0 ); - - tmpcoord = get_temp(c); - - /* coord.xy = MUL inst->SrcReg[0], { 1/width, 1/height } + /* XXX: need a mechanism for internally generated constants. */ - emit_op(c, - TGSI_OPCODE_MUL, - tmpcoord, - 0, - inst->SrcReg[0], - src_swizzle(scale, - SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_ONE, - SWIZZLE_ONE), - src_undef()); - - coord = src_reg_from_dst(tmpcoord); + coord = src0; } else { - coord = inst->SrcReg[0]; + coord = src0; } /* Need to emit YUV texture conversions by hand. Probably need to @@ -704,58 +448,36 @@ static void precalc_tex( struct brw_wm_compile *c, if (c->key.yuvtex_mask & (1 << unit)) { /* convert ycbcr to RGBA */ GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<DstReg; - struct prog_dst_register tmp = get_temp(c); - struct prog_src_register tmpsrc = src_reg_from_dst(tmp); - struct prog_src_register C0 = search_or_add_const4f( c, -.5, -.0625, -.5, 1.164 ); - struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 ); + struct ureg_dst dst = inst->DstReg; + struct ureg_dst tmp = get_temp(c); + struct ureg_src tmpsrc = src_reg_from_dst(tmp); + struct ureg_src C0 = ureg_imm4f( c->ureg, -.5, -.0625, -.5, 1.164 ); + struct ureg_src C1 = ureg_imm4f( c->ureg, 1.596, -0.813, 2.018, -.391 ); /* tmp = TEX ... 
*/ emit_tex_op(c, TGSI_OPCODE_TEX, - tmp, - inst->SaturateMode, + brw_saturate(tmp, dst.Saturate), unit, inst->TexSrcTarget, - inst->TexShadow, coord, src_undef(), src_undef()); /* tmp.xyz = ADD TMP, C0 */ - emit_op(c, - TGSI_OPCODE_ADD, - dst_mask(tmp, BRW_WRITEMASK_XYZ), - 0, - tmpsrc, - C0, - src_undef()); + emit_op2(c, TGSI_OPCODE_ADD, + dst_mask(tmp, BRW_WRITEMASK_XYZ), + tmpsrc, + C0); /* YUV.y = MUL YUV.y, C0.w */ - - emit_op(c, - TGSI_OPCODE_MUL, - dst_mask(tmp, BRW_WRITEMASK_Y), - 0, - tmpsrc, - src_swizzle1(C0, W), - src_undef()); + emit_op2(c, TGSI_OPCODE_MUL, + dst_mask(tmp, BRW_WRITEMASK_Y), + tmpsrc, + src_swizzle1(C0, W)); /* * if (UV swaped) @@ -764,23 +486,22 @@ static void precalc_tex( struct brw_wm_compile *c, * RGB.xyz = MAD YUV.xxz, C1, YUV.y */ - emit_op(c, - TGSI_OPCODE_MAD, - dst_mask(dst, BRW_WRITEMASK_XYZ), - 0, - swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z), - C1, - src_swizzle1(tmpsrc, Y)); + emit_op3(c, TGSI_OPCODE_MAD, + dst_mask(dst, BRW_WRITEMASK_XYZ), + ( swap_uv ? 
+ src_swizzle(tmpsrc, Z,Z,X,X) : + src_swizzle(tmpsrc, X,X,Z,Z)), + C1, + src_swizzle1(tmpsrc, Y)); /* RGB.y = MAD YUV.z, C1.w, RGB.y */ - emit_op(c, - TGSI_OPCODE_MAD, - dst_mask(dst, BRW_WRITEMASK_Y), - 0, - src_swizzle1(tmpsrc, Z), - src_swizzle1(C1, W), - src_swizzle1(src_reg_from_dst(dst), Y)); + emit_op3(c, + TGSI_OPCODE_MAD, + dst_mask(dst, BRW_WRITEMASK_Y), + src_swizzle1(tmpsrc, Z), + src_swizzle1(C1, W), + src_swizzle1(src_reg_from_dst(dst), Y)); release_temp(c, tmp); } @@ -789,29 +510,20 @@ static void precalc_tex( struct brw_wm_compile *c, emit_tex_op(c, TGSI_OPCODE_TEX, inst->DstReg, - inst->SaturateMode, unit, inst->TexSrcTarget, - inst->TexShadow, coord, src_undef(), src_undef()); } - /* For GL_EXT_texture_swizzle: */ - if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) { - /* swizzle the result of the TEX instruction */ - struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg); - emit_op(c, TGSI_OPCODE_MOV, - inst->DstReg, - SATURATE_OFF, /* saturate already done above */ - src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]), - src_undef(), - src_undef()); - } + /* XXX: add GL_EXT_texture_swizzle support to gallium -- by + * generating shader varients in mesa state tracker. + */ - if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || - (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) + /* Release this temp if we ended up allocating it: + */ + if (!brw_dst_is_undef(tmpcoord)) release_temp(c, tmpcoord); } @@ -822,7 +534,7 @@ static void precalc_tex( struct brw_wm_compile *c, static GLboolean projtex( struct brw_wm_compile *c, const struct prog_instruction *inst ) { - const struct prog_src_register src = inst->SrcReg[0]; + const struct ureg_src src = inst->SrcReg[0]; GLboolean retVal; assert(inst->Opcode == TGSI_OPCODE_TXP); @@ -836,7 +548,7 @@ static GLboolean projtex( struct brw_wm_compile *c, */ if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) retVal = GL_FALSE; /* ut2004 gun rendering !?! 
*/ - else if (src.File == PROGRAM_INPUT && + else if (src.File == TGSI_FILE_INPUT && GET_SWZ(src.Swizzle, W) == W && (c->key.proj_attrib_mask & (1 << src.Index)) == 0) retVal = GL_FALSE; @@ -853,10 +565,10 @@ static GLboolean projtex( struct brw_wm_compile *c, static void precalc_txp( struct brw_wm_compile *c, const struct prog_instruction *inst ) { - struct prog_src_register src0 = inst->SrcReg[0]; + struct ureg_src src0 = inst->SrcReg[0]; if (projtex(c, inst)) { - struct prog_dst_register tmp = get_temp(c); + struct ureg_dst tmp = get_temp(c); struct prog_instruction tmp_inst; /* tmp0.w = RCP inst.arg[0][3] @@ -864,7 +576,6 @@ static void precalc_txp( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_RCP, dst_mask(tmp, BRW_WRITEMASK_W), - 0, src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), src_undef(), src_undef()); @@ -874,7 +585,6 @@ static void precalc_txp( struct brw_wm_compile *c, emit_op(c, TGSI_OPCODE_MUL, dst_mask(tmp, BRW_WRITEMASK_XYZ), - 0, src0, src_swizzle1(src_reg_from_dst(tmp), W), src_undef()); @@ -899,43 +609,30 @@ static void precalc_txp( struct brw_wm_compile *c, static void emit_fb_write( struct brw_wm_compile *c ) { - struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH); - struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH); - struct prog_src_register outcolor; + struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); + struct ureg_src outdepth = src_reg(TGSI_FILE_OUTPUT, FRAG_RESULT_DEPTH); + struct ureg_src outcolor; + struct prog_instruction *inst; GLuint i; - struct prog_instruction *inst, *last_inst; - struct brw_context *brw = c->func.brw; /* The inst->Aux field is used for FB write target and the EOT marker */ - if (brw->state.nr_color_regions > 1) { - for (i = 0 ; i < brw->state.nr_color_regions; i++) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i); - last_inst = inst = emit_op(c, - WM_FB_WRITE, dst_mask(dst_undef(),0), 0, - outcolor, 
payload_r0_depth, outdepth); - inst->Aux = (i<<1); - if (c->fp_fragcolor_emitted) { - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = (i<<1); - } - } - last_inst->Aux |= 1; //eot - } - else { - /* if gl_FragData[0] is written, use it, else use gl_FragColor */ - if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0)) - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0); - else - outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); - - inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), - 0, outcolor, payload_r0_depth, outdepth); - inst->Aux = 1|(0<<1); + for (i = 0 ; i < c->key.nr_cbufs; i++) { + outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); + + inst = emit_op(c, WM_FB_WRITE, + dst_mask(dst_undef(), 0), + outcolor, + payload_r0_depth, + outdepth); + + inst->Aux = (i<<1); } + + /* Set EOT flag on last inst: + */ + inst->Aux |= 1; //eot } @@ -952,7 +649,7 @@ static void validate_src_regs( struct brw_wm_compile *c, GLuint i; for (i = 0; i < nr_args; i++) { - if (inst->SrcReg[i].File == PROGRAM_INPUT) { + if (inst->SrcReg[i].File == TGSI_FILE_INPUT) { GLuint idx = inst->SrcReg[i].Index; if (!(c->fp_interp_emitted & (1<DstReg.File == PROGRAM_OUTPUT) { + if (inst->DstReg.File == TGSI_FILE_OUTPUT) { GLuint idx = inst->DstReg.Index; if (idx == FRAG_RESULT_COLOR) - c->fp_fragcolor_emitted = 1; + c->fp_fragcolor_emitted |= inst->DstReg.WriteMask; } } -static void print_insns( const struct prog_instruction *insn, - GLuint nr ) + + +static void emit_insn( struct brw_wm_compile *c, + const struct tgsi_full_instruction *inst ) { - GLuint i; - for (i = 0; i < nr; i++, insn++) { - debug_printf("%3d: ", i); - if (insn->Opcode < MAX_OPCODE) - _mesa_print_instruction(insn); - else if (insn->Opcode < MAX_WM_OPCODE) { - GLuint idx = insn->Opcode - MAX_OPCODE; - - _mesa_print_alu_instruction(insn, - 
wm_opcode_strings[idx], - 3); - } - else - debug_printf("965 Opcode %d\n", insn->Opcode); + + switch (inst->Opcode) { + case TGSI_OPCODE_ABS: + emit_op1(c, TGSI_OPCODE_MOV, + dst, + brw_abs(src[0])); + break; + + case TGSI_OPCODE_SUB: + emit_op2(c, TGSI_OPCODE_ADD, + dst, + src[0], + brw_negate(src[1])); + break; + + case TGSI_OPCODE_SCS: + emit_op1(c, TGSI_OPCODE_SCS, + brw_writemask(dst, BRW_WRITEMASK_XY), + src[0]); + break; + + case TGSI_OPCODE_DST: + precalc_dst(c, inst); + break; + + case TGSI_OPCODE_LIT: + precalc_lit(c, inst); + break; + + case TGSI_OPCODE_TEX: + precalc_tex(c, inst); + break; + + case TGSI_OPCODE_TXP: + precalc_txp(c, inst); + break; + + case TGSI_OPCODE_TXB: + out = emit_insn(c, inst); + out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; + assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); + break; + + case TGSI_OPCODE_XPD: + emit_op2(c, TGSI_OPCODE_XPD, + brw_writemask(dst, BRW_WRITEMASK_XYZ), + src[0], + src[1]); + break; + + case TGSI_OPCODE_KIL: + emit_op1(c, TGSI_OPCODE_KIL, + brw_writemask(dst_undef(), 0), + src[0]); + break; + + case TGSI_OPCODE_END: + emit_fb_write(c); + break; + default: + if (brw_wm_is_scalar_result(inst->Opcode)) + emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]); + else + emit_op(c, opcode, dst, src[0], src[1], src[2]); + break; } } - /** * Initial pass for fragment program code generation. * This function is used by both the GLSL and non-GLSL paths. @@ -1004,108 +753,62 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) if (BRW_DEBUG & DEBUG_WM) { debug_printf("pre-fp:\n"); - _mesa_print_program(&fp->program.Base); - debug_printf("\n"); + tgsi_dump(fp->tokens, 0); } - c->pixel_xy = src_undef(); - c->delta_xy = src_undef(); - c->pixel_w = src_undef(); + c->pixel_xy = brw_src_undef(); + c->delta_xy = brw_src_undef(); + c->pixel_w = brw_src_undef(); c->nr_fp_insns = 0; c->fp->tex_units_used = 0x0; - /* Emit preamble instructions. 
This is where special instructions such as - * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to - * compute shader inputs from varying vars. - */ - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { - const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; - validate_src_regs(c, inst); - validate_dst_regs(c, inst); - } /* Loop over all instructions doing assorted simplifications and * transformations. */ - for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) { - const struct prog_instruction *inst = &fp->program.Base.Instructions[insn]; - struct prog_instruction *out; - - /* Check for INPUT values, emit INTERP instructions where - * necessary: - */ - - switch (inst->Opcode) { - case TGSI_OPCODE_ABS: - out = emit_insn(c, inst); - out->Opcode = TGSI_OPCODE_MOV; - out->SrcReg[0].Negate = NEGATE_NONE; - out->SrcReg[0].Abs = 1; - break; - - case TGSI_OPCODE_SUB: - out = emit_insn(c, inst); - out->Opcode = TGSI_OPCODE_ADD; - out->SrcReg[1].Negate ^= NEGATE_XYZW; - break; - - case TGSI_OPCODE_SCS: - out = emit_insn(c, inst); - /* This should probably be done in the parser. + tgsi_parse_init( &parse, tokens ); + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* If branching shader, emit preamble instructions at decl time, as + * instruction order in the shader does not correspond to the order + * instructions are executed in the wild. + * + * This is where special instructions such as WM_CINTERP, + * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to compute + * shader inputs from varying vars. + * + * XXX: For non-branching shaders, consider deferring variable + * initialization as late as possible to minimize register + * usage. This is how the original BRW driver worked. 
*/ - out->DstReg.WriteMask &= BRW_WRITEMASK_XY; - break; - - case TGSI_OPCODE_DST: - precalc_dst(c, inst); - break; - - case TGSI_OPCODE_LIT: - precalc_lit(c, inst); - break; - - case TGSI_OPCODE_TEX: - precalc_tex(c, inst); - break; - - case TGSI_OPCODE_TXP: - precalc_txp(c, inst); - break; - - case TGSI_OPCODE_TXB: - out = emit_insn(c, inst); - out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); - break; - - case TGSI_OPCODE_XPD: - out = emit_insn(c, inst); - /* This should probably be done in the parser. + validate_src_regs(c, inst); + validate_dst_regs(c, inst); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* Unlike VS programs we can probably manage fine encoding + * immediate values directly into the emitted EU + * instructions, as we probably only need to reference one + * float value per instruction. Just save the data for now + * and use directly later. */ - out->DstReg.WriteMask &= BRW_WRITEMASK_XYZ; break; - case TGSI_OPCODE_KIL: - out = emit_insn(c, inst); - /* This should probably be done in the parser. 
- */ - out->DstReg.WriteMask = 0; - break; - case TGSI_OPCODE_END: - emit_fb_write(c); - break; - default: - if (brw_wm_is_scalar_result(inst->Opcode)) - emit_scalar_insn(c, inst); - else - emit_insn(c, inst); + case TGSI_TOKEN_TYPE_INSTRUCTION: + inst = &parse.FullToken.FullInstruction; + emit_insn( c, inst ); break; } } + c->brw_program = brw_finalize( c->builder ); + if (BRW_DEBUG & DEBUG_WM) { debug_printf("pass_fp:\n"); - print_insns( c->prog_instructions, c->nr_fp_insns ); + brw_print_program( c->brw_program ); debug_printf("\n"); } } diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index 59bc4ef701..cdc10484a6 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -332,7 +332,7 @@ static void prealloc_reg(struct brw_wm_compile *c) for (j = 0; j < 4; j++) set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); } - if (c->key.vp_outputs_written & (1 << i)) { + if (c->key.nr_vp_outputs > i) { reg_index += 2; } } @@ -1670,7 +1670,7 @@ get_argument_regs(struct brw_wm_compile *c, } } -static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) +static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_compile *c) { #define MAX_IF_DEPTH 32 #define MAX_LOOP_DEPTH 32 @@ -1943,20 +1943,20 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) * Do GPU code generation for shaders that use GLSL features such as * flow control. 
Other shaders will be compiled with the */ -void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c) +void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c) { if (BRW_DEBUG & DEBUG_WM) { - debug_printf("brw_wm_glsl_emit:\n"); + debug_printf("%s:\n", __FUNCTION__); } /* initial instruction translation/simplification */ brw_wm_pass_fp(c); /* actual code generation */ - brw_wm_emit_glsl(brw, c); + brw_wm_emit_branching_shader(brw, c); if (BRW_DEBUG & DEBUG_WM) { - brw_wm_print_program(c, "brw_wm_glsl_emit done"); + brw_wm_print_program(c, "brw_wm_branching_shader_emit done"); } c->prog_data.total_grf = num_grf_used(c); diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 71e4c56835..d8b9028927 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -168,54 +168,20 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, if (!ref) { switch (file) { - case PROGRAM_INPUT: - case PROGRAM_PAYLOAD: - case PROGRAM_TEMPORARY: - case PROGRAM_OUTPUT: - case PROGRAM_VARYING: + case TGSI_FILE_INPUT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_OUTPUT: + case BRW_FILE_PAYLOAD: + /* should already be done?? */ break; - case PROGRAM_LOCAL_PARAM: - ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]); - break; - - case PROGRAM_ENV_PARAM: + case TGSI_FILE_CONSTANT: ref = get_param_ref(c, &c->env_param[idx][component]); break; - case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - case PROGRAM_CONSTANT: - case PROGRAM_NAMED_PARAM: { - struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters; - - /* There's something really hokey about parameters parsed in - * arb programs - they all end up in here, whether they be - * state values, parameters or constants. This duplicates the - * structure above & also seems to subvert the limits set for - * each type of constant/param. 
- */ - switch (plist->Parameters[idx].Type) { - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - /* These are invarient: - */ - ref = get_imm_ref(c, &plist->ParameterValues[idx][component]); - break; - - case PROGRAM_STATE_VAR: - case PROGRAM_UNIFORM: - /* These may change from run to run: - */ - ref = get_param_ref(c, &plist->ParameterValues[idx][component] ); - break; - - default: - assert(0); - break; - } + case TGSI_FILE_IMMEDIATE: + ref = get_imm_ref(c, &plist->ParameterValues[idx][component]); break; - } default: assert(0); @@ -310,17 +276,16 @@ translate_insn(struct brw_wm_compile *c, const struct prog_instruction *inst) { struct brw_wm_instruction *out = get_instruction(c); - GLuint writemask = inst->DstReg.WriteMask; + GLuint writemask = inst->dst.WriteMask; GLuint nr_args = brw_wm_nr_args(inst->Opcode); GLuint i, j; /* Copy some data out of the instruction */ out->opcode = inst->Opcode; - out->saturate = (inst->SaturateMode != SATURATE_OFF); + out->saturate = inst->dst.Saturate; out->tex_unit = inst->TexSrcUnit; - out->tex_idx = inst->TexSrcTarget; - out->tex_shadow = inst->TexShadow; + out->tex_target = inst->TexSrcTarget; out->eot = inst->Aux & 1; out->target = inst->Aux >> 1; @@ -328,7 +293,7 @@ translate_insn(struct brw_wm_compile *c, */ for (i = 0; i < nr_args; i++) { for (j = 0; j < 4; j++) { - out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out); + out->src[i][j] = get_new_ref(c, inst->src[i], j, out); } } @@ -380,15 +345,6 @@ static void pass0_init_payload( struct brw_wm_compile *c ) &c->payload.depth[j] ); } -#if 0 - /* This seems to be an alternative to the INTERP_WPOS stuff I do - * elsewhere: - */ - if (c->key.source_depth_reg) - pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2, - &c->payload.depth[c->key.source_depth_reg/2]); -#endif - for (i = 0; i < FRAG_ATTRIB_MAX; i++) pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, &c->payload.input_interp[i] ); @@ -403,6 +359,9 @@ static void pass0_init_payload( struct brw_wm_compile 
*c ) * the same number. * * Translate away swizzling and eliminate non-saturating moves. + * + * Translate instructions from Mesa's prog_instruction structs to our + * internal brw_wm_instruction representation. */ void brw_wm_pass0( struct brw_wm_compile *c ) { @@ -421,7 +380,7 @@ void brw_wm_pass0( struct brw_wm_compile *c ) */ switch (inst->Opcode) { case OPCODE_MOV: - if (!inst->SaturateMode) { + if (!inst->dst.Saturate) { pass0_precalc_mov(c, inst); } else { diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index 85a3a55ca4..b0356b1bd5 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -90,17 +90,24 @@ static void track_arg(struct brw_wm_compile *c, static GLuint get_texcoord_mask( GLuint tex_idx ) { switch (tex_idx) { - case TEXTURE_1D_INDEX: + case TGSI_TEXTURE_1D: return BRW_WRITEMASK_X; - case TEXTURE_2D_INDEX: + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: return BRW_WRITEMASK_XY; - case TEXTURE_3D_INDEX: + case TGSI_TEXTURE_3D: return BRW_WRITEMASK_XYZ; - case TEXTURE_CUBE_INDEX: + case TGSI_TEXTURE_CUBE: return BRW_WRITEMASK_XYZ; - case TEXTURE_RECT_INDEX: - return BRW_WRITEMASK_XY; - default: return 0; + + case TGSI_TEXTURE_SHADOW1D: + return BRW_WRITEMASK_XZ; + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: + return BRW_WRITEMASK_XYZ; + default: + assert(0); + return 0; } } @@ -217,14 +224,9 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: read0 = get_texcoord_mask(inst->tex_idx); - - if (inst->tex_shadow) - read0 |= BRW_WRITEMASK_Z; break; case TGSI_OPCODE_TXB: - /* Shadow ignored for txb. 
- */ read0 = get_texcoord_mask(inst->tex_idx) | BRW_WRITEMASK_W; break; diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index edabf6ceb6..1898f38cef 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -52,7 +52,7 @@ struct brw_wm_unit_key { unsigned int max_threads; unsigned int nr_surfaces, sampler_count; - GLboolean uses_depth, computes_depth, uses_kill, is_glsl; + GLboolean uses_depth, computes_depth, uses_kill, has_flow_control; GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; GLfloat offset_units, offset_factor; }; @@ -114,10 +114,10 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* _NEW_COLOR */ key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->is_glsl = bfp->isGLSL; + key->has_flow_control = bfp->has_flow_control; /* temporary sanity check assertion */ - ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); + ASSERT(bfp->has_flow_control == brw_wm_has_flow_control(fp)); /* _NEW_QUERY */ key->stats_wm = (brw->query.stats_wm != 0); @@ -184,7 +184,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.wm5.program_computes_depth = key->computes_depth; wm.wm5.program_uses_killpixel = key->uses_kill; - if (key->is_glsl) + if (key->has_flow_control) wm.wm5.enable_8_pix = 1; else wm.wm5.enable_16_pix = 1; -- cgit v1.2.3 From 5d61b6f1f64ca26dd038af0679873ef0353660dd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 31 Oct 2009 15:05:01 +0000 Subject: i965g: wip on fragment shaders --- src/gallium/drivers/i965/brw_wm.h | 63 ++- src/gallium/drivers/i965/brw_wm_fp.c | 871 ++++++++++++++++++++++++++--------- 2 files changed, 698 insertions(+), 236 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 2cd5bb7081..8ee99420aa 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ 
b/src/gallium/drivers/i965/brw_wm.h @@ -74,6 +74,7 @@ struct brw_wm_prog_key { GLuint vp_nr_outputs:6; GLuint nr_cbufs:3; + GLuint has_flow_control:1; GLuint program_string_id; }; @@ -176,9 +177,36 @@ struct brw_wm_instruction { #define MAX_WM_OPCODE (MAX_OPCODE + 9) #define BRW_FILE_PAYLOAD (TGSI_FILE_COUNT) -#define PAYLOAD_DEPTH (FRAG_ATTRIB_MAX) /* ?? */ +#define PAYLOAD_DEPTH (PIPE_MAX_SHADER_INPUTS) /* ?? */ + + +struct brw_fp_src { + unsigned file:4; + unsigned index:16; + unsigned swizzle:8; + unsigned indirect:1; + unsigned negate:1; + unsigned abs:1; +}; + +struct brw_fp_dst { + unsigned file:4; + unsigned index:16; + unsigned writemask:4; + unsigned indirect:1; + unsigned saturate:1; +}; + +struct brw_fp_instruction { + struct brw_fp_dst dst; + struct brw_fp_src src[3]; + unsigned opcode:8; + unsigned tex_unit:4; + unsigned tex_target:4; + unsigned target:10; /* destination surface for FB_WRITE */ + unsigned eot:1; /* mark last instruction (usually FB_WRITE) */ +}; -struct brw_passfp_program; struct brw_wm_compile { struct brw_compile func; @@ -198,9 +226,26 @@ struct brw_wm_compile { * simplifying and adding instructions for interpolation and * framebuffer writes. 
*/ - struct brw_passfp_program *pass_fp; - - + struct { + GLfloat v[4]; + unsigned nr; + } immediate[BRW_WM_MAX_CONST+3]; + GLuint nr_immediates; + + struct brw_fp_instruction fp_instructions[BRW_WM_MAX_INSN]; + GLuint nr_fp_insns; + GLuint fp_temp; + GLuint fp_interp_emitted; + GLuint fp_fragcolor_emitted; + GLuint fp_first_internal_temp; + + struct brw_fp_src fp_pixel_xy; + struct brw_fp_src fp_delta_xy; + struct brw_fp_src fp_pixel_w; + + + /* Subsequent passes using SSA representation: + */ struct brw_wm_value vreg[BRW_WM_MAX_VREG]; GLuint nr_vreg; @@ -213,7 +258,7 @@ struct brw_wm_compile { } payload; - const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4]; + const struct brw_wm_ref *pass0_fp_reg[BRW_FILE_PAYLOAD+1][256][4]; struct brw_wm_ref undef_ref; struct brw_wm_value undef_value; @@ -241,7 +286,7 @@ struct brw_wm_compile { struct { GLboolean inited; struct brw_reg reg; - } wm_regs[PROGRAM_PAYLOAD+1][256][4]; + } wm_regs[BRW_FILE_PAYLOAD+1][256][4]; GLboolean used_grf[BRW_WM_MAX_GRF]; GLuint first_free_grf; @@ -258,13 +303,15 @@ struct brw_wm_compile { GLint index; struct brw_reg reg; } current_const[3]; + + GLuint error; }; GLuint brw_wm_nr_args( GLuint opcode ); GLuint brw_wm_is_scalar_result( GLuint opcode ); -void brw_wm_pass_fp( struct brw_wm_compile *c ); +int brw_wm_pass_fp( struct brw_wm_compile *c ); void brw_wm_pass0( struct brw_wm_compile *c ); void brw_wm_pass1( struct brw_wm_compile *c ); void brw_wm_pass2( struct brw_wm_compile *c ); diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 8ba037cdae..57933afbbe 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -31,15 +31,26 @@ #include "pipe/p_shader_tokens.h" +#include "pipe/p_error.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_util.h" #include "brw_wm.h" #include 
"brw_util.h" +#include "brw_debug.h" #define X 0 #define Y 1 #define Z 2 #define W 3 +#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3) static const char *wm_opcode_strings[] = { @@ -54,7 +65,294 @@ static const char *wm_opcode_strings[] = { "FRONTFACING", }; +/*********************************************************************** + * Source regs + */ + +static struct brw_fp_src src_reg(GLuint file, GLuint idx) +{ + struct brw_fp_src reg; + reg.file = file; + reg.index = idx; + reg.swizzle = BRW_SWIZZLE_XYZW; + reg.indirect = 0; + reg.negate = 0; + reg.abs = 0; + return reg; +} + +static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst) +{ + return src_reg(dst.file, dst.index); +} + +static struct brw_fp_src src_undef( void ) +{ + return src_reg(TGSI_FILE_NULL, 0); +} + +static GLboolean src_is_undef(struct brw_fp_src src) +{ + return src.file == TGSI_FILE_NULL; +} + +static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w ) +{ + unsigned swz = reg.swizzle; + + reg.swizzle = ( GET_SWZ(swz, x) << 0 | + GET_SWZ(swz, y) << 2 | + GET_SWZ(swz, z) << 4 | + GET_SWZ(swz, w) << 6 ); + + return reg; +} + +static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x ) +{ + return src_swizzle(reg, x, x, x, x); +} + +static struct brw_fp_src src_abs( struct brw_fp_src src ) +{ + src.negate = 0; + src.abs = 1; + return src; +} + +static struct brw_fp_src src_negate( struct brw_fp_src src ) +{ + src.negate = 1; + src.abs = 0; + return src; +} + + +static int match_or_expand_immediate( const float *v, + unsigned nr, + float *v2, + unsigned *nr2, + unsigned *swizzle ) +{ + unsigned i, j; + + *swizzle = 0; + + for (i = 0; i < nr; i++) { + boolean found = FALSE; + + for (j = 0; j < *nr2 && !found; j++) { + if (v[i] == v2[j]) { + *swizzle |= j << (i * 2); + found = TRUE; + } + } + + if (!found) { + if (*nr2 >= 4) + return FALSE; + + v2[*nr2] = v[i]; + *swizzle |= *nr2 << (i * 2); + (*nr2)++; + } + } + + return TRUE; +} + + + +/* 
Internally generated immediates: overkill... + */ +static struct brw_fp_src src_imm( struct brw_wm_compile *c, + const GLfloat *v, + unsigned nr) +{ + unsigned i, j; + unsigned swizzle; + + /* Could do a first pass where we examine all existing immediates + * without expanding. + */ + + for (i = 0; i < c->nr_immediates; i++) { + if (match_or_expand_immediate( v, + nr, + c->immediate[i].v, + &c->immediate[i].nr, + &swizzle )) + goto out; + } + + if (c->nr_immediates < Elements(c->immediate)) { + i = c->nr_immediates++; + if (match_or_expand_immediate( v, + nr, + c->immediate[i].v, + &c->immediate[i].nr, + &swizzle )) + goto out; + } + + c->error = 1; + return src_undef(); + +out: + /* Make sure that all referenced elements are from this immediate. + * Has the effect of making size-one immediates into scalars. + */ + for (j = nr; j < 4; j++) + swizzle |= (swizzle & 0x3) << (j * 2); + + return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ), + GET_SWZ(swizzle, X), + GET_SWZ(swizzle, Y), + GET_SWZ(swizzle, Z), + GET_SWZ(swizzle, W) ); +} + + + +static struct brw_fp_src src_imm1f( struct brw_wm_compile *c, + GLfloat f ) +{ + return src_imm(c, &f, 1); +} + +static struct brw_fp_src src_imm4f( struct brw_wm_compile *c, + GLfloat x, + GLfloat y, + GLfloat z, + GLfloat w) +{ + GLfloat f[4] = {x,y,z,w}; + return src_imm(c, f, 4); +} + + + +/*********************************************************************** + * Dest regs + */ + +static struct brw_fp_dst dst_reg(GLuint file, GLuint idx) +{ + struct brw_fp_dst reg; + reg.file = file; + reg.index = idx; + reg.writemask = BRW_WRITEMASK_XYZW; + reg.indirect = 0; + return reg; +} + +static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask ) +{ + reg.writemask &= mask; + return reg; +} + +static struct brw_fp_dst dst_undef( void ) +{ + return dst_reg(TGSI_FILE_NULL, 0); +} + +static boolean dst_is_undef( struct brw_fp_dst dst ) +{ + return dst.file == TGSI_FILE_NULL; +} + +static struct brw_fp_dst dst_saturate( struct 
brw_fp_dst reg, boolean flag ) +{ + reg.saturate = flag; + return reg; +} + +static struct brw_fp_dst get_temp( struct brw_wm_compile *c ) +{ + int bit = ffs( ~c->fp_temp ); + + if (!bit) { + debug_printf("%s: out of temporaries\n", __FILE__); + } + + c->fp_temp |= 1<<(bit-1); + return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1)); +} + + +static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp ) +{ + c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp)); +} + + +/*********************************************************************** + * Instructions + */ + +static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c) +{ + return &c->fp_instructions[c->nr_fp_insns++]; +} + +static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + GLuint tex_src_unit, + GLuint tex_src_target, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + struct brw_fp_instruction *inst = get_fp_inst(c); + + inst->opcode = op; + inst->dst = dest; + inst->tex_unit = tex_src_unit; + inst->tex_target = tex_src_target; + inst->src[0] = src0; + inst->src[1] = src1; + inst->src[2] = src2; + return inst; +} + + +static INLINE void emit_op3(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) +{ + emit_tex_op(c, op, dest, 0, 0, src0, src1, src2); +} + + +static INLINE void emit_op2(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0, + struct brw_fp_src src1) +{ + emit_tex_op(c, op, dest, 0, 0, src0, src1, src_undef()); +} + +static INLINE void emit_op1(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest, + struct brw_fp_src src0) +{ + emit_tex_op(c, op, dest, 0, 0, src0, src_undef(), src_undef()); +} + +static INLINE void emit_op0(struct brw_wm_compile *c, + GLuint op, + struct brw_fp_dst dest) +{ + emit_tex_op(c, op, 
dest, 0, 0, src_undef(), src_undef(), src_undef()); +} @@ -66,10 +364,10 @@ static const char *wm_opcode_strings[] = { */ static void emit_scalar_insn(struct brw_wm_compile *c, unsigned opcode, - struct brw_dst dst, - struct brw_src src0, - struct brw_src src1, - struct brw_src src2 ) + struct brw_fp_dst dst, + struct brw_fp_src src0, + struct brw_fp_src src1, + struct brw_fp_src src2 ) { unsigned first_chan = ffs(dst.writemask) - 1; unsigned first_mask = 1 << first_chan; @@ -77,14 +375,14 @@ static void emit_scalar_insn(struct brw_wm_compile *c, if (dst.writemask == 0) return; - emit_op( c, opcode, - brw_writemask(dst, first_mask), - src0, src1, src2 ); + emit_op3( c, opcode, + dst_mask(dst, first_mask), + src0, src1, src2 ); if (dst.writemask != first_mask) { emit_op1(c, TGSI_OPCODE_MOV, - brw_writemask(dst, ~first_mask), - src_swizzle1(brw_src(dst), first_chan)); + dst_mask(dst, ~first_mask), + src_scalar(src_reg_from_dst(dst), first_chan)); } } @@ -93,11 +391,11 @@ static void emit_scalar_insn(struct brw_wm_compile *c, * Special instructions for interpolation and other tasks */ -static struct ureg_src get_pixel_xy( struct brw_wm_compile *c ) +static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c ) { - if (src_is_undef(c->pixel_xy)) { - struct ureg_dst pixel_xy = get_temp(c); - struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); + if (src_is_undef(c->fp_pixel_xy)) { + struct brw_fp_dst pixel_xy = get_temp(c); + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); /* Emit the out calculations, and hold onto the results. 
Use @@ -105,79 +403,85 @@ static struct ureg_src get_pixel_xy( struct brw_wm_compile *c ) */ /* pixel_xy.xy = PIXELXY payload[0]; */ - emit_op(c, - WM_PIXELXY, - dst_mask(pixel_xy, BRW_WRITEMASK_XY), - payload_r0_depth, - src_undef(), - src_undef()); + emit_op1(c, + WM_PIXELXY, + dst_mask(pixel_xy, BRW_WRITEMASK_XY), + payload_r0_depth); - c->pixel_xy = src_reg_from_dst(pixel_xy); + c->fp_pixel_xy = src_reg_from_dst(pixel_xy); } - return c->pixel_xy; + return c->fp_pixel_xy; } -static struct ureg_src get_delta_xy( struct brw_wm_compile *c ) +static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c ) { - if (src_is_undef(c->delta_xy)) { - struct ureg_dst delta_xy = get_temp(c); - struct ureg_src pixel_xy = get_pixel_xy(c); - struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); + if (src_is_undef(c->fp_delta_xy)) { + struct brw_fp_dst delta_xy = get_temp(c); + struct brw_fp_src pixel_xy = get_pixel_xy(c); + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); /* deltas.xy = DELTAXY pixel_xy, payload[0] */ - emit_op(c, + emit_op3(c, WM_DELTAXY, dst_mask(delta_xy, BRW_WRITEMASK_XY), pixel_xy, payload_r0_depth, src_undef()); - c->delta_xy = src_reg_from_dst(delta_xy); + c->fp_delta_xy = src_reg_from_dst(delta_xy); } - return c->delta_xy; + return c->fp_delta_xy; } -static struct ureg_src get_pixel_w( struct brw_wm_compile *c ) +static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c ) { - if (src_is_undef(c->pixel_w)) { - struct ureg_dst pixel_w = get_temp(c); - struct ureg_src deltas = get_delta_xy(c); - struct ureg_src interp_wpos = src_reg(TGSI_FILE_PAYLOAD, FRAG_ATTRIB_WPOS); + if (src_is_undef(c->fp_pixel_w)) { + struct brw_fp_dst pixel_w = get_temp(c); + struct brw_fp_src deltas = get_delta_xy(c); + + /* XXX: assuming position is always first -- valid? 
+ */ + struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0); /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x */ - emit_op(c, - WM_PIXELW, - dst_mask(pixel_w, BRW_WRITEMASK_W), - interp_wpos, - deltas, - src_undef()); + emit_op3(c, + WM_PIXELW, + dst_mask(pixel_w, BRW_WRITEMASK_W), + interp_wpos, + deltas, + src_undef()); - c->pixel_w = src_reg_from_dst(pixel_w); + c->fp_pixel_w = src_reg_from_dst(pixel_w); } - return c->pixel_w; + return c->fp_pixel_w; } + +/*********************************************************************** + * Emit INTERP instructions ahead of first use of each attrib. + */ + static void emit_interp( struct brw_wm_compile *c, + GLuint idx, GLuint semantic, - GLuint semantic_index, GLuint interp_mode ) { - struct ureg_dst dst = dst_reg(TGSI_FILE_INPUT, idx); - struct ureg_src interp = src_reg(TGSI_FILE_PAYLOAD, idx); - struct ureg_src deltas = get_delta_xy(c); + struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx); + struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx); + struct brw_fp_src deltas = get_delta_xy(c); /* Need to use PINTERP on attributes which have been * multiplied by 1/W in the SF program, and LINTERP on those * which have not: */ switch (semantic) { - case FRAG_ATTRIB_WPOS: + case TGSI_SEMANTIC_POSITION: /* Have to treat wpos.xy specially: */ emit_op1(c, @@ -218,7 +522,8 @@ static void emit_interp( struct brw_wm_compile *c, } break; - case FRAG_ATTRIB_FOGC: + + case TGSI_SEMANTIC_FOG: /* Interpolate the fog coordinate */ emit_op3(c, WM_PINTERP, @@ -228,17 +533,17 @@ static void emit_interp( struct brw_wm_compile *c, get_pixel_w(c)); emit_op1(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_YZ), - brw_imm1f(0.0)); + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_YZ), + src_imm1f(c, 0.0)); emit_op1(c, - TGSI_OPCODE_MOV, - dst_mask(dst, BRW_WRITEMASK_W), - brw_imm1f(1.0)); + TGSI_OPCODE_MOV, + dst_mask(dst, BRW_WRITEMASK_W), + src_imm1f(c, 1.0)); break; - case FRAG_ATTRIB_FACE: + case TGSI_SEMANTIC_FACE: 
/* XXX review/test this case */ emit_op0(c, WM_FRONTFACING, @@ -247,15 +552,15 @@ static void emit_interp( struct brw_wm_compile *c, emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_YZ), - brw_imm1f(0.0)); + src_imm1f(c, 0.0)); emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_W), - brw_imm1f(1.0)); + src_imm1f(c, 1.0)); break; - case FRAG_ATTRIB_PNTC: + case TGSI_SEMANTIC_PSIZE: /* XXX review/test this case */ emit_op3(c, WM_PINTERP, @@ -267,12 +572,12 @@ static void emit_interp( struct brw_wm_compile *c, emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_Z), - brw_imm1f(c->pass_fp, 0.0f)); + src_imm1f(c, 0.0f)); emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_W), - brw_imm1f(c->pass_fp, 1.0f)); + src_imm1f(c, 1.0f)); break; default: @@ -310,11 +615,11 @@ static void emit_interp( struct brw_wm_compile *c, * Expand various instructions here to simpler forms. */ static void precalc_dst( struct brw_wm_compile *c, - struct brw_dst dst, - struct brw_src src0, - struct brw_src src1 ) + struct brw_fp_dst dst, + struct brw_fp_src src0, + struct brw_fp_src src1 ) { - if (dst.WriteMask & BRW_WRITEMASK_Y) { + if (dst.writemask & BRW_WRITEMASK_Y) { /* dst.y = mul src0.y, src1.y */ emit_op2(c, @@ -324,25 +629,22 @@ static void precalc_dst( struct brw_wm_compile *c, src1); } - if (dst.WriteMask & BRW_WRITEMASK_XZ) { - struct prog_instruction *swz; - GLuint z = GET_SWZ(src0.Swizzle, Z); - + if (dst.writemask & BRW_WRITEMASK_XZ) { /* dst.z = mov src0.zzzz */ emit_op1(c, TGSI_OPCODE_MOV, dst_mask(dst, BRW_WRITEMASK_Z), - src_swizzle1(src0, Z)); + src_scalar(src0, Z)); - /* dst.x = immf(1.0) + /* dst.x = imm1f(1.0) */ emit_op1(c, TGSI_OPCODE_MOV, - brw_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0), - src_immf(c, 1.0)); + dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0), + src_imm1f(c, 1.0)); } - if (dst.WriteMask & BRW_WRITEMASK_W) { + if (dst.writemask & BRW_WRITEMASK_W) { /* dst.w = mov src1.w */ emit_op1(c, @@ -354,22 +656,22 @@ static void 
precalc_dst( struct brw_wm_compile *c, static void precalc_lit( struct brw_wm_compile *c, - struct ureg_dst dst, - struct ureg_src src0 ) + struct brw_fp_dst dst, + struct brw_fp_src src0 ) { - if (dst.WriteMask & BRW_WRITEMASK_XW) { + if (dst.writemask & BRW_WRITEMASK_XW) { /* dst.xw = imm(1.0f) */ emit_op1(c, TGSI_OPCODE_MOV, - brw_saturate(brw_writemask(dst, BRW_WRITEMASK_XW), 0), - brw_imm1f(1.0f)); + dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0), + src_imm1f(c, 1.0f)); } - if (dst.WriteMask & BRW_WRITEMASK_YZ) { + if (dst.writemask & BRW_WRITEMASK_YZ) { emit_op1(c, TGSI_OPCODE_LIT, - brw_writemask(dst, BRW_WRITEMASK_YZ), + dst_mask(dst, BRW_WRITEMASK_YZ), src0); } } @@ -382,41 +684,42 @@ static void precalc_lit( struct brw_wm_compile *c, * instruction itself. */ static void precalc_tex( struct brw_wm_compile *c, - struct brw_dst dst, + struct brw_fp_dst dst, + unsigned target, unsigned unit, - struct brw_src src0 ) + struct brw_fp_src src0 ) { - struct ureg_src coord = src_undef(); - struct ureg_dst tmp = dst_undef(); + struct brw_fp_src coord = src_undef(); + struct brw_fp_dst tmp = dst_undef(); assert(unit < BRW_MAX_TEX_UNIT); /* Cubemap: find longest component of coord vector and normalize * it. 
*/ - if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) { - struct ureg_src tmpsrc; + if (target == TGSI_TEXTURE_CUBE) { + struct brw_fp_src tmpsrc; tmp = get_temp(c); - tmpsrc = brw_src(tmpcoord) + tmpsrc = src_reg_from_dst(tmp); /* tmp = abs(src0) */ emit_op1(c, TGSI_OPCODE_MOV, tmp, - brw_abs(src0)); + src_abs(src0)); /* tmp.X = MAX(tmp.X, tmp.Y) */ emit_op2(c, TGSI_OPCODE_MAX, - brw_writemask(tmp, BRW_WRITEMASK_X), - src_swizzle1(tmpsrc, X), - src_swizzle1(tmpsrc, Y)); + dst_mask(tmp, BRW_WRITEMASK_X), + src_scalar(tmpsrc, X), + src_scalar(tmpsrc, Y)); /* tmp.X = MAX(tmp.X, tmp.Z) */ emit_op2(c, TGSI_OPCODE_MAX, - brw_writemask(tmp, BRW_WRITEMASK_X), + dst_mask(tmp, BRW_WRITEMASK_X), tmpsrc, - src_swizzle1(tmpsrc, Z)); + src_scalar(tmpsrc, Z)); /* tmp.X = 1 / tmp.X */ emit_op1(c, TGSI_OPCODE_RCP, @@ -427,11 +730,12 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op2(c, TGSI_OPCODE_MUL, tmp, src0, - src_swizzle1(tmpsrc, SWIZZLE_X)); + src_scalar(tmpsrc, X)); coord = tmpsrc; } - else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) { + else if (target == TGSI_TEXTURE_RECT || + target == TGSI_TEXTURE_SHADOWRECT) { /* XXX: need a mechanism for internally generated constants. */ coord = src0; @@ -448,19 +752,18 @@ static void precalc_tex( struct brw_wm_compile *c, if (c->key.yuvtex_mask & (1 << unit)) { /* convert ycbcr to RGBA */ GLboolean swap_uv = c->key.yuvtex_swap_mask & (1<DstReg; - struct ureg_dst tmp = get_temp(c); - struct ureg_src tmpsrc = src_reg_from_dst(tmp); - struct ureg_src C0 = ureg_imm4f( c->ureg, -.5, -.0625, -.5, 1.164 ); - struct ureg_src C1 = ureg_imm4f( c->ureg, 1.596, -0.813, 2.018, -.391 ); + struct brw_fp_dst tmp = get_temp(c); + struct brw_fp_src tmpsrc = src_reg_from_dst(tmp); + struct brw_fp_src C0 = src_imm4f( c, -.5, -.0625, -.5, 1.164 ); + struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 ); /* tmp = TEX ... 
*/ emit_tex_op(c, TGSI_OPCODE_TEX, - brw_saturate(tmp, dst.Saturate), + dst_saturate(tmp, dst.saturate), unit, - inst->TexSrcTarget, + target, coord, src_undef(), src_undef()); @@ -477,7 +780,7 @@ static void precalc_tex( struct brw_wm_compile *c, emit_op2(c, TGSI_OPCODE_MUL, dst_mask(tmp, BRW_WRITEMASK_Y), tmpsrc, - src_swizzle1(C0, W)); + src_scalar(C0, W)); /* * if (UV swaped) @@ -492,16 +795,16 @@ static void precalc_tex( struct brw_wm_compile *c, src_swizzle(tmpsrc, Z,Z,X,X) : src_swizzle(tmpsrc, X,X,Z,Z)), C1, - src_swizzle1(tmpsrc, Y)); + src_scalar(tmpsrc, Y)); /* RGB.y = MAD YUV.z, C1.w, RGB.y */ emit_op3(c, TGSI_OPCODE_MAD, dst_mask(dst, BRW_WRITEMASK_Y), - src_swizzle1(tmpsrc, Z), - src_swizzle1(C1, W), - src_swizzle1(src_reg_from_dst(dst), Y)); + src_scalar(tmpsrc, Z), + src_scalar(C1, W), + src_scalar(src_reg_from_dst(dst), Y)); release_temp(c, tmp); } @@ -509,9 +812,9 @@ static void precalc_tex( struct brw_wm_compile *c, /* ordinary RGBA tex instruction */ emit_tex_op(c, TGSI_OPCODE_TEX, - inst->DstReg, + dst, unit, - inst->TexSrcTarget, + target, coord, src_undef(), src_undef()); @@ -523,8 +826,8 @@ static void precalc_tex( struct brw_wm_compile *c, /* Release this temp if we ended up allocating it: */ - if (!brw_dst_is_undef(tmpcoord)) - release_temp(c, tmpcoord); + if (!dst_is_undef(tmp)) + release_temp(c, tmp); } @@ -532,13 +835,9 @@ static void precalc_tex( struct brw_wm_compile *c, * Check if the given TXP instruction really needs the divide-by-W step. */ static GLboolean projtex( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + unsigned target, + struct brw_fp_src src ) { - const struct ureg_src src = inst->SrcReg[0]; - GLboolean retVal; - - assert(inst->Opcode == TGSI_OPCODE_TXP); - /* Only try to detect the simplest cases. Could detect (later) * cases where we are trying to emit code like RCP {1.0}, MUL x, * {1.0}, and so on. 
@@ -546,16 +845,15 @@ static GLboolean projtex( struct brw_wm_compile *c, * More complex cases than this typically only arise from * user-provided fragment programs anyway: */ - if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) - retVal = GL_FALSE; /* ut2004 gun rendering !?! */ - else if (src.File == TGSI_FILE_INPUT && - GET_SWZ(src.Swizzle, W) == W && - (c->key.proj_attrib_mask & (1 << src.Index)) == 0) - retVal = GL_FALSE; - else - retVal = GL_TRUE; - - return retVal; + if (target == TGSI_TEXTURE_CUBE) + return GL_FALSE; /* ut2004 gun rendering !?! */ + + if (src.file == TGSI_FILE_INPUT && + GET_SWZ(src.swizzle, W) == W && + (c->key.proj_attrib_mask & (1 << src.index)) == 0) + return GL_FALSE; + + return GL_TRUE; } @@ -563,110 +861,168 @@ static GLboolean projtex( struct brw_wm_compile *c, * Emit code for TXP. */ static void precalc_txp( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + struct brw_fp_dst dst, + unsigned target, + unsigned unit, + struct brw_fp_src src0 ) { - struct ureg_src src0 = inst->SrcReg[0]; - - if (projtex(c, inst)) { - struct ureg_dst tmp = get_temp(c); - struct prog_instruction tmp_inst; + if (projtex(c, target, src0)) { + struct brw_fp_dst tmp = get_temp(c); /* tmp0.w = RCP inst.arg[0][3] */ - emit_op(c, + emit_op1(c, TGSI_OPCODE_RCP, dst_mask(tmp, BRW_WRITEMASK_W), - src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)), - src_undef(), - src_undef()); + src_scalar(src0, W)); /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww */ - emit_op(c, - TGSI_OPCODE_MUL, - dst_mask(tmp, BRW_WRITEMASK_XYZ), - src0, - src_swizzle1(src_reg_from_dst(tmp), W), - src_undef()); + emit_op2(c, + TGSI_OPCODE_MUL, + dst_mask(tmp, BRW_WRITEMASK_XYZ), + src0, + src_scalar(src_reg_from_dst(tmp), W)); - /* dst = precalc(TEX tmp0) + /* dst = TEX tmp0 */ - tmp_inst = *inst; - tmp_inst.SrcReg[0] = src_reg_from_dst(tmp); - precalc_tex(c, &tmp_inst); + precalc_tex(c, + dst, + target, + unit, + src_reg_from_dst(tmp)); release_temp(c, tmp); } else { - /* dst = precalc(TEX 
src0) + /* dst = TEX src0 */ - precalc_tex(c, inst); + precalc_tex(c, dst, target, unit, src0); } } +/* XXX: note this returns a src_reg. + */ +static struct brw_fp_src +find_output_by_semantic( struct brw_wm_compile *c, + unsigned semantic, + unsigned index ) +{ + const struct tgsi_shader_info *info = &c->fp->info; + unsigned i; + + for (i = 0; i < info->num_outputs; i++) + if (info->output_semantic_name[i] == semantic && + info->output_semantic_index[i] == index) + return src_reg( TGSI_FILE_OUTPUT, i ); + + /* If not found, return some arbitrary immediate value: + */ + return src_imm1f(c, 1.0); +} + static void emit_fb_write( struct brw_wm_compile *c ) { - struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH); - struct ureg_src outdepth = src_reg(TGSI_FILE_OUTPUT, FRAG_RESULT_DEPTH); - struct ureg_src outcolor; - struct prog_instruction *inst; + struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH); + struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0); GLuint i; - /* The inst->Aux field is used for FB write target and the EOT marker */ + outdepth = src_scalar(outdepth, Z); for (i = 0 ; i < c->key.nr_cbufs; i++) { - outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); + struct brw_fp_src outcolor; + unsigned target = 1<key.nr_cbufs - 1) + target |= 1; + + outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); - inst->Aux = (i<<1); + /* Use emit_tex_op so that we can specify the inst->tex_target + * field, which is abused to contain the FB write target and the + * EOT marker + */ + emit_tex_op(c, WM_FB_WRITE, + dst_undef(), + target, + 0, + outcolor, + payload_r0_depth, + outdepth); } - - /* Set EOT flag on last inst: - */ - inst->Aux |= 1; //eot } +static struct brw_fp_dst translate_dst( struct brw_wm_compile *c, + const struct tgsi_full_dst_register *dst, + unsigned saturate ) +{ + struct brw_fp_dst out; + + out.file = dst->DstRegister.File; + out.index = 
dst->DstRegister.Index; + out.writemask = dst->DstRegister.WriteMask; + out.indirect = dst->DstRegister.Indirect; + out.saturate = (saturate == TGSI_SAT_ZERO_ONE); + + if (out.indirect) { + assert(dst->DstRegisterInd.File == TGSI_FILE_ADDRESS); + assert(dst->DstRegisterInd.Index == 0); + } + + return out; +} -/*********************************************************************** - * Emit INTERP instructions ahead of first use of each attrib. - */ - -static void validate_src_regs( struct brw_wm_compile *c, - const struct prog_instruction *inst ) +static struct brw_fp_src translate_src( struct brw_wm_compile *c, + const struct tgsi_full_src_register *src ) { - GLuint nr_args = brw_wm_nr_args( inst->Opcode ); - GLuint i; + struct brw_fp_src out; + + out.file = src->SrcRegister.File; + out.index = src->SrcRegister.Index; + out.indirect = src->SrcRegister.Indirect; + + out.swizzle = ((src->SrcRegister.SwizzleX << 0) | + (src->SrcRegister.SwizzleY << 2) | + (src->SrcRegister.SwizzleZ << 4) | + (src->SrcRegister.SwizzleW << 6)); + + switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) { + case TGSI_UTIL_SIGN_CLEAR: + out.abs = 1; + out.negate = 0; + break; - for (i = 0; i < nr_args; i++) { - if (inst->SrcReg[i].File == TGSI_FILE_INPUT) { - GLuint idx = inst->SrcReg[i].Index; - if (!(c->fp_interp_emitted & (1<fp_interp_emitted |= 1<DstReg.File == TGSI_FILE_OUTPUT) { - GLuint idx = inst->DstReg.Index; - if (idx == FRAG_RESULT_COLOR) - c->fp_fragcolor_emitted |= inst->DstReg.WriteMask; + + if (out.indirect) { + assert(src->SrcRegisterInd.File == TGSI_FILE_ADDRESS); + assert(src->SrcRegisterInd.Index == 0); } + + return out; } @@ -674,59 +1030,78 @@ static void validate_dst_regs( struct brw_wm_compile *c, static void emit_insn( struct brw_wm_compile *c, const struct tgsi_full_instruction *inst ) { - - switch (inst->Opcode) { + unsigned opcode = inst->Instruction.Opcode; + struct brw_fp_dst dst; + struct brw_fp_src src[3]; + int i; + + dst = translate_dst( c, 
&inst->FullDstRegisters[0], + inst->Instruction.Saturate ); + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) + src[i] = translate_src( c, &inst->FullSrcRegisters[0] ); + + switch (opcode) { case TGSI_OPCODE_ABS: emit_op1(c, TGSI_OPCODE_MOV, dst, - brw_abs(src[0])); + src_abs(src[0])); break; case TGSI_OPCODE_SUB: emit_op2(c, TGSI_OPCODE_ADD, dst, src[0], - brw_negate(src[1])); + src_negate(src[1])); break; case TGSI_OPCODE_SCS: emit_op1(c, TGSI_OPCODE_SCS, - brw_writemask(dst, BRW_WRITEMASK_XY), + dst_mask(dst, BRW_WRITEMASK_XY), src[0]); break; case TGSI_OPCODE_DST: - precalc_dst(c, inst); + precalc_dst(c, dst, src[0], src[1]); break; case TGSI_OPCODE_LIT: - precalc_lit(c, inst); + precalc_lit(c, dst, src[0]); break; case TGSI_OPCODE_TEX: - precalc_tex(c, inst); + precalc_tex(c, dst, + inst->InstructionExtTexture.Texture, + src[0].file, /* sampler unit */ + src[1] ); break; case TGSI_OPCODE_TXP: - precalc_txp(c, inst); + precalc_txp(c, dst, + inst->InstructionExtTexture.Texture, + src[0].file, /* sampler unit */ + src[1] ); break; case TGSI_OPCODE_TXB: - out = emit_insn(c, inst); - out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT); + /* XXX: TXB not done + */ + precalc_tex(c, dst, + inst->InstructionExtTexture.Texture, + src[0].file, /* sampler unit */ + src[1] ); break; case TGSI_OPCODE_XPD: emit_op2(c, TGSI_OPCODE_XPD, - brw_writemask(dst, BRW_WRITEMASK_XYZ), + dst_mask(dst, BRW_WRITEMASK_XYZ), src[0], src[1]); break; case TGSI_OPCODE_KIL: emit_op1(c, TGSI_OPCODE_KIL, - brw_writemask(dst_undef(), 0), + dst_mask(dst_undef(), 0), src[0]); break; @@ -734,10 +1109,11 @@ static void emit_insn( struct brw_wm_compile *c, emit_fb_write(c); break; default: - if (brw_wm_is_scalar_result(inst->Opcode)) + if (!c->key.has_flow_control && + brw_wm_is_scalar_result(opcode)) emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]); else - emit_op(c, opcode, dst, src[0], src[1], src[2]); + emit_op3(c, opcode, 
dst, src[0], src[1], src[2]); break; } } @@ -746,46 +1122,70 @@ static void emit_insn( struct brw_wm_compile *c, * Initial pass for fragment program code generation. * This function is used by both the GLSL and non-GLSL paths. */ -void brw_wm_pass_fp( struct brw_wm_compile *c ) +int brw_wm_pass_fp( struct brw_wm_compile *c ) { - struct brw_fragment_program *fp = c->fp; - GLuint insn; + struct brw_fragment_shader *fs = c->fp; + struct tgsi_parse_context parse; + struct tgsi_full_instruction *inst; + struct tgsi_full_declaration *decl; + const float *imm; + GLuint size; + GLuint i; if (BRW_DEBUG & DEBUG_WM) { debug_printf("pre-fp:\n"); - tgsi_dump(fp->tokens, 0); + tgsi_dump(fs->tokens, 0); } - c->pixel_xy = brw_src_undef(); - c->delta_xy = brw_src_undef(); - c->pixel_w = brw_src_undef(); + c->fp_pixel_xy = src_undef(); + c->fp_delta_xy = src_undef(); + c->fp_pixel_w = src_undef(); c->nr_fp_insns = 0; - c->fp->tex_units_used = 0x0; + c->nr_immediates = 0; /* Loop over all instructions doing assorted simplifications and * transformations. */ - tgsi_parse_init( &parse, tokens ); + tgsi_parse_init( &parse, fs->tokens ); while( !tgsi_parse_end_of_tokens( &parse ) ) { tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - /* If branching shader, emit preamble instructions at decl time, as - * instruction order in the shader does not correspond to the order - * instructions are executed in the wild. - * - * This is where special instructions such as WM_CINTERP, - * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to compute - * shader inputs from varying vars. + /* Turn intput declarations into special WM_* instructions. * * XXX: For non-branching shaders, consider deferring variable * initialization as late as possible to minimize register * usage. This is how the original BRW driver worked. 
+ * + * In a branching shader, must preamble instructions at decl + * time, as instruction order in the shader does not + * correspond to the order instructions are executed in the + * wild. + * + * This is where special instructions such as WM_CINTERP, + * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to + * compute shader inputs from the payload registers and pixel + * position. */ - validate_src_regs(c, inst); - validate_dst_regs(c, inst); + decl = &parse.FullToken.FullDeclaration; + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + unsigned attrib; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + for (attrib = first; attrib <= last; attrib++) { + emit_interp(c, + attrib, + decl->Semantic.SemanticName, + decl->Declaration.Interpolate ); + } + } + break; case TGSI_TOKEN_TYPE_IMMEDIATE: @@ -795,21 +1195,36 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) * float value per instruction. Just save the data for now * and use directly later. 
*/ + i = c->nr_immediates++; + imm = &parse.FullToken.FullImmediate.u[i].Float; + size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + + if (c->nr_immediates >= BRW_WM_MAX_CONST) + return PIPE_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < size; i++) + c->immediate[c->nr_immediates].v[i] = imm[i]; + + for (; i < 4; i++) + c->immediate[c->nr_immediates].v[i] = 0.0; + + c->immediate[c->nr_immediates].nr = size; + c->nr_immediates++; break; case TGSI_TOKEN_TYPE_INSTRUCTION: inst = &parse.FullToken.FullInstruction; - emit_insn( c, inst ); + emit_insn(c, inst); break; } } - c->brw_program = brw_finalize( c->builder ); - if (BRW_DEBUG & DEBUG_WM) { debug_printf("pass_fp:\n"); - brw_print_program( c->brw_program ); + //brw_print_program( c->fp_brw_program ); debug_printf("\n"); } + + return c->error; } -- cgit v1.2.3 From f202a34cb1eca41cf5d12bd72016f284bc81ccf8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 31 Oct 2009 18:23:14 +0000 Subject: i965g: non-glsl fragment shader path is compiling Disabled glsl code for now, probably want to clean this up somehow. 
--- src/gallium/drivers/i965/Makefile | 1 - src/gallium/drivers/i965/brw_wm.c | 14 +- src/gallium/drivers/i965/brw_wm.h | 10 +- src/gallium/drivers/i965/brw_wm_fp.c | 7 +- src/gallium/drivers/i965/brw_wm_glsl.c | 268 ++++++++++++++++++++------------ src/gallium/drivers/i965/brw_wm_pass0.c | 87 +++++------ src/gallium/drivers/i965/brw_wm_pass1.c | 8 +- src/gallium/drivers/i965/brw_wm_pass2.c | 27 +--- 8 files changed, 230 insertions(+), 192 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index c3dbad72ae..896cb234a6 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -47,7 +47,6 @@ C_SOURCES = \ brw_wm_debug.c \ brw_wm_emit.c \ brw_wm_fp.c \ - brw_wm_glsl.c \ brw_wm_iz.c \ brw_wm_pass0.c \ brw_wm_pass1.c \ diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 33602b59c1..4fbf9de9bb 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -177,7 +177,10 @@ static int do_wm_prog( struct brw_context *brw, */ if (fp->has_flow_control) { c->dispatch_width = 8; - brw_wm_branching_shader_emit(brw, c); + /* XXX: GLSL support + */ + exit(1); + //brw_wm_branching_shader_emit(brw, c); } else { c->dispatch_width = 16; @@ -239,18 +242,9 @@ static void brw_wm_populate_key( struct brw_context *brw, brw->curr.fragment_shader->uses_depth, key); - /* Revisit this, figure out if it's really useful, and either push - * it into the state tracker so that everyone benefits (use to - * create fs varients with TEX rather than TXP), or discard. - */ - key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/ - /* PIPE_NEW_RAST */ key->flat_shade = brw->curr.rast->templ.flatshade; - /* This can be determined by looking at the INTERP mode each input decl. 
- */ - key->linear_attrib_mask = 0; /* PIPE_NEW_BOUND_TEXTURES */ for (i = 0; i < brw->curr.num_textures; i++) { diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 8ee99420aa..48dac39756 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -56,9 +56,6 @@ #define AA_ALWAYS 2 struct brw_wm_prog_key { - unsigned proj_attrib_mask; /**< one bit per fragment program attribute */ - unsigned linear_attrib_mask; /**< linear interpolation vs perspective interp */ - GLuint source_depth_reg:3; GLuint aa_dest_stencil_reg:3; GLuint dest_depth_reg:3; @@ -73,6 +70,7 @@ struct brw_wm_prog_key { GLuint yuvtex_swap_mask:16; /* UV swaped */ GLuint vp_nr_outputs:6; + GLuint nr_inputs:6; GLuint nr_cbufs:3; GLuint has_flow_control:1; @@ -179,6 +177,12 @@ struct brw_wm_instruction { #define BRW_FILE_PAYLOAD (TGSI_FILE_COUNT) #define PAYLOAD_DEPTH (PIPE_MAX_SHADER_INPUTS) /* ?? */ +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 +#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3) + struct brw_fp_src { unsigned file:4; diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 57933afbbe..58f1d35b7d 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -46,11 +46,6 @@ #include "brw_debug.h" -#define X 0 -#define Y 1 -#define Z 2 -#define W 3 -#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3) static const char *wm_opcode_strings[] = { @@ -850,7 +845,7 @@ static GLboolean projtex( struct brw_wm_compile *c, if (src.file == TGSI_FILE_INPUT && GET_SWZ(src.swizzle, W) == W && - (c->key.proj_attrib_mask & (1 << src.index)) == 0) + c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE) return GL_FALSE; return GL_TRUE; diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index cdc10484a6..a06b0a446e 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ 
b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -1,10 +1,13 @@ +#include "util/u_math.h" + + #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint component); @@ -63,7 +66,7 @@ alloc_grf(struct brw_wm_compile *c) /* really, no free GRF regs found */ if (!c->out_of_regs) { /* print warning once per compilation */ - _mesa_warning(NULL, "i965: ran out of registers for fragment program"); + debug_printf("%s: ran out of registers for fragment program", __FUNCTION__); c->out_of_regs = GL_TRUE; } @@ -154,20 +157,18 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, { struct brw_reg reg; switch (file) { - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - file = PROGRAM_STATE_VAR; - break; - case PROGRAM_UNDEFINED: + case TGSI_FILE_NULL: return brw_null_reg(); - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - case PROGRAM_OUTPUT: - case PROGRAM_PAYLOAD: + + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + case BRW_FILE_PAYLOAD: break; + default: - debug_printf("Unexpected file in get_reg()"); + debug_printf("%s: Unexpected file type\n", __FUNCTION__); return brw_null_reg(); } @@ -204,6 +205,76 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, + +/** + * Find first/last instruction that references each temporary register. 
+ */ +GLboolean +_mesa_find_temp_intervals(const struct prog_instruction *instructions, + GLuint numInstructions, + GLint intBegin[MAX_PROGRAM_TEMPS], + GLint intEnd[MAX_PROGRAM_TEMPS]) +{ + struct loop_info + { + GLuint Start, End; /**< Start, end instructions of loop */ + }; + struct loop_info loopStack[MAX_LOOP_NESTING]; + GLuint loopStackDepth = 0; + GLuint i; + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + intBegin[i] = intEnd[i] = -1; + } + + /* Scan instructions looking for temporary registers */ + for (i = 0; i < numInstructions; i++) { + const struct prog_instruction *inst = instructions + i; + if (inst->Opcode == OPCODE_BGNLOOP) { + loopStack[loopStackDepth].Start = i; + loopStack[loopStackDepth].End = inst->BranchTarget; + loopStackDepth++; + } + else if (inst->Opcode == OPCODE_ENDLOOP) { + loopStackDepth--; + } + else if (inst->Opcode == OPCODE_CAL) { + return GL_FALSE; + } + else { + const GLuint numSrc = 3; + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + if (inst->SrcReg[j].RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + if (inst->DstReg.RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + } + + return GL_TRUE; +} + + /** * This is called if we run out of GRF registers. Examine the live intervals * of temp regs in the program and free those which won't be used again. 
@@ -211,29 +282,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component, static void reclaim_temps(struct brw_wm_compile *c) { - GLint intBegin[MAX_PROGRAM_TEMPS]; - GLint intEnd[MAX_PROGRAM_TEMPS]; + GLint intBegin[BRW_WM_MAX_TEMPS]; + GLint intEnd[BRW_WM_MAX_TEMPS]; int index; /*printf("Reclaim temps:\n");*/ - _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns, + _mesa_find_temp_intervals(c->fp_instructions, c->nr_fp_insns, intBegin, intEnd); - for (index = 0; index < MAX_PROGRAM_TEMPS; index++) { + for (index = 0; index < BRW_WM_MAX_TEMPS; index++) { if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) { /* program temp[i] can be freed */ int component; /*printf(" temp[%d] is dead\n", index);*/ for (component = 0; component < 4; component++) { - if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) { - int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr; + if (c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited) { + int r = c->wm_regs[TGSI_FILE_TEMPORARY][index][component].reg.nr; release_grf(c, r); /* printf(" Reclaim temp %d, reg %d at inst %d\n", index, r, c->cur_inst); */ - c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE; + c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited = GL_FALSE; } } } @@ -264,7 +335,7 @@ static void prealloc_reg(struct brw_wm_compile *c) reg = brw_vec8_grf(i * 2, 0); else reg = brw_vec8_grf(0, 0); - set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg); + set_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, i, reg); } reg_index += 2 * c->key.nr_depth_regs; @@ -306,7 +377,7 @@ static void prealloc_reg(struct brw_wm_compile *c) * Constants will be copied in prepare_constant_buffer() */ c->prog_data.param[index] = &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + set_reg(c, TGSI_FILE_STATE_VAR, i, j, reg); } } /* number of constant regs used (each reg is float[8]) */ @@ -330,7 +401,7 @@ static void prealloc_reg(struct brw_wm_compile *c) 
urb_read_length = reg_index; reg = brw_vec8_grf(reg_index, 0); for (j = 0; j < 4; j++) - set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg); + set_reg(c, TGSI_FILE_PAYLOAD, fp_input, j, reg); } if (c->key.nr_vp_outputs > i) { reg_index += 2; @@ -354,7 +425,7 @@ static void prealloc_reg(struct brw_wm_compile *c) prealloc_grf(c, 127); for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; + const struct brw_fp_instruction *inst = &c->fp_instructions[i]; struct brw_reg dst[4]; switch (inst->Opcode) { @@ -397,7 +468,7 @@ static void prealloc_reg(struct brw_wm_compile *c) * the three GRF slots. */ static void fetch_constants(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint i; @@ -405,9 +476,8 @@ static void fetch_constants(struct brw_wm_compile *c, /* loop over instruction src regs */ for (i = 0; i < 3; i++) { const struct prog_src_register *src = &inst->SrcReg[i]; - if (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM) { + if (src->File == TGSI_FILE_IMMEDIATE || + src->File == TGSI_FILE_CONSTANT) { c->current_const[i].index = src->Index; #if 0 @@ -431,7 +501,7 @@ static void fetch_constants(struct brw_wm_compile *c, * Convert Mesa dst register to brw register. 
*/ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint component) { const int nr = 1; @@ -442,7 +512,7 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c, static struct brw_reg get_src_reg_const(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint srcRegIndex, GLuint component) { /* We should have already fetched the constant from the constant @@ -462,7 +532,7 @@ get_src_reg_const(struct brw_wm_compile *c, const_reg = stride(const_reg, 0, 1, 0); const_reg.subnr = component * 4; - if (src->Negate & (1 << component)) + if (src->Negate) const_reg = negate(const_reg); if (src->Abs) const_reg = brw_abs(const_reg); @@ -483,7 +553,7 @@ get_src_reg_const(struct brw_wm_compile *c, * Convert Mesa src register to brw register. */ static struct brw_reg get_src_reg(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint srcRegIndex, GLuint channel) { const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; @@ -499,9 +569,9 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, } if (c->fp->use_const_buffer && - (src->File == PROGRAM_STATE_VAR || - src->File == PROGRAM_CONSTANT || - src->File == PROGRAM_UNIFORM)) { + (src->File == TGSI_FILE_STATE_VAR || + src->File == TGSI_FILE_CONSTANT || + src->File == TGSI_FILE_UNIFORM)) { return get_src_reg_const(c, inst, srcRegIndex, component); } else { @@ -513,26 +583,26 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, /** - * Same as \sa get_src_reg() but if the register is a literal, emit - * a brw_reg encoding the literal. - * Note that a brw instruction only allows one src operand to be a literal. + * Same as \sa get_src_reg() but if the register is a immediate, emit + * a brw_reg encoding the immediate. + * Note that a brw instruction only allows one src operand to be a immediate. 
* For instructions with more than one operand, only the second can be a - * literal. This means that we treat some literals as constants/uniforms - * (which why PROGRAM_CONSTANT is checked in fetch_constants()). + * immediate. This means that we treat some immediates as constants + * (which why TGSI_FILE_IMMEDIATE is checked in fetch_constants()). * */ static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint srcRegIndex, GLuint channel) { const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; - if (src->File == PROGRAM_CONSTANT) { - /* a literal */ + if (src->File == TGSI_FILE_IMMEDIATE) { + /* an immediate */ const int component = GET_SWZ(src->Swizzle, channel); const GLfloat *param = c->fp->program.Base.Parameters->ParameterValues[src->Index]; GLfloat value = param[component]; - if (src->Negate & (1 << channel)) + if (src->Negate) value = -value; if (src->Abs) value = FABSF(value); @@ -612,7 +682,7 @@ static void invoke_subroutine( struct brw_wm_compile *c, } static void emit_trunc( struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -630,7 +700,7 @@ static void emit_trunc( struct brw_wm_compile *c, } static void emit_mov( struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -650,7 +720,7 @@ static void emit_mov( struct brw_wm_compile *c, } static void emit_pixel_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW); @@ -680,7 +750,7 @@ static void emit_pixel_xy(struct brw_wm_compile *c, } static void emit_delta_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction 
*inst) { struct brw_reg r1 = brw_vec1_grf(1, 0); struct brw_reg dst0, dst1, src0, src1; @@ -740,7 +810,7 @@ static void fire_fb_write( struct brw_wm_compile *c, } static void emit_fb_write(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; int nr = 2; @@ -808,7 +878,7 @@ static void emit_fb_write(struct brw_wm_compile *c, } static void emit_pixel_w( struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -838,7 +908,7 @@ static void emit_pixel_w( struct brw_wm_compile *c, } static void emit_linterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -867,7 +937,7 @@ static void emit_linterp(struct brw_wm_compile *c, } static void emit_cinterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -893,7 +963,7 @@ static void emit_cinterp(struct brw_wm_compile *c, } static void emit_pinterp(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -927,7 +997,7 @@ static void emit_pinterp(struct brw_wm_compile *c, /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. 
*/ static void emit_frontfacing(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD); @@ -956,7 +1026,7 @@ static void emit_frontfacing(struct brw_wm_compile *c, } static void emit_xpd(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { int i; struct brw_compile *p = &c->func; @@ -981,13 +1051,13 @@ static void emit_xpd(struct brw_wm_compile *c, } static void emit_dp3(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_reg src0[3], src1[3], dst; int i; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; @@ -1008,13 +1078,13 @@ static void emit_dp3(struct brw_wm_compile *c, } static void emit_dp4(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; @@ -1035,13 +1105,13 @@ static void emit_dp4(struct brw_wm_compile *c, } static void emit_dph(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_reg src0[4], src1[4], dst; int i; struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; @@ -1067,12 +1137,12 @@ static void emit_dph(struct brw_wm_compile *c, * register's X, Y, Z and W channels (subject to 
writemasking of course). */ static void emit_math1(struct brw_wm_compile *c, - const struct prog_instruction *inst, GLuint func) + const struct brw_fp_instruction *inst, GLuint func) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; @@ -1095,43 +1165,43 @@ static void emit_math1(struct brw_wm_compile *c, } static void emit_rcp(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_INV); } static void emit_rsq(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); } static void emit_sin(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); } static void emit_cos(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_COS); } static void emit_ex2(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); } static void emit_lg2(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); } static void emit_add(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1150,7 +1220,7 @@ static void emit_add(struct brw_wm_compile *c, } static void emit_arl(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, 
addr_reg; @@ -1164,7 +1234,7 @@ static void emit_arl(struct brw_wm_compile *c, static void emit_mul(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, src1, dst; @@ -1183,7 +1253,7 @@ static void emit_mul(struct brw_wm_compile *c, } static void emit_frc(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -1202,7 +1272,7 @@ static void emit_frc(struct brw_wm_compile *c, } static void emit_flr(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg src0, dst; @@ -1221,7 +1291,7 @@ static void emit_flr(struct brw_wm_compile *c, static void emit_min_max(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; const GLuint mask = inst->DstReg.WriteMask; @@ -1269,12 +1339,12 @@ static void emit_min_max(struct brw_wm_compile *c, } static void emit_pow(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst, src0, src1; GLuint mask = inst->DstReg.WriteMask; - int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1; + int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1; if (!(mask & WRITEMASK_XYZW)) return; @@ -1299,7 +1369,7 @@ static void emit_pow(struct brw_wm_compile *c, } static void emit_lrp(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1352,7 +1422,7 @@ static void emit_kil(struct brw_wm_compile *c) } static void emit_mad(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { 
struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1375,7 +1445,7 @@ static void emit_mad(struct brw_wm_compile *c, } static void emit_sop(struct brw_wm_compile *c, - const struct prog_instruction *inst, GLuint cond) + const struct brw_fp_instruction *inst, GLuint cond) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1399,37 +1469,37 @@ static void emit_sop(struct brw_wm_compile *c, } static void emit_slt(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_L); } static void emit_sle(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_LE); } static void emit_sgt(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_G); } static void emit_sge(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_GE); } static void emit_seq(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_EQ); } static void emit_sne(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { emit_sop(c, inst, BRW_CONDITIONAL_NEQ); } @@ -1459,7 +1529,7 @@ static INLINE struct brw_reg odd_bytes( struct brw_reg reg ) static void emit_wpos_xy(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; GLuint mask = inst->DstReg.WriteMask; @@ -1494,25 +1564,25 @@ static void emit_wpos_xy(struct brw_wm_compile *c, BIAS on SIMD8 not working yet... 
*/ static void emit_txb(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; + /* Note: tex_unit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->tex_unit; GLuint i; GLuint msg_type; assert(unit < BRW_MAX_TEX_UNIT); - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) src[i] = get_src_reg(c, inst, 0, i); - switch (inst->TexSrcTarget) { + switch (inst->tex_target) { case TEXTURE_1D_INDEX: brw_MOV(p, brw_message_reg(2), src[0]); /* s coord */ brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); /* t coord */ @@ -1561,12 +1631,12 @@ static void emit_txb(struct brw_wm_compile *c, static void emit_tex(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; - /* Note: TexSrcUnit was already looked up through SamplerTextures[] */ - const GLuint unit = inst->TexSrcUnit; + /* Note: tex_unit was already looked up through SamplerTextures[] */ + const GLuint unit = inst->tex_unit; GLuint msg_len; GLuint i, nr; GLuint emit; @@ -1575,14 +1645,14 @@ static void emit_tex(struct brw_wm_compile *c, assert(unit < BRW_MAX_TEX_UNIT); - payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); for (i = 0; i < 4; i++) dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) src[i] = get_src_reg(c, inst, 0, i); - switch (inst->TexSrcTarget) { + switch (inst->tex_target) { case TEXTURE_1D_INDEX: emit = WRITEMASK_X; nr = 1; @@ -1657,7 +1727,7 
@@ static void post_wm_emit( struct brw_wm_compile *c ) static void get_argument_regs(struct brw_wm_compile *c, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, int index, struct brw_reg *regs, int mask) @@ -1686,7 +1756,7 @@ static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_ brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); for (i = 0; i < c->nr_fp_insns; i++) { - const struct prog_instruction *inst = &c->prog_instructions[i]; + const struct brw_fp_instruction *inst = &c->fp_instructions[i]; int dst_flags; struct brw_reg args[3][4], dst[4]; int j; diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index d8b9028927..7b18335dec 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -28,9 +28,10 @@ * Authors: * Keith Whitwell */ - -#include "brw_context.h" +#include "util/u_memory.h" + +#include "brw_debug.h" #include "brw_wm.h" @@ -133,19 +134,19 @@ static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c, /* Search for an existing const value matching the request: */ for (i = 0; i < c->nr_imm_refs; i++) { - if (c->imm_ref[i].imm_val == *imm1f) + if (c->imm_ref[i].imm1f == *imm1f) return c->imm_ref[i].ref; } /* Else try to add a new one: */ - if (c->nr_imm_refs < BRW_WM_MAX_IMM) { + if (c->nr_imm_refs < Elements(c->imm_ref)) { GLuint i = c->nr_imm_refs++; /* An immediate is a special type of parameter: */ - c->imm_ref[i].imm_val = *imm_val; - c->imm_ref[i].ref = get_param_ref(c, imm_val); + c->imm_ref[i].imm1f = *imm1f; + c->imm_ref[i].ref = get_param_ref(c, imm1f); return c->imm_ref[i].ref; } @@ -180,7 +181,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, break; case TGSI_FILE_IMMEDIATE: - ref = get_imm_ref(c, &plist->ParameterValues[idx][component]); + ref = get_imm_ref(c, &c->immediate[idx].v[component]); break; default: @@ -205,16 +206,16 @@ static const 
struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, static void pass0_set_dst( struct brw_wm_compile *c, struct brw_wm_instruction *out, - const struct prog_instruction *inst, + const struct brw_fp_instruction *inst, GLuint writemask ) { - const struct prog_dst_register *dst = &inst->DstReg; + const struct brw_fp_dst dst = inst->dst; GLuint i; for (i = 0; i < 4; i++) { if (writemask & (1<dst[i] = get_value(c); - pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]); + pass0_set_fpreg_value(c, dst.file, dst.index, i, out->dst[i]); } } @@ -223,27 +224,15 @@ static void pass0_set_dst( struct brw_wm_compile *c, static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, - struct prog_src_register src, + struct brw_fp_src src, GLuint i ) { - GLuint component = GET_SWZ(src.Swizzle,i); - const struct brw_wm_ref *src_ref; - static const GLfloat const_zero = 0.0; - static const GLfloat const_one = 1.0; - - if (component == SWIZZLE_ZERO) - src_ref = get_imm_ref(c, &const_zero); - else if (component == SWIZZLE_ONE) - src_ref = get_imm_ref(c, &const_one); - else - src_ref = pass0_get_reg(c, src.File, src.Index, component); - - return src_ref; + return pass0_get_reg(c, src.file, src.index, GET_SWZ(src.swizzle,i)); } static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, - struct prog_src_register src, + struct brw_fp_src src, GLuint i, struct brw_wm_instruction *insn) { @@ -259,10 +248,10 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, newref->value->lastuse = newref; } - if (src.Negate & (1 << i)) + if (src.negate) newref->hw_reg.negate ^= 1; - if (src.Abs) { + if (src.abs) { newref->hw_reg.negate = 0; newref->hw_reg.abs = 1; } @@ -273,21 +262,21 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c, static void translate_insn(struct brw_wm_compile *c, - const struct prog_instruction *inst) + const struct brw_fp_instruction *inst) { struct brw_wm_instruction *out = get_instruction(c); - GLuint 
writemask = inst->dst.WriteMask; - GLuint nr_args = brw_wm_nr_args(inst->Opcode); + GLuint writemask = inst->dst.writemask; + GLuint nr_args = brw_wm_nr_args(inst->opcode); GLuint i, j; /* Copy some data out of the instruction */ - out->opcode = inst->Opcode; - out->saturate = inst->dst.Saturate; - out->tex_unit = inst->TexSrcUnit; - out->tex_target = inst->TexSrcTarget; - out->eot = inst->Aux & 1; - out->target = inst->Aux >> 1; + out->opcode = inst->opcode; + out->saturate = inst->dst.saturate; + out->tex_unit = inst->tex_unit; + out->tex_target = inst->tex_target; + out->eot = inst->eot; //inst->Aux & 1; + out->target = inst->target; //inst->Aux >> 1; /* Args: */ @@ -308,10 +297,10 @@ translate_insn(struct brw_wm_compile *c, * Optimize moves and swizzles away: */ static void pass0_precalc_mov( struct brw_wm_compile *c, - const struct prog_instruction *inst ) + const struct brw_fp_instruction *inst ) { - const struct prog_dst_register *dst = &inst->DstReg; - GLuint writemask = inst->DstReg.WriteMask; + const struct brw_fp_dst dst = inst->dst; + GLuint writemask = dst.writemask; struct brw_wm_ref *refs[4]; GLuint i; @@ -323,11 +312,11 @@ static void pass0_precalc_mov( struct brw_wm_compile *c, * one loop and the above case was incorrectly handled. */ for (i = 0; i < 4; i++) { - refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL); + refs[i] = get_new_ref(c, inst->src[0], i, NULL); } for (i = 0; i < 4; i++) { if (writemask & (1 << i)) { - pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]); + pass0_set_fpreg_ref( c, dst.file, dst.index, i, refs[i]); } } } @@ -341,12 +330,12 @@ static void pass0_init_payload( struct brw_wm_compile *c ) for (i = 0; i < 4; i++) { GLuint j = i >= c->key.nr_depth_regs ? 
0 : i; - pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, + pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, PAYLOAD_DEPTH, i, &c->payload.depth[j] ); } - for (i = 0; i < FRAG_ATTRIB_MAX; i++) - pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, + for (i = 0; i < c->key.nr_inputs; i++) + pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, i, 0, &c->payload.input_interp[i] ); } @@ -360,7 +349,7 @@ static void pass0_init_payload( struct brw_wm_compile *c ) * * Translate away swizzling and eliminate non-saturating moves. * - * Translate instructions from Mesa's prog_instruction structs to our + * Translate instructions from our fp_instruction structs to our * internal brw_wm_instruction representation. */ void brw_wm_pass0( struct brw_wm_compile *c ) @@ -374,13 +363,13 @@ void brw_wm_pass0( struct brw_wm_compile *c ) pass0_init_payload(c); for (insn = 0; insn < c->nr_fp_insns; insn++) { - const struct prog_instruction *inst = &c->prog_instructions[insn]; + const struct brw_fp_instruction *inst = &c->fp_instructions[insn]; /* Optimize away moves, otherwise emit translated instruction: */ - switch (inst->Opcode) { - case OPCODE_MOV: - if (!inst->dst.Saturate) { + switch (inst->opcode) { + case TGSI_OPCODE_MOV: + if (!inst->dst.saturate) { pass0_precalc_mov(c, inst); } else { diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index b0356b1bd5..09ad2b8f5b 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -30,8 +30,8 @@ */ -#include "brw_context.h" #include "brw_wm.h" +#include "brw_debug.h" static GLuint get_tracked_mask(struct brw_wm_compile *c, @@ -223,11 +223,11 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: - read0 = get_texcoord_mask(inst->tex_idx); + read0 = get_texcoord_mask(inst->tex_target); break; case TGSI_OPCODE_TXB: - read0 = get_texcoord_mask(inst->tex_idx) | BRW_WRITEMASK_W; + read0 = get_texcoord_mask(inst->tex_target) 
| BRW_WRITEMASK_W; break; case WM_WPOSXY: @@ -276,7 +276,7 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case TGSI_OPCODE_DST: case WM_FRONTFACING: - case TGSI_OPCODE_KIL_NV: + case TGSI_OPCODE_KILP: default: break; } diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c index a19ca62328..d3d678a5e6 100644 --- a/src/gallium/drivers/i965/brw_wm_pass2.c +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -30,7 +30,7 @@ */ -#include "brw_context.h" +#include "brw_debug.h" #include "brw_wm.h" @@ -82,27 +82,14 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], i++); - for (j = 0; j < FRAG_ATTRIB_MAX; j++) { - if (c->key.vp_outputs_written & (1<= VERT_RESULT_VAR0) - fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0); - else if (j <= VERT_RESULT_TEX7) - fp_index = j; - else - fp_index = -1; - - nr_interp_regs++; - if (fp_index >= 0) - prealloc_reg(c, &c->payload.input_interp[fp_index], i++); - } + for (j = 0; j < c->key.vp_nr_outputs; j++) { + prealloc_reg(c, &c->payload.input_interp[j], i++); } assert(nr_interp_regs >= 1); c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = nr_interp_regs * 2; + c->prog_data.urb_read_length = c->key.vp_nr_outputs * 2; c->prog_data.curb_read_length = c->nr_creg * 2; c->max_wm_grf = i * 2; @@ -308,9 +295,9 @@ void brw_wm_pass2( struct brw_wm_compile *c ) /* Allocate registers to hold results: */ switch (inst->opcode) { - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXP: alloc_contiguous_dest(c, inst->dst, 4, insn); break; -- cgit v1.2.3 From e7b76000826ff4faf8bf6a834d55b50a2784c9f2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 31 Oct 2009 20:05:19 +0000 Subject: i965g: more work on compilation --- src/gallium/auxiliary/util/u_math.h | 13 ++ src/gallium/drivers/i965/brw_context.h | 10 +- 
src/gallium/drivers/i965/brw_pipe_sampler.c | 32 ++++ src/gallium/drivers/i965/brw_structs.h | 8 +- src/gallium/drivers/i965/brw_wm_sampler_state.c | 201 +++++++++--------------- src/gallium/drivers/i965/brw_wm_state.c | 76 ++++----- 6 files changed, 169 insertions(+), 171 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 75b075f160..c13bf96177 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -539,6 +539,19 @@ do { \ #endif +static INLINE uint32_t util_unsigned_fixed(float value, unsigned frac_bits) +{ + value *= (1< */ +#include "util/u_math.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_screen.h" /* Samplers aren't strictly wm state from the hardware's perspective, @@ -41,41 +43,6 @@ -/* The brw (and related graphics cores) do not support GL_CLAMP. The - * Intel drivers for "other operating systems" implement GL_CLAMP as - * GL_CLAMP_TO_EDGE, so the same is done here. 
- */ -static GLuint translate_wrap_mode( GLenum wrap ) -{ - switch( wrap ) { - case GL_REPEAT: - return BRW_TEXCOORDMODE_WRAP; - case GL_CLAMP: - return BRW_TEXCOORDMODE_CLAMP; - case GL_CLAMP_TO_EDGE: - return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ - case GL_CLAMP_TO_BORDER: - return BRW_TEXCOORDMODE_CLAMP_BORDER; - case GL_MIRRORED_REPEAT: - return BRW_TEXCOORDMODE_MIRROR; - default: - return BRW_TEXCOORDMODE_WRAP; - } -} - - -static GLuint U_FIXED(GLfloat value, GLuint frac_bits) -{ - value *= (1<tex_target == GL_TEXTURE_CUBE_MAP) { - if (key->seamless_cube_map && - (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) { - sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; - sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; - } else { - sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; - sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; - sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; - } - } else if (key->tex_target == GL_TEXTURE_1D) { - /* There's a bug in 1D texture sampling - it actually pays - * attention to the wrap_t value, though it should not. - * Override the wrap_t value here to GL_REPEAT to keep - * any nonexistent border pixels from floating in. 
- */ - sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; - } - - - - sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ -} - /** Sets up the cache key for sampler state for all texture units */ static void brw_wm_sampler_populate_key(struct brw_context *brw, struct wm_sampler_key *key) { - int nr = MIN2(brw->curr.number_textures, - brw->curr.number_samplers); int i; memset(key, 0, sizeof(*key)); - for (i = 0; i < nr; i++) { + key->sampler_count = MIN2(brw->curr.num_textures, + brw->curr.num_samplers); + + for (i = 0; i < key->sampler_count; i++) { const struct brw_texture *tex = brw->curr.texture[i]; const struct brw_sampler *sampler = brw->curr.sampler[i]; - struct wm_sampler_entry *entry = &key->sampler[i]; + struct brw_sampler_state *entry = &key->sampler[i]; - entry->tex_target = texObj->Target; - entry->seamless_cube_map = FALSE; /* XXX: add this to gallium */ entry->ss0 = sampler->ss0; entry->ss1 = sampler->ss1; + entry->ss2.default_color_pointer = brw->wm.sdc_bo[i]->offset >> 5; /* reloc */ entry->ss3 = sampler->ss3; + /* Cube-maps on 965 and later must use the same wrap mode for all 3 + * coordinate dimensions. Futher, only CUBE and CLAMP are valid. + */ + if (tex->base.target == PIPE_TEXTURE_CUBE) { + if (FALSE && + (sampler->ss0.min_filter != BRW_MAPFILTER_NEAREST || + sampler->ss0.mag_filter != BRW_MAPFILTER_NEAREST)) { + entry->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; + entry->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; + entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + } else { + entry->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + entry->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP; + } + } else if (tex->base.target == PIPE_TEXTURE_1D) { + /* There's a bug in 1D texture sampling - it actually pays + * attention to the wrap_t value, though it should not. + * Override the wrap_t value here to GL_REPEAT to keep + * any nonexistent border pixels from floating in. 
+ */ + entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + } + } +} + + +static void +brw_wm_sampler_update_default_colors(struct brw_context *brw) +{ + int nr = MIN2(brw->curr.num_textures, + brw->curr.num_samplers); + int i; + + for (i = 0; i < nr; i++) { + const struct brw_texture *tex = brw->curr.texture[i]; + const struct brw_sampler *sampler = brw->curr.sampler[i]; + brw->sws->bo_unreference(brw->wm.sdc_bo[i]); - if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) { + + if (pf_is_depth_or_stencil(tex->base.format)) { float bordercolor[4] = { - texObj->BorderColor[0], - texObj->BorderColor[0], - texObj->BorderColor[0], - texObj->BorderColor[0] + sampler->templ.border_color[0], + sampler->templ.border_color[0], + sampler->templ.border_color[0], + sampler->templ.border_color[0] }; /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. Spam R into all the @@ -183,22 +137,21 @@ brw_wm_sampler_populate_key(struct brw_context *brw, */ brw->wm.sdc_bo[i] = upload_default_color(brw, bordercolor); } else { - brw->wm.sdc_bo[i] = upload_default_color(brw, texObj->BorderColor); + brw->wm.sdc_bo[i] = upload_default_color(brw, sampler->templ.border_color); } } - - key->sampler_count = nr; } -/* All samplers must be uploaded in a single contiguous array, which - * complicates various things. However, this is still too confusing - - * FIXME: simplify all the different new texture state flags. + + +/* All samplers must be uploaded in a single contiguous array. 
*/ -static void upload_wm_samplers( struct brw_context *brw ) +static int upload_wm_samplers( struct brw_context *brw ) { struct wm_sampler_key key; int i; + brw_wm_sampler_update_default_colors(brw); brw_wm_sampler_populate_key(brw, &key); if (brw->wm.sampler_count != key.sampler_count) { @@ -209,7 +162,7 @@ static void upload_wm_samplers( struct brw_context *brw ) brw->sws->bo_unreference(brw->wm.sampler_bo); brw->wm.sampler_bo = NULL; if (brw->wm.sampler_count == 0) - return; + return 0; brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, &key, sizeof(key), @@ -220,41 +173,29 @@ static void upload_wm_samplers( struct brw_context *brw ) * cache. */ if (brw->wm.sampler_bo == NULL) { - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; - - memset(sampler, 0, sizeof(sampler)); - for (i = 0; i < key.sampler_count; i++) { - if (brw->wm.sdc_bo[i] == NULL) - continue; - - brw_update_sampler_state(&key.sampler[i], brw->wm.sdc_bo[i], - &sampler[i]); - } - brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, &key, sizeof(key), brw->wm.sdc_bo, key.sampler_count, - &sampler, sizeof(sampler), + &key.sampler, sizeof(key.sampler), NULL, NULL); /* Emit SDC relocations */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - if (!ctx->Texture.Unit[i]._ReallyEnabled) - continue; - - dri_bo_emit_reloc(brw->wm.sampler_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - i * sizeof(struct brw_sampler_state) + - offsetof(struct brw_sampler_state, ss2), - brw->wm.sdc_bo[i]); + for (i = 0; i < key.sampler_count; i++) { + brw->sws->bo_emit_reloc(brw->wm.sampler_bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + i * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + brw->wm.sdc_bo[i]); } } + + return 0; } const struct brw_tracked_state brw_wm_samplers = { .dirty = { - .mesa = PIPE_NEW_BOUND_TEXTURES | PIPE_NEW_SAMPLER, + .mesa = PIPE_NEW_BOUND_TEXTURES | PIPE_NEW_SAMPLERS, .brw = 0, .cache = 0 }, diff --git a/src/gallium/drivers/i965/brw_wm_state.c 
b/src/gallium/drivers/i965/brw_wm_state.c index 1898f38cef..f161de9b40 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -29,12 +29,14 @@ * Keith Whitwell */ - +#include "util/u_math.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" #include "brw_wm.h" +#include "brw_debug.h" +#include "brw_pipe_rast.h" /*********************************************************************** * WM unit - fragment programs and rasterization @@ -60,8 +62,7 @@ struct brw_wm_unit_key { static void wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { - const struct gl_fragment_program *fp = brw->fragment_program; - const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; + const struct brw_fragment_shader *fp = brw->curr.fragment_shader; memset(key, 0, sizeof(*key)); @@ -82,7 +83,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; key->curb_entry_read_length = brw->wm.prog_data->curb_read_length; key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; - key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024); + key->total_scratch = align(brw->wm.prog_data->total_scratch, 1024); /* BRW_NEW_URB_FENCE */ key->urb_size = brw->urb.vsize; @@ -96,39 +97,42 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /* CACHE_NEW_SAMPLER */ key->sampler_count = brw->wm.sampler_count; - /* _NEW_POLYGONSTIPPLE */ - key->polygon_stipple = ctx->Polygon.StippleFlag; + /* PIPE_NEW_RAST */ + key->polygon_stipple = brw->curr.rast->templ.poly_stipple_enable; - /* BRW_NEW_FRAGMENT_PROGRAM */ - key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; + /* PIPE_NEW_FRAGMENT_PROGRAM */ + key->uses_depth = fp->uses_depth; + key->computes_depth = fp->info.writes_z; - /* as far as we can tell */ - key->computes_depth = - (fp->Base.OutputsWritten & 
(1 << FRAG_RESULT_DEPTH)) != 0; /* PIPE_NEW_DEPTH_BUFFER + * * Override for NULL depthbuffer case, required by the Pixel Shader Computed * Depth field. */ if (brw->curr.fb.zsbuf == NULL) key->computes_depth = 0; - /* _NEW_COLOR */ - key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->has_flow_control = bfp->has_flow_control; + /* PIPE_NEW_DEPTH_STENCIL_ALPHA */ + key->uses_kill = (fp->info.uses_kill || + brw->curr.zstencil->cc3.alpha_test); + + key->has_flow_control = fp->has_flow_control; /* temporary sanity check assertion */ - ASSERT(bfp->has_flow_control == brw_wm_has_flow_control(fp)); + assert(fp->has_flow_control == 0); - /* _NEW_QUERY */ + /* PIPE_NEW_QUERY */ key->stats_wm = (brw->query.stats_wm != 0); - /* _NEW_LINE */ - key->line_stipple = ctx->Line.StippleFlag; + /* PIPE_NEW_RAST */ + key->line_stipple = brw->curr.rast->templ.line_stipple_enable; + - /* _NEW_POLYGON */ - key->offset_enable = ctx->Polygon.OffsetFill; - key->offset_units = ctx->Polygon.OffsetUnits; - key->offset_factor = ctx->Polygon.OffsetFactor; + key->offset_enable = (brw->curr.rast->templ.offset_cw || + brw->curr.rast->templ.offset_ccw); + + key->offset_units = brw->curr.rast->templ.offset_units; + key->offset_factor = brw->curr.rast->templ.offset_scale; } /** @@ -143,7 +147,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, memset(&wm, 0, sizeof(wm)); - wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; + wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -225,7 +229,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, NULL, NULL); /* Emit WM program relocation */ - dri_bo_emit_reloc(bo, + brw->sws->bo_emit_reloc(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, wm.thread0.grf_reg_count << 1, offsetof(struct 
brw_wm_unit_state, thread0), @@ -233,7 +237,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, /* Emit scratch space relocation */ if (key->total_scratch != 0) { - dri_bo_emit_reloc(bo, + brw->sws->bo_emit_reloc(bo, 0, 0, wm.thread2.per_thread_scratch_space, offsetof(struct brw_wm_unit_state, thread2), @@ -242,7 +246,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, /* Emit sampler state relocation */ if (key->sampler_count != 0) { - dri_bo_emit_reloc(bo, + brw->sws->bo_emit_reloc(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), offsetof(struct brw_wm_unit_state, wm4), @@ -253,7 +257,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, } -static void upload_wm_unit( struct brw_context *brw ) +static int upload_wm_unit( struct brw_context *brw ) { struct brw_wm_unit_key key; struct brw_winsys_buffer *reloc_bufs[3]; @@ -291,19 +295,19 @@ static void upload_wm_unit( struct brw_context *brw ) if (brw->wm.state_bo == NULL) { brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); } + + return 0; } const struct brw_tracked_state brw_wm_unit = { .dirty = { - .mesa = (PIPE_NEW_DEPTH_BUFFER | - _NEW_POLYGON | - _NEW_POLYGONSTIPPLE | - _NEW_LINE | - _NEW_COLOR | - _NEW_QUERY), - - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_CURBE_OFFSETS | + .mesa = (PIPE_NEW_FRAGMENT_SHADER | + PIPE_NEW_DEPTH_BUFFER | + PIPE_NEW_RAST | + PIPE_NEW_DEPTH_STENCIL_ALPHA | + PIPE_NEW_QUERY), + + .brw = (BRW_NEW_CURBE_OFFSETS | BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | -- cgit v1.2.3 From 9b18ca095503eb80f02db55baf9c35aa69ae1cc9 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 1 Nov 2009 12:08:14 +0000 Subject: i965g: more work on compilation -- surface management --- src/gallium/drivers/i965/brw_context.h | 11 +- src/gallium/drivers/i965/brw_defines.h | 5 + src/gallium/drivers/i965/brw_screen.h | 22 +- 
src/gallium/drivers/i965/brw_screen_surface.c | 117 ++++ src/gallium/drivers/i965/brw_screen_texture.c | 215 ++++++++ src/gallium/drivers/i965/brw_state.h | 16 - src/gallium/drivers/i965/brw_state_cache.c | 3 +- src/gallium/drivers/i965/brw_structs.h | 12 +- src/gallium/drivers/i965/brw_wm_surface_state.c | 690 ++++-------------------- 9 files changed, 474 insertions(+), 617 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 8067e20c96..471855ab63 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -146,6 +146,8 @@ struct brw_blend_state { struct brw_cc3 cc3; struct brw_cc5 cc5; struct brw_cc6 cc6; + + struct brw_surf_ss0 ss0; }; @@ -501,15 +503,14 @@ struct brw_context const struct brw_rasterizer_state *rast; const struct brw_depth_stencil_state *zstencil; - const struct brw_texture *texture[PIPE_MAX_SAMPLERS]; const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS]; - unsigned num_textures; + const struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned num_vertex_elements; unsigned num_samplers; - - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + struct brw_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned num_vertex_elements; + unsigned num_textures; unsigned num_vertex_buffers; struct pipe_scissor_state scissor; diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index 544d36306c..65cd71c939 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ b/src/gallium/drivers/i965/brw_defines.h @@ -841,4 +841,9 @@ #define URB_SIZES(brw) (BRW_IS_IGDNG(brw) ? 1024 : \ (BRW_IS_G4X(brw) ? 
384 : 256)) /* 512 bit units */ + +#define BRW_TILING_NONE 0 +#define BRW_TILING_Y 1 +#define BRW_TILING_X 2 + #endif diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index efa27db1e0..844c6355d5 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -32,6 +32,7 @@ #include "pipe/p_screen.h" #include "brw_reg.h" +#include "brw_structs.h" struct brw_winsys_screen; @@ -68,10 +69,23 @@ struct brw_texture { struct pipe_texture base; - ubyte shader_swizzle; + struct brw_winsys_buffer *bo; + struct brw_surface_state ss; + + unsigned brw_target; + unsigned pitch; + unsigned tiling; + unsigned cpp; }; +struct brw_surface +{ + struct pipe_surface base; + struct brw_surface_state ss; + struct brw_winsys_buffer *bo; +}; + /* * Cast wrappers */ @@ -87,6 +101,12 @@ brw_transfer(struct pipe_transfer *transfer) return (struct brw_transfer *)transfer; } +static INLINE struct brw_surface * +brw_surface(struct pipe_surface *surface) +{ + return (struct brw_surface *)surface; +} + static INLINE struct brw_buffer * brw_buffer(struct pipe_buffer *buffer) { diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c index e0df6cc629..01d4b2d2b1 100644 --- a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -2,6 +2,123 @@ #include "pipe/p_screen.h" #include "brw_screen.h" + +/** + * Sets up a surface state structure to point at the given region. + * While it is only used for the front/back buffer currently, it should be + * usable for further buffers when doing ARB_draw_buffer support. + */ +static void +brw_update_renderbuffer_surface(struct brw_context *brw, + struct gl_renderbuffer *rb, + unsigned int unit) +{ + struct brw_winsys_buffer *region_bo = NULL; + struct intel_renderbuffer *irb = intel_renderbuffer(rb); + struct intel_region *region = irb ? 
irb->region : NULL; + struct { + unsigned int surface_type; + unsigned int surface_format; + unsigned int width, height, pitch, cpp; + GLubyte color_mask[4]; + GLboolean color_blend; + uint32_t tiling; + uint32_t draw_offset; + } key; + + memset(&key, 0, sizeof(key)); + + if (region != NULL) { + region_bo = region->buffer; + + key.surface_type = BRW_SURFACE_2D; + switch (irb->texformat->MesaFormat) { + case PIPE_FORMAT_ARGB8888: + key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + break; + case PIPE_FORMAT_RGB565: + key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + break; + case PIPE_FORMAT_ARGB1555: + key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + break; + case PIPE_FORMAT_ARGB4444: + key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + break; + default: + debug_printf("Bad renderbuffer format: %d\n", + irb->texformat->MesaFormat); + assert(0); + key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + return; + } + key.tiling = region->tiling; + if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) { + key.width = rb->Width; + key.height = rb->Height; + } else { + key.width = region->width; + key.height = region->height; + } + key.pitch = region->pitch; + key.cpp = region->cpp; + key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ + } + + memcpy(key.color_mask, ctx->Color.ColorMask, + sizeof(key.color_mask)); + + key.color_blend = (!ctx->Color._LogicOpEnabled && + ctx->Color.BlendEnabled); + + brw->sws->bo_unreference(brw->wm.surf_bo[unit]); + brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &region_bo, 1, + NULL); + + if (brw->wm.surf_bo[unit] == NULL) { + struct brw_surface_state surf; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.surface_format = key.surface_format; + surf.ss0.surface_type = key.surface_type; + if (key.tiling == I915_TILING_NONE) { + surf.ss1.base_addr = key.draw_offset; + } else { + uint32_t tile_offset = key.draw_offset % 4096; + + 
surf.ss1.base_addr = key.draw_offset - tile_offset; + + assert(BRW_IS_G4X(brw) || tile_offset == 0); + if (BRW_IS_G4X(brw)) { + if (key.tiling == I915_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. + */ + surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 512 / 2; + } else { + surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4; + surf.ss5.y_offset = tile_offset / 128 / 2; + } + } + } + + if (region_bo != NULL) + surf.ss1.base_addr += region_bo->offset; /* reloc */ + + surf.ss2.width = key.width - 1; + surf.ss2.height = key.height - 1; + brw_set_surface_tiling(&surf, key.tiling); + surf.ss3.pitch = (key.pitch * key.cpp) - 1; + +} + + + struct brw_surface_id { unsigned face:3; unsigned zslice:13; diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 50c30878c6..3d069add6f 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -36,6 +36,166 @@ #define FILE_DEBUG_FLAG DEBUG_MIPTREE + + +static GLuint translate_tex_target( unsigned target ) +{ + switch (target) { + case PIPE_TEXTURE_1D: + return BRW_SURFACE_1D; + + case PIPE_TEXTURE_2D: + return BRW_SURFACE_2D; + + case PIPE_TEXTURE_3D: + return BRW_SURFACE_3D; + + case PIPE_TEXTURE_CUBE: + return BRW_SURFACE_CUBE; + + default: + assert(0); + return BRW_SURFACE_1D; + } +} + + +static GLuint translate_tex_format( enum pipe_format pf ) +{ + switch( pf ) { + case PIPE_FORMAT_L8_UNORM: + return BRW_SURFACEFORMAT_L8_UNORM; + + case PIPE_FORMAT_I8_UNORM: + return BRW_SURFACEFORMAT_I8_UNORM; + + case PIPE_FORMAT_A8_UNORM: + return BRW_SURFACEFORMAT_A8_UNORM; + + case PIPE_FORMAT_A8L8_UNORM: + return BRW_SURFACEFORMAT_L8A8_UNORM; + + case PIPE_FORMAT_A8R8G8B8_UNORM: /* XXX */ + case PIPE_FORMAT_B8G8R8A8_UNORM: /* XXX */ + case PIPE_FORMAT_R8G8B8A8_UNORM: /* XXX */ + return 
BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + case PIPE_FORMAT_R8G8B8X8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + + case PIPE_FORMAT_R5G6B5_UNORM: + return BRW_SURFACEFORMAT_B5G6R5_UNORM; + + case PIPE_FORMAT_A1R5G5B5_UNORM: + return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + + case PIPE_FORMAT_A4R4G4B4_UNORM: + return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + + + case PIPE_FORMAT_L16_UNORM: + return BRW_SURFACEFORMAT_L16_UNORM; + + /* XXX: Z texturing: + case PIPE_FORMAT_I16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; + */ + + /* XXX: Z texturing: + case PIPE_FORMAT_A16_UNORM: + return BRW_SURFACEFORMAT_A16_UNORM; + */ + + case PIPE_FORMAT_YCBCR_REV: + return BRW_SURFACEFORMAT_YCRCB_NORMAL; + + case PIPE_FORMAT_YCBCR: + return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; + + /* XXX: Add FXT to gallium? + case PIPE_FORMAT_FXT1_RGBA: + return BRW_SURFACEFORMAT_FXT1; + */ + + case PIPE_FORMAT_DXT1_RGB: + return BRW_SURFACEFORMAT_DXT1_RGB; + + case PIPE_FORMAT_DXT1_RGBA: + return BRW_SURFACEFORMAT_BC1_UNORM; + + case PIPE_FORMAT_DXT3_RGBA: + return BRW_SURFACEFORMAT_BC2_UNORM; + + case PIPE_FORMAT_DXT5_RGBA: + return BRW_SURFACEFORMAT_BC3_UNORM; + + case PIPE_FORMAT_R8G8B8A8_SRGB: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; + + case PIPE_FORMAT_A8L8_SRGB: + return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; + + case PIPE_FORMAT_L8_SRGB: + return BRW_SURFACEFORMAT_L8_UNORM_SRGB; + + case PIPE_FORMAT_DXT1_SRGB: + return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; + + /* XXX: which pipe depth formats does i965 suppport + */ + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + return BRW_SURFACEFORMAT_I24X8_UNORM; + +#if 0 + /* XXX: these different surface formats don't seem to + * make any difference for shadow sampler/compares. 
+ */ + if (depth_mode == GL_INTENSITY) + return BRW_SURFACEFORMAT_I24X8_UNORM; + else if (depth_mode == GL_ALPHA) + return BRW_SURFACEFORMAT_A24X8_UNORM; + else + return BRW_SURFACEFORMAT_L24X8_UNORM; +#endif + + /* XXX: presumably for bump mapping. Add this to mesa state + * tracker? + */ + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + default: + assert(0); + return 0; + } +} + +static void +brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) +{ + switch (tiling) { + case BRW_TILING_NONE: + surf->ss3.tiled_surface = 0; + surf->ss3.tile_walk = 0; + break; + case BRW_TILING_X: + surf->ss3.tiled_surface = 1; + surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR; + break; + case BRW_TILING_Y: + surf->ss3.tiled_surface = 1; + surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR; + break; + } +} + + GLboolean brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt, uint32_t tiling) @@ -216,3 +376,58 @@ GLboolean brw_miptree_layout(struct brw_context *brw, return GL_TRUE; } + +static void brw_create_texture( struct pipe_screen *screen, + const pipe_texture *templ ) + +{ + + key.format = tex->base.format; + key.pitch = tex->pitch; + key.depth = tex->base.depth[0]; + key.bo = tex->buffer; + key.offset = 0; + + key.target = tex->brw_target; /* translated to BRW enum */ + //key.depthmode = 0; /* XXX: add this to gallium? or the state tracker? 
*/ + key.last_level = tex->base.last_level; + key.width = tex->base.depth[0]; + key.height = tex->base.height[0]; + key.cpp = tex->cpp; + key.tiling = tex->tiling; + + + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = translate_tex_target(key->target); + surf.ss0.surface_format = translate_tex_format(key->format /* , key->depthmode */ ); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + assert(key->bo); + surf.ss1.base_addr = key->bo->offset; /* reloc */ + surf.ss2.mip_count = key->last_level; + surf.ss2.width = key->width - 1; + surf.ss2.height = key->height - 1; + brw_set_surface_tiling(&surf, key->tiling); + surf.ss3.pitch = (key->pitch * key->cpp) - 1; + surf.ss3.depth = key->depth - 1; + + surf.ss4.min_lod = 0; + + if (key->target == PIPE_TEXTURE_CUBE) { + surf.ss0.cube_pos_x = 1; + surf.ss0.cube_pos_y = 1; + surf.ss0.cube_pos_z = 1; + surf.ss0.cube_neg_x = 1; + surf.ss0.cube_neg_y = 1; + surf.ss0.cube_neg_z = 1; + } + +} + + + + + diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index 2275e9ad69..b47b04fd46 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -94,19 +94,6 @@ const struct brw_tracked_state brw_indices; const struct brw_tracked_state brw_vertices; const struct brw_tracked_state brw_index_buffer; -/** - * Use same key for WM and VS surfaces. 
- */ -struct brw_surface_key { - unsigned target; - struct brw_winsys_buffer *bo; - GLint format; - GLint first_level, last_level; - GLint width, height, depth; - GLint pitch, cpp; - uint32_t tiling; - GLuint offset; -}; /*********************************************************************** * brw_state.c @@ -171,9 +158,6 @@ void brw_clear_batch_cache( struct brw_context *brw ); /*********************************************************************** * brw_wm_surface_state.c */ -struct brw_winsys_buffer * -brw_create_constant_surface( struct brw_context *brw, - struct brw_surface_key *key ); /*********************************************************************** * brw_state_debug.c diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index 4310d01ba2..9cf44f7a5c 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -179,7 +179,8 @@ brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs, + struct brw_winsys_buffer **reloc_bufs, + GLuint nr_reloc_bufs, void *aux_return) { struct brw_cache_item *item; diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h index f5d6a2599b..bf10bc04de 100644 --- a/src/gallium/drivers/i965/brw_structs.h +++ b/src/gallium/drivers/i965/brw_structs.h @@ -1048,7 +1048,7 @@ struct brw_sf_viewport */ struct brw_surface_state { - struct { + struct brw_surf_ss0 { GLuint cube_pos_z:1; GLuint cube_neg_z:1; GLuint cube_pos_y:1; @@ -1070,18 +1070,18 @@ struct brw_surface_state GLuint surface_type:3; /**< BRW_SURFACE_1D/2D/3D/CUBE */ } ss0; - struct { + struct brw_surf_ss1 { GLuint base_addr; } ss1; - struct { + struct brw_surf_ss2 { GLuint pad:2; GLuint mip_count:4; GLuint width:13; GLuint height:13; } ss2; - struct { + struct brw_surf_ss3 { GLuint tile_walk:1; GLuint tiled_surface:1; GLuint pad:1; 
@@ -1089,7 +1089,7 @@ struct brw_surface_state GLuint depth:11; } ss3; - struct { + struct brw_surf_ss4 { GLuint multisample_position_palette_index:3; GLuint pad1:1; GLuint num_multisamples:3; @@ -1099,7 +1099,7 @@ struct brw_surface_state GLuint min_lod:4; } ss4; - struct { + struct brw_surf_ss5 { GLuint pad1:16; GLuint llc_mapping:1; GLuint mlc_mapping:1; diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index 7157feb6f3..88485c76cb 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -29,448 +29,49 @@ * Keith Whitwell */ +#include "pipe/p_format.h" #include "brw_batchbuffer.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_screen.h" -static GLuint translate_tex_target( GLenum target ) -{ - switch (target) { - case GL_TEXTURE_1D: - return BRW_SURFACE_1D; - - case GL_TEXTURE_RECTANGLE_NV: - return BRW_SURFACE_2D; - - case GL_TEXTURE_2D: - return BRW_SURFACE_2D; - - case GL_TEXTURE_3D: - return BRW_SURFACE_3D; - - case GL_TEXTURE_CUBE_MAP: - return BRW_SURFACE_CUBE; - - default: - assert(0); - return 0; - } -} - - -static GLuint translate_tex_format( GLuint mesa_format, - GLenum depth_mode ) -{ - switch( pipe_format ) { - case PIPE_FORMAT_L8_UNORM: - return BRW_SURFACEFORMAT_L8_UNORM; - - case PIPE_FORMAT_I8_UNORM: - return BRW_SURFACEFORMAT_I8_UNORM; - - case PIPE_FORMAT_A8_UNORM: - return BRW_SURFACEFORMAT_A8_UNORM; - - case PIPE_FORMAT_A8L8_UNORM: - return BRW_SURFACEFORMAT_L8A8_UNORM; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_B8G8R8A8_UNORM: - case PIPE_FORMAT_R8G8B8A8_UNORM: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - - case PIPE_FORMAT_R8G8B8X8_UNORM: - return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; - - case PIPE_FORMAT_: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - case PIPE_FORMAT_RGB565: - return BRW_SURFACEFORMAT_B5G6R5_UNORM; - - case PIPE_FORMAT_ARGB1555: - return 
BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - - case PIPE_FORMAT_ARGB4444: - return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - - - case PIPE_FORMAT_L16_UNORM: - return BRW_SURFACEFORMAT_L16_UNORM; - - case PIPE_FORMAT_I16_UNORM: - return BRW_SURFACEFORMAT_I16_UNORM; - - case PIPE_FORMAT_A16_UNORM: - return BRW_SURFACEFORMAT_A16_UNORM; - - case PIPE_FORMAT_YCBCR_REV: - return BRW_SURFACEFORMAT_YCRCB_NORMAL; - - case PIPE_FORMAT_YCBCR: - return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; - - case PIPE_FORMAT_RGB_FXT1: - case PIPE_FORMAT_RGBA_FXT1: - return BRW_SURFACEFORMAT_FXT1; - - case PIPE_FORMAT_RGB_DXT1: - return BRW_SURFACEFORMAT_DXT1_RGB; - - case PIPE_FORMAT_RGBA_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM; - - case PIPE_FORMAT_RGBA_DXT3: - return BRW_SURFACEFORMAT_BC2_UNORM; - - case PIPE_FORMAT_RGBA_DXT5: - return BRW_SURFACEFORMAT_BC3_UNORM; - - case PIPE_FORMAT_R8G8B8A8_SRGB: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; - - case PIPE_FORMAT_A8L8_SRGB: - return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; - - case PIPE_FORMAT_L8_SRGB: - return BRW_SURFACEFORMAT_L8_UNORM_SRGB; - - case PIPE_FORMAT_SRGB_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; - - case PIPE_FORMAT_S8_Z24: - /* XXX: these different surface formats don't seem to - * make any difference for shadow sampler/compares. 
- */ - if (depth_mode == GL_INTENSITY) - return BRW_SURFACEFORMAT_I24X8_UNORM; - else if (depth_mode == GL_ALPHA) - return BRW_SURFACEFORMAT_A24X8_UNORM; - else - return BRW_SURFACEFORMAT_L24X8_UNORM; - - case PIPE_FORMAT_DUDV8: - return BRW_SURFACEFORMAT_R8G8_SNORM; - - case PIPE_FORMAT_SIGNED_RGBA8888_REV: - return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; - - default: - assert(0); - return 0; - } -} - -static void -brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) -{ - switch (tiling) { - case I915_TILING_NONE: - surf->ss3.tiled_surface = 0; - surf->ss3.tile_walk = 0; - break; - case I915_TILING_X: - surf->ss3.tiled_surface = 1; - surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR; - break; - case I915_TILING_Y: - surf->ss3.tiled_surface = 1; - surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR; - break; - } -} - -static struct brw_winsys_buffer * -brw_create_texture_surface( struct brw_context *brw, - struct brw_surface_key *key ) -{ - struct brw_surface_state surf; - struct brw_winsys_buffer *bo; - - memset(&surf, 0, sizeof(surf)); - - surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf.ss0.surface_type = translate_tex_target(key->target); - if (key->bo) { - surf.ss0.surface_format = translate_tex_format(key->format, - key->internal_format, - key->depthmode); - } - else { - switch (key->depth) { - case 32: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; - default: - case 24: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - break; - case 16: - surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - break; - } - } - - /* This is ok for all textures with channel width 8bit or less: - */ -/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - if (key->bo) - surf.ss1.base_addr = key->bo->offset; /* reloc */ - else - surf.ss1.base_addr = key->offset; - - surf.ss2.mip_count = key->last_level; - surf.ss2.width = key->width - 1; - surf.ss2.height = key->height - 1; - brw_set_surface_tiling(&surf, 
key->tiling); - surf.ss3.pitch = (key->pitch * key->cpp) - 1; - surf.ss3.depth = key->depth - 1; - - surf.ss4.min_lod = 0; - - if (key->target == GL_TEXTURE_CUBE_MAP) { - surf.ss0.cube_pos_x = 1; - surf.ss0.cube_pos_y = 1; - surf.ss0.cube_pos_z = 1; - surf.ss0.cube_neg_x = 1; - surf.ss0.cube_neg_y = 1; - surf.ss0.cube_neg_z = 1; - } - - bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, - key, sizeof(*key), - &key->bo, key->bo ? 1 : 0, - &surf, sizeof(surf), - NULL, NULL); - - if (key->bo) { - /* Emit relocation to surface contents */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); - } - return bo; -} static void -brw_update_texture_surface( struct brw_context *brw, GLuint unit ) +brw_update_texture_surface( struct brw_context *brw, + struct brw_texture *tex, + GLuint surf ) { - struct pipe_texture *tex = brw->texture[unit]; - struct brw_surface_key key; - const GLuint surf = SURF_INDEX_TEXTURE(unit); - - memset(&key, 0, sizeof(key)); - - key.format = tex->base.format; - key.pitch = tex->pitch; - key.depth = tex->base.depth[0]; - key.bo = tex->buffer; - key.offset = 0; - - key.target = tObj->target; /* translated to BRW enum */ - /* key.depthmode = tObj->DepthMode; */ /* XXX: add this to gallium? or the state tracker? */ - key.first_level = 0; - key.last_level = tex->base.last_level; - key.width = tex->base.depth[0]; - key.height = tex->base.height[0]; - key.cpp = tex->cpp; - key.tiling = tex->tiling; - - brw->sws->bo_unreference(brw->wm.surf_bo[surf]); brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, + &tex->ss, sizeof tex->ss, + &tex->bo, 1, NULL); - if (brw->wm.surf_bo[surf] == NULL) { - brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key); - } -} - - - -/** - * Create the constant buffer surface. Vertex/fragment shader constants will be - * read from this buffer with Data Port Read instructions/messages. 
- */ -struct brw_winsys_buffer * -brw_create_constant_surface( struct brw_context *brw, - struct brw_surface_key *key ) -{ - const GLint w = key->width - 1; - struct brw_surface_state surf; - struct brw_winsys_buffer *bo; - - memset(&surf, 0, sizeof(surf)); - - surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf.ss0.surface_type = BRW_SURFACE_BUFFER; - surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - - assert(key->bo); - if (key->bo) - surf.ss1.base_addr = key->bo->offset; /* reloc */ - else - surf.ss1.base_addr = key->offset; - - surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ - surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ - surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ - surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ - brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ - - bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, - key, sizeof(*key), - &key->bo, key->bo ? 1 : 0, - &surf, sizeof(surf), - NULL, NULL); - if (key->bo) { + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, + &tex->ss, sizeof tex->ss, + &tex->bo, 1, + &tex->ss, sizeof tex->ss, + NULL, NULL); + /* Emit relocation to surface contents */ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); + brw->sws->bo_emit_reloc(brw->wm.surf_bo[surf], + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + tex->bo); } - - return bo; } -/* Creates a new WM constant buffer reflecting the current fragment program's - * constants, if needed by the fragment program. - * - * Otherwise, constants go through the CURBEs using the brw_constant_buffer - * state atom. 
- */ -static drm_intel_bo * -brw_wm_update_constant_buffer(struct brw_context *brw) -{ - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - const struct gl_program_parameter_list *params = fp->program.Base.Parameters; - const int size = params->NumParameters * 4 * sizeof(GLfloat); - drm_intel_bo *const_buffer; - /* BRW_NEW_FRAGMENT_PROGRAM */ - if (!fp->use_const_buffer) - return NULL; - const_buffer = drm_intel_bo_alloc(intel->bufmgr, - BRW_BUFFER_TYPE_SHADER_CONSTANTS, - size, 64); - /* _NEW_PROGRAM_CONSTANTS */ - dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); - return const_buffer; -} -/** - * Update the surface state for a WM constant buffer. - * The constant buffer will be (re)allocated here if needed. - */ -static void -brw_update_wm_constant_surface( struct brw_context *brw, - GLuint surf) -{ - struct brw_context *brw = brw_context(ctx); - struct brw_surface_key key; - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - const struct gl_program_parameter_list *params = - fp->program.Base.Parameters; - - /* If we're in this state update atom, we need to update WM constants, so - * free the old buffer and create a new one for the new contents. - */ - brw->sws->bo_unreference(fp->const_buffer); - fp->const_buffer = brw_wm_update_constant_buffer(brw); - - /* If there's no constant buffer, then no surface BO is needed to point at - * it. 
- */ - if (fp->const_buffer == 0) { - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = NULL; - return; - } - - memset(&key, 0, sizeof(key)); - - key.format = PIPE_FORMAT_RGBA_FLOAT32; - key.internal_format = GL_RGBA; - key.bo = fp->const_buffer; - key.depthmode = GL_NONE; - key.pitch = params->NumParameters; - key.width = params->NumParameters; - key.height = 1; - key.depth = 1; - key.cpp = 16; - - /* - printf("%s:\n", __FUNCTION__); - printf(" width %d height %d depth %d cpp %d pitch %d\n", - key.width, key.height, key.depth, key.cpp, key.pitch); - */ - - brw->sws->bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->wm.surf_bo[surf] == NULL) { - brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); - } - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; -} - -/** - * Updates surface / buffer for fragment shader constant buffer, if - * one is required. - * - * This consumes the state updates for the constant buffer, and produces - * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for - * inclusion in the binding table. - */ -static void prepare_wm_constant_surface(struct brw_context *brw ) -{ - struct brw_fragment_program *fp = - (struct brw_fragment_program *) brw->fragment_program; - GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - - drm_intel_bo_unreference(fp->const_buffer); - fp->const_buffer = brw_wm_update_constant_buffer(brw); - - /* If there's no constant buffer, then no surface BO is needed to point at - * it. 
- */ - if (fp->const_buffer == 0) { - if (brw->wm.surf_bo[surf] != NULL) { - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = NULL; - brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; - } - return; - } - - brw_update_wm_constant_surface(ctx, surf); -} - -const struct brw_tracked_state brw_wm_constant_surface = { - .dirty = { - .mesa = (_NEW_PROGRAM_CONSTANTS), - .brw = (BRW_NEW_FRAGMENT_PROGRAM), - .cache = 0 - }, - .prepare = prepare_wm_constant_surface, -}; /** @@ -480,142 +81,46 @@ const struct brw_tracked_state brw_wm_constant_surface = { */ static void brw_update_renderbuffer_surface(struct brw_context *brw, - struct gl_renderbuffer *rb, + struct brw_surface *surface, unsigned int unit) { - struct brw_winsys_buffer *region_bo = NULL; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - struct intel_region *region = irb ? irb->region : NULL; - struct { - unsigned int surface_type; - unsigned int surface_format; - unsigned int width, height, pitch, cpp; - GLubyte color_mask[4]; - GLboolean color_blend; - uint32_t tiling; - uint32_t draw_offset; - } key; - - memset(&key, 0, sizeof(key)); - - if (region != NULL) { - region_bo = region->buffer; - - key.surface_type = BRW_SURFACE_2D; - switch (irb->texformat->MesaFormat) { - case PIPE_FORMAT_ARGB8888: - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; - case PIPE_FORMAT_RGB565: - key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - break; - case PIPE_FORMAT_ARGB1555: - key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - break; - case PIPE_FORMAT_ARGB4444: - key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - break; - default: - debug_printf("Bad renderbuffer format: %d\n", - irb->texformat->MesaFormat); - assert(0); - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - return; - } - key.tiling = region->tiling; - if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) { - key.width = rb->Width; - key.height = rb->Height; - } else { - key.width = 
region->width; - key.height = region->height; - } - key.pitch = region->pitch; - key.cpp = region->cpp; - key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ - } else { - key.surface_type = BRW_SURFACE_NULL; - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - key.tiling = I915_TILING_X; - key.width = 1; - key.height = 1; - key.cpp = 4; - key.draw_offset = 0; - } - memcpy(key.color_mask, ctx->Color.ColorMask, - sizeof(key.color_mask)); - key.color_blend = (!ctx->Color._LogicOpEnabled && - ctx->Color.BlendEnabled); + struct brw_surf_ss0 blend_ss0 = brw->curr.blend->ss0; + struct brw_surface_state ss; + + /* Surfaces are potentially shared between contexts, so can't + * scribble the in-place ss0 value in the surface. + */ + memcpy(&ss, &surface->ss, sizeof ss); + + ss.ss0.color_blend = blend_ss0.color_blend; + ss.ss0.writedisable_blue = blend_ss0.writedisable_blue; + ss.ss0.writedisable_green = blend_ss0.writedisable_green; + ss.ss0.writedisable_red = blend_ss0.writedisable_red; + ss.ss0.writedisable_alpha = blend_ss0.writedisable_alpha; brw->sws->bo_unreference(brw->wm.surf_bo[unit]); brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, + &ss, sizeof(ss), + &surface->bo, 1, NULL); if (brw->wm.surf_bo[unit] == NULL) { - struct brw_surface_state surf; - - memset(&surf, 0, sizeof(surf)); - - surf.ss0.surface_format = key.surface_format; - surf.ss0.surface_type = key.surface_type; - if (key.tiling == I915_TILING_NONE) { - surf.ss1.base_addr = key.draw_offset; - } else { - uint32_t tile_offset = key.draw_offset % 4096; - - surf.ss1.base_addr = key.draw_offset - tile_offset; - - assert(BRW_IS_G4X(brw) || tile_offset == 0); - if (BRW_IS_G4X(brw)) { - if (key.tiling == I915_TILING_X) { - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. 
- */ - surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4; - surf.ss5.y_offset = tile_offset / 512 / 2; - } else { - surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4; - surf.ss5.y_offset = tile_offset / 128 / 2; - } - } - } - if (region_bo != NULL) - surf.ss1.base_addr += region_bo->offset; /* reloc */ - - surf.ss2.width = key.width - 1; - surf.ss2.height = key.height - 1; - brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.pitch * key.cpp) - 1; - - /* _NEW_COLOR */ - surf.ss0.color_blend = key.color_blend; - surf.ss0.writedisable_red = !key.color_mask[0]; - surf.ss0.writedisable_green = !key.color_mask[1]; - surf.ss0.writedisable_blue = !key.color_mask[2]; - surf.ss0.writedisable_alpha = !key.color_mask[3]; - - /* Key size will never match key size for textures, so we're safe. */ + brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, - &key, sizeof(key), - ®ion_bo, 1, - &surf, sizeof(surf), + &ss, sizeof ss, + &surface->bo, 1, + &ss, sizeof ss, NULL, NULL); - if (region_bo != NULL) { - /* We might sample from it, and we might render to it, so flag - * them both. We might be able to figure out from other state - * a more restrictive relocation to emit. 
- */ - drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], - offsetof(struct brw_surface_state, ss1), - region_bo, - surf.ss1.base_addr - region_bo->offset, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); - } + + /* XXX: we will only be rendering to this surface: + */ + brw->sws->bo_emit_reloc(brw->wm.surf_bo[unit], + I915_GEM_DOMAIN_RENDER, 0, + ss.ss1.base_addr - surface->bo->offset, /* XXX */ + offsetof(struct brw_surface_state, ss1), + surface->bo); } } @@ -631,21 +136,21 @@ brw_wm_get_binding_table(struct brw_context *brw) assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); + /* Note there is no key for this search beyond the values in the + * relocation array: + */ bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, NULL); if (bind_bo == NULL) { - GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint); uint32_t data[BRW_WM_MAX_SURF]; + GLuint data_size = brw->wm.nr_surfaces * sizeof data[0]; int i; for (i = 0; i < brw->wm.nr_surfaces; i++) - if (brw->wm.surf_bo[i]) - data[i] = brw->wm.surf_bo[i]->offset; - else - data[i] = 0; + data[i] = brw->wm.surf_bo[i]->offset; bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, @@ -654,70 +159,79 @@ brw_wm_get_binding_table(struct brw_context *brw) NULL, NULL); /* Emit binding table relocations to surface state */ - for (i = 0; i < BRW_WM_MAX_SURF; i++) { - if (brw->wm.surf_bo[i] != NULL) { - dri_bo_emit_reloc(bind_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - i * sizeof(GLuint), - brw->wm.surf_bo[i]); - } + for (i = 0; i < brw->wm.nr_surfaces; i++) { + brw->sws->bo_emit_reloc(bind_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(GLuint), + brw->wm.surf_bo[i]); } } return bind_bo; } -static void prepare_wm_surfaces(struct brw_context *brw ) +static int prepare_wm_surfaces(struct brw_context *brw ) { GLuint i; - int old_nr_surfaces; - - /* _NEW_BUFFERS */ - /* Update surfaces for drawing buffers */ - if 
(ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - brw_update_renderbuffer_surface(brw, - ctx->DrawBuffer->_ColorDrawBuffers[i], - i); - } - } else { - brw_update_renderbuffer_surface(brw, NULL, 0); + int nr_surfaces = 0; + + /* Unreference old buffers + */ + for (i = 0; i < brw->wm.nr_surfaces; i++) { + brw->sws->bo_unreference(brw->wm.surf_bo[i]); + brw->wm.surf_bo[i] = NULL; } - old_nr_surfaces = brw->wm.nr_surfaces; - brw->wm.nr_surfaces = PIPE_MAX_COLOR_BUFS; - if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL) - brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1; + /* PIPE_NEW_COLOR_BUFFERS | PIPE_NEW_BLEND + * + * Update surfaces for drawing buffers. Mixes in colormask and + * blend state. + * + * XXX: no color buffer case + */ + for (i = 0; i < brw->curr.fb.nr_cbufs; i++) { + brw_update_renderbuffer_surface(brw, + brw_surface(brw->curr.fb.cbufs[i]), + nr_surfaces++); + } - /* Update surfaces for textures */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { - const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; - const GLuint surf = SURF_INDEX_TEXTURE(i); + /* PIPE_NEW_TEXTURE + */ + for (i = 0; i < brw->curr.num_textures; i++) { + brw_update_texture_surface(brw, + brw->curr.texture[i], + nr_surfaces++); + } - /* _NEW_TEXTURE, BRW_NEW_TEXDATA */ - if (texUnit->_ReallyEnabled) { - brw_update_texture_surface(ctx, i); - brw->wm.nr_surfaces = surf + 1; - } else { - brw->sws->bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = NULL; - } + /* PIPE_NEW_FRAGMENT_CONSTANTS + */ +#if 0 + if (brw->curr.fragment_constants) { + brw_update_fragment_constant_surface(brw, + brw->curr.fragment_constants, + nr_surfaces++); } +#endif brw->sws->bo_unreference(brw->wm.bind_bo); brw->wm.bind_bo = brw_wm_get_binding_table(brw); - if (brw->wm.nr_surfaces != old_nr_surfaces) + if (brw->wm.nr_surfaces != nr_surfaces) { + brw->wm.nr_surfaces = nr_surfaces; brw->state.dirty.brw |= 
BRW_NEW_NR_WM_SURFACES; + } + + return 0; } const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = (_NEW_COLOR | - _NEW_TEXTURE | - _NEW_BUFFERS), + .mesa = (PIPE_NEW_COLOR_BUFFERS | + PIPE_NEW_BOUND_TEXTURES | + PIPE_NEW_FRAGMENT_CONSTANTS | + PIPE_NEW_BLEND), .brw = (BRW_NEW_CONTEXT | BRW_NEW_WM_SURFACES), .cache = 0 -- cgit v1.2.3 From 39448a9aa061291f4253ee2a1a42e2488e14233c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 1 Nov 2009 13:11:56 +0000 Subject: i965g: more files compiling --- src/gallium/drivers/i965/Makefile | 5 +- src/gallium/drivers/i965/brw_batchbuffer.c | 56 ++++--- src/gallium/drivers/i965/brw_bo.c | 12 -- src/gallium/drivers/i965/brw_context.h | 1 + src/gallium/drivers/i965/brw_pipe_blend.c | 12 ++ src/gallium/drivers/i965/brw_pipe_flush.c | 25 ++-- src/gallium/drivers/i965/brw_pipe_shader.c | 226 ++++++++++++++--------------- 7 files changed, 176 insertions(+), 161 deletions(-) delete mode 100644 src/gallium/drivers/i965/brw_bo.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 896cb234a6..ae37d2d702 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -29,6 +29,8 @@ C_SOURCES = \ brw_pipe_depth.c \ brw_pipe_fb.c \ brw_pipe_query.c \ + brw_pipe_shader.c \ + brw_pipe_flush.c \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ @@ -56,10 +58,7 @@ C_SOURCES = \ brw_wm_surface_state.c \ brw_screen_surface.c \ brw_screen_texture.c \ - brw_bo.c \ brw_batchbuffer.c \ - brw_pipe_shader.c \ - brw_pipe_flush.c \ intel_tex_layout.c include ../../Makefile.template diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index 45fbd59273..1cffc0ab39 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -109,12 +109,13 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, debug_printf("%s:%d: Batchbuffer 
flush with %db used\n", file, line, used); - /* Emit a flush if the bufmgr doesn't do it for us. */ - if (intel->always_flush_cache || !intel->ttm) { +#if 0 + if (intel->always_flush_cache || 1) { *(GLuint *) (batch->ptr) = ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); batch->ptr += 4; used = batch->ptr - batch->map; } +#endif /* Round batchbuffer usage to 2 DWORDs. */ @@ -137,16 +138,25 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 ); if (BRW_DEBUG & DEBUG_BATCH) { - dri_bo_map(batch->buf, GL_FALSE); - intel_decode(batch->buf->virtual, used / 4, batch->buf->offset, - brw->brw_screen->pci_id); - dri_bo_unmap(batch->buf); + void *ptr = batch->sws->bo_map(batch->buf, GL_FALSE); + + intel_decode(ptr, + used / 4, + batch->buf->offset, + batch->chipset); + + batch->sws->bo_unmap(batch->buf); } if (BRW_DEBUG & DEBUG_SYNC) { + /* Abuse map/unmap to achieve wait-for-fence. + * + * XXX: hide this inside the winsys and export a fence + * interface. + */ debug_printf("waiting for idle\n"); - dri_bo_map(batch->buf, GL_TRUE); - dri_bo_unmap(batch->buf); + batch->sws->bo_map(batch->buf, GL_TRUE); + batch->sws->bo_unmap(batch->buf); } /* Reset the buffer: @@ -155,9 +165,10 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, } -/* This is the only way buffers get added to the validate list. +/* The OUT_RELOC() macro ends up here, generating a relocation within + * the batch buffer. 
*/ -GLboolean +enum pipe_error brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, struct brw_winsys_buffer *buffer, uint32_t read_domains, uint32_t write_domain, @@ -165,9 +176,12 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, { int ret; - if (batch->ptr - batch->map > batch->buf->size) - debug_printf ("bad relocation ptr %p map %p offset %d size %d\n", - batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + if (batch->ptr - batch->map > batch->buf->size) { + debug_printf("bad relocation ptr %p map %p offset %d size %d\n", + batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); + + return PIPE_ERROR_OUT_OF_MEMORY; + } ret = batch->sws->bo_emit_reloc(batch->buf, read_domains, @@ -175,6 +189,8 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, delta, batch->ptr - batch->map, buffer); + if (ret != 0) + return ret; /* * Using the old buffer offset, write in what the right data would be, in case @@ -182,17 +198,23 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, * in the kernel */ brw_batchbuffer_emit_dword (batch, buffer->offset + delta); - - return GL_TRUE; + return 0; } -void +enum pipe_error brw_batchbuffer_data(struct brw_batchbuffer *batch, const void *data, GLuint bytes, enum cliprect_mode cliprect_mode) { + enum pipe_error ret; + assert((bytes & 3) == 0); - brw_batchbuffer_require_space(batch, bytes); + + ret = brw_batchbuffer_require_space(batch, bytes); + if (ret) + return ret; + __memcpy(batch->ptr, data, bytes); batch->ptr += bytes; + return 0; } diff --git a/src/gallium/drivers/i965/brw_bo.c b/src/gallium/drivers/i965/brw_bo.c deleted file mode 100644 index e7a4dac666..0000000000 --- a/src/gallium/drivers/i965/brw_bo.c +++ /dev/null @@ -1,12 +0,0 @@ - - -void brw_buffer_subdata() -{ - if (intel->intelScreen->kernel_exec_fencing) { - drm_intel_gem_bo_map_gtt(bo); - memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); - drm_intel_gem_bo_unmap_gtt(bo); - } else { - 
dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr); - } -} diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 471855ab63..3e9315c41f 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -169,6 +169,7 @@ struct brw_fragment_shader { struct tgsi_shader_info info; unsigned iz_lookup; + //unsigned wm_lookup; boolean uses_depth:1; boolean has_flow_control:1; diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c index d3bb882b1a..cc9ee2e8db 100644 --- a/src/gallium/drivers/i965/brw_pipe_blend.c +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -130,6 +130,11 @@ static void *brw_create_blend_state( struct pipe_context *pipe, (blend->cc6.dest_blend_factor != blend->cc5.ia_dest_blend_factor || blend->cc6.src_blend_factor != blend->cc5.ia_src_blend_factor || blend->cc6.blend_function != blend->cc5.ia_blend_function); + + /* Per-surface blend enables, currently just follow global + * state: + */ + blend->ss0.color_blend = 1; } blend->cc5.dither_enable = templ->dither; @@ -137,6 +142,13 @@ static void *brw_create_blend_state( struct pipe_context *pipe, if (BRW_DEBUG & DEBUG_STATS) blend->cc5.statistics_enable = 1; + /* Per-surface color mask -- just follow global state: + */ + blend->ss0.writedisable_red = (templ->colormask & PIPE_MASK_R) ? 1 : 0; + blend->ss0.writedisable_green = (templ->colormask & PIPE_MASK_G) ? 1 : 0; + blend->ss0.writedisable_blue = (templ->colormask & PIPE_MASK_B) ? 1 : 0; + blend->ss0.writedisable_alpha = (templ->colormask & PIPE_MASK_A) ? 
1 : 0; + return (void *)blend; } diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index fb4a784de9..1b43428760 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -1,11 +1,15 @@ +#include "util/u_upload_mgr.h" + +#include "brw_context.h" + + /** * called from brw_batchbuffer_flush and children before sending a * batchbuffer off. */ -static void brw_finish_batch(struct intel_context *intel) +static void brw_finish_batch(struct brw_context *brw) { - struct brw_context *brw = brw_context(&intel->ctx); brw_emit_query_end(brw); } @@ -15,9 +19,6 @@ static void brw_finish_batch(struct intel_context *intel) */ static void brw_new_batch( struct brw_context *brw ) { - /* Check that we didn't just wrap our batchbuffer at a bad time. */ - assert(!brw->no_batch_wrap); - brw->curbe.need_new_bo = GL_TRUE; /* Mark all context state as needing to be re-emitted. @@ -33,17 +34,9 @@ static void brw_new_batch( struct brw_context *brw ) /* Move to the end of the current upload buffer so that we'll force choosing * a new buffer next time. 
*/ - if (brw->vb.upload.bo != NULL) { - brw->sws->bo_unreference(brw->vb.upload.bo); - brw->vb.upload.bo = NULL; - brw->vb.upload.offset = 0; - } -} - + u_upload_flush( brw->vb.upload_vertex ); + u_upload_flush( brw->vb.upload_index ); -static void brw_note_fence( struct brw_context *brw, GLuint fence ) -{ - brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE; } /* called from intelWaitForIdle() and intelFlush() @@ -52,7 +45,7 @@ static void brw_note_fence( struct brw_context *brw, GLuint fence ) */ static GLuint brw_flush_cmd( void ) { - return ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); + return ((MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); } diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 6e37eac634..2422f77f34 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -28,151 +28,151 @@ * Authors: * Keith Whitwell */ + +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" + #include "brw_context.h" #include "brw_util.h" #include "brw_wm.h" /** - * Determine if the given fragment program uses GLSL features such - * as flow conditionals, loops, subroutines. - * Some GLSL shaders may use these features, others might not. + * Determine if the given shader uses complex features such as flow + * conditionals, loops, subroutines. 
*/ GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp) { - return (fp->info.insn_count[TGSI_OPCODE_ARL] > 0 || - fp->info.insn_count[TGSI_OPCODE_IF] > 0 || - fp->info.insn_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */ - fp->info.insn_count[TGSI_OPCODE_CAL] > 0 || - fp->info.insn_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */ - fp->info.insn_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */ - fp->info.insn_count[TGSI_OPCODE_BGNLOOP] > 0); + return (fp->info.opcode_count[TGSI_OPCODE_ARL] > 0 || + fp->info.opcode_count[TGSI_OPCODE_IF] > 0 || + fp->info.opcode_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */ + fp->info.opcode_count[TGSI_OPCODE_CAL] > 0 || + fp->info.opcode_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */ + fp->info.opcode_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */ + fp->info.opcode_count[TGSI_OPCODE_BGNLOOP] > 0); } -static void brwBindProgram( struct brw_context *brw, - GLenum target, - struct gl_program *prog ) +static void brw_bind_fs_state( struct pipe_context *pipe, void *prog ) { - struct brw_context *brw = brw_context(ctx); - - switch (target) { - case GL_VERTEX_PROGRAM_ARB: - brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - break; - case GL_FRAGMENT_PROGRAM_ARB: - brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - break; - } + struct brw_context *brw = brw_context(pipe); + + brw->curr.fragment_shader = (struct brw_fragment_shader *)prog; + brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SHADER; } -static struct gl_program *brwNewProgram( structg brw_context *brw, - GLenum target, - GLuint id ) +static void brw_bind_vs_state( struct pipe_context *pipe, void *prog ) { - struct brw_context *brw = brw_context(ctx); - - switch (target) { - case GL_VERTEX_PROGRAM_ARB: { - struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program); - if (prog) { - prog->id = brw->program_id++; - - return _mesa_init_vertex_program( ctx, &prog->program, - target, id ); - } - else - return NULL; - } - - case 
GL_FRAGMENT_PROGRAM_ARB: { - struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program); - if (prog) { - prog->id = brw->program_id++; - - return _mesa_init_fragment_program( ctx, &prog->program, - target, id ); - } - else - return NULL; - } - - default: - return _mesa_new_program(ctx, target, id); - } + struct brw_context *brw = brw_context(pipe); + + brw->curr.vertex_shader = (struct brw_vertex_shader *)prog; + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_SHADER; } -static void brwDeleteProgram( struct brw_context *brw, - struct gl_program *prog ) + + +static void *brw_create_fs_state( struct pipe_context *pipe, + const struct pipe_shader_state *shader ) { - if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { - struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog); - brw->sws->bo_unreference(brw_fprog->const_buffer); - } + struct brw_context *brw = brw_context(pipe); + struct brw_fragment_shader *fs; + int i; + + fs = CALLOC_STRUCT(brw_fragment_shader); + if (fs == NULL) + return NULL; + + /* Duplicate tokens, scan shader + */ + fs->id = brw->program_id++; + fs->has_flow_control = brw_wm_has_flow_control(fs); + + fs->tokens = tgsi_dup_tokens(shader->tokens); + if (fs->tokens == NULL) + goto fail; + + tgsi_scan_shader(fs->tokens, &fs->info); + + for (i = 0; i < fs->info.num_inputs; i++) + if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION) + fs->uses_depth = 1; + + if (fs->info.uses_kill) + fs->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fs->info.writes_z) + fs->iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + return (void *)fs; - _mesa_delete_program( ctx, prog ); +fail: + FREE(fs); + return NULL; } -static GLboolean brwIsProgramNative( struct brw_context *brw, - GLenum target, - struct gl_program *prog ) +static void *brw_create_vs_state( struct pipe_context *pipe, + const struct pipe_shader_state *shader ) { - return GL_TRUE; + struct brw_context *brw = 
brw_context(pipe); + + struct brw_vertex_shader *vs = CALLOC_STRUCT(brw_vertex_shader); + if (vs == NULL) + return NULL; + + /* Duplicate tokens, scan shader + */ + vs->id = brw->program_id++; + //vs->has_flow_control = brw_wm_has_flow_control(vs); + + /* Tell the draw module about this shader: + */ + + /* Done: + */ + return (void *)vs; } -static void brwProgramStringNotify( struct brw_context *brw, - GLenum target, - struct gl_program *prog ) + +static void brw_delete_fs_state( struct pipe_context *pipe, void *prog ) { - struct brw_context *brw = brw_context(ctx); - - if (target == GL_FRAGMENT_PROGRAM_ARB) { - struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; - struct brw_fragment_program *newFP = brw_fragment_program(fprog); - const struct brw_fragment_program *curFP = - brw_fragment_program_const(brw->fragment_program); - - if (fprog->FogOption) { - _mesa_append_fog_code(ctx, fprog); - fprog->FogOption = GL_NONE; - } - - if (newFP == curFP) - brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; - newFP->id = brw->program_id++; - newFP->has_flow_control = brw_wm_has_flow_control(fprog); - } - else if (target == GL_VERTEX_PROGRAM_ARB) { - struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; - struct brw_vertex_program *newVP = brw_vertex_program(vprog); - const struct brw_vertex_program *curVP = - brw_vertex_program_const(brw->vertex_program); - - if (newVP == curVP) - brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; - if (newVP->program.IsPositionInvariant) { - _mesa_insert_mvp_code(ctx, &newVP->program); - } - newVP->id = brw->program_id++; - - /* Also tell tnl about it: - */ - _tnl_program_string(ctx, target, prog); - } + struct brw_context *brw = brw_context(pipe); + struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog; + + brw->sws->bo_unreference(fs->const_buffer); + FREE( (void *)fs->tokens ); + FREE( fs ); } -void brwInitFragProgFuncs( struct dd_function_table *functions ) + +static void 
brw_delete_vs_state( struct pipe_context *pipe, void *prog ) { - assert(functions->ProgramStringNotify == _tnl_program_string); + struct brw_fragment_shader *vs = (struct brw_fragment_shader *)prog; - functions->BindProgram = brwBindProgram; - functions->NewProgram = brwNewProgram; - functions->DeleteProgram = brwDeleteProgram; - functions->IsProgramNative = brwIsProgramNative; - functions->ProgramStringNotify = brwProgramStringNotify; + /* Delete draw shader + */ + FREE( (void *)vs->tokens ); + FREE( vs ); } + + + + +void brw_pipe_shader_init( struct brw_context *brw ) +{ + brw->base.create_vs_state = brw_create_vs_state; + brw->base.bind_vs_state = brw_bind_vs_state; + brw->base.delete_vs_state = brw_delete_vs_state; + + brw->base.create_fs_state = brw_create_fs_state; + brw->base.bind_fs_state = brw_bind_fs_state; + brw->base.delete_fs_state = brw_delete_fs_state; +} + +void brw_pipe_shader_cleanup( struct brw_context *brw ) +{ +} -- cgit v1.2.3 From 15e7a3b8bb6771d24e5bde7805ea394f9ce0a3ec Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 1 Nov 2009 14:32:50 +0000 Subject: i965g: more files compiling --- src/gallium/drivers/i965/Makefile | 4 +- src/gallium/drivers/i965/brw_defines.h | 3 - src/gallium/drivers/i965/brw_screen.h | 20 ++ src/gallium/drivers/i965/brw_screen_tex_layout.c | 387 ++++++++++++++++++++++ src/gallium/drivers/i965/brw_screen_texture.c | 196 +---------- src/gallium/drivers/i965/brw_wm_constant_buffer.c | 151 +++++++++ src/gallium/drivers/i965/intel_tex_layout.c | 137 -------- 7 files changed, 576 insertions(+), 322 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_screen_tex_layout.c create mode 100644 src/gallium/drivers/i965/brw_wm_constant_buffer.c delete mode 100644 src/gallium/drivers/i965/intel_tex_layout.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index ae37d2d702..d88f34cb7e 100644 --- a/src/gallium/drivers/i965/Makefile +++ 
b/src/gallium/drivers/i965/Makefile @@ -56,9 +56,9 @@ C_SOURCES = \ brw_wm_sampler_state.c \ brw_wm_state.c \ brw_wm_surface_state.c \ + brw_screen_tex_layout.c \ brw_screen_surface.c \ brw_screen_texture.c \ - brw_batchbuffer.c \ - intel_tex_layout.c + brw_batchbuffer.c include ../../Makefile.template diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index 65cd71c939..92c6b6edc3 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ b/src/gallium/drivers/i965/brw_defines.h @@ -842,8 +842,5 @@ (BRW_IS_G4X(brw) ? 384 : 256)) /* 512 bit units */ -#define BRW_TILING_NONE 0 -#define BRW_TILING_Y 1 -#define BRW_TILING_X 2 #endif diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index 844c6355d5..bd04e689d9 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -65,6 +65,11 @@ struct brw_buffer boolean is_user_buffer; }; +#define BRW_TILING_NONE 0 +#define BRW_TILING_Y 1 +#define BRW_TILING_X 2 + + struct brw_texture { struct pipe_texture base; @@ -72,10 +77,17 @@ struct brw_texture struct brw_winsys_buffer *bo; struct brw_surface_state ss; + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + boolean compressed; unsigned brw_target; unsigned pitch; unsigned tiling; unsigned cpp; + unsigned total_height; }; @@ -128,5 +140,13 @@ brw_surface_bo( struct pipe_surface *surface ); unsigned brw_surface_pitch( const struct pipe_surface *surface ); +/*********************************************************************** + * Internal functions + */ +GLboolean brw_texture_layout(struct brw_screen *brw_screen, + struct brw_texture *tex ); + + + #endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c new file mode 100644 index 0000000000..8377d30564 --- /dev/null +++ 
b/src/gallium/drivers/i965/brw_screen_tex_layout.c @@ -0,0 +1,387 @@ + +#include "pipe/p_format.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "brw_screen.h" +#include "brw_debug.h" + +static int +brw_tex_pitch_align (struct brw_texture *tex, + int pitch) +{ + if (!tex->compressed) { + int pitch_align; + + switch (tex->tiling) { + case BRW_TILING_X: + pitch_align = 512; + break; + case BRW_TILING_Y: + pitch_align = 128; + break; + default: + /* XXX: Untiled pitch alignment of 64 bytes for now to allow + * render-to-texture to work in all cases. This should + * probably be replaced at some point by some scheme to only + * do this when really necessary, for example standalone + * render target views. + */ + pitch_align = 64; + break; + } + + pitch = align(pitch * tex->cpp, pitch_align); + pitch /= tex->cpp; + } + + return pitch; +} + + +static void +brw_tex_alignment_unit(enum pipe_format pf, + GLuint *w, GLuint *h) +{ + switch (pf) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT1_SRGB: + case PIPE_FORMAT_DXT1_SRGBA: + case PIPE_FORMAT_DXT3_SRGBA: + case PIPE_FORMAT_DXT5_SRGBA: + *w = 4; + *h = 4; + break; + + default: + *w = 4; + *h = 2; + break; + } +} + + +static void +brw_tex_set_level_info(struct brw_texture *tex, + GLuint level, + GLuint nr_images, + GLuint x, GLuint y, + GLuint w, GLuint h, GLuint d) +{ + assert(tex->base.width[level] == w); + assert(tex->base.height[level] == h); + assert(tex->base.depth[level] == d); + assert(tex->image_offset[level] == NULL); + assert(nr_images >= 1); + + if (BRW_DEBUG & DEBUG_TEXTURE) + debug_printf("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, tex->level_offset[level]); + + + tex->level_offset[level] = (x + y * tex->pitch) * tex->cpp; + tex->nr_images[level] = nr_images; + + tex->image_offset[level] = MALLOC(nr_images * sizeof(GLuint)); + 
tex->image_offset[level][0] = 0; +} + + +static void +brw_tex_set_image_offset(struct brw_texture *tex, + GLuint level, GLuint img, + GLuint x, GLuint y, + GLuint offset) +{ + assert((x == 0 && y == 0) || img != 0 || level != 0); + assert(img < tex->nr_images[level]); + + if (BRW_DEBUG & DEBUG_TEXTURE) + debug_printf("%s level %d img %d pos %d,%d image_offset %x\n", + __FUNCTION__, level, img, x, y, + tex->image_offset[level][img]); + + tex->image_offset[level][img] = (x + y * tex->pitch) * tex->cpp + offset; +} + + + +static void brw_layout_2d( struct brw_texture *tex ) +{ + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = tex->base.width[0]; + GLuint height = tex->base.height[0]; + + tex->pitch = tex->base.width[0]; + brw_tex_alignment_unit(tex->base.format, &align_w, &align_h); + + if (tex->compressed) { + tex->pitch = align(tex->base.width[0], align_w); + } + + /* May need to adjust pitch to accommodate the placement of + * the 2nd mipmap. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap out past the width of its parent. + */ + if (tex->base.last_level > 0) { + GLuint mip1_width; + + if (tex->compressed) { + mip1_width = align(minify(tex->base.width[0]), align_w) + + align(minify(minify(tex->base.width[0])), align_w); + } else { + mip1_width = align(minify(tex->base.width[0]), align_w) + + minify(minify(tex->base.width[0])); + } + + if (mip1_width > tex->pitch) { + tex->pitch = mip1_width; + } + } + + /* Pitch must be a whole number of dwords, even though we + * express it in texels. 
+ */ + tex->pitch = brw_tex_pitch_align (tex, tex->pitch); + tex->total_height = 0; + + for ( level = 0 ; level <= tex->base.last_level ; level++ ) { + GLuint img_height; + + brw_tex_set_level_info(tex, level, 1, x, y, width, height, 1); + + if (tex->compressed) + img_height = MAX2(1, height/4); + else + img_height = align(height, align_h); + + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + tex->total_height = MAX2(tex->total_height, y + img_height); + + /* Layout_below: step right after second mipmap. + */ + if (level == 1) { + x += align(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } +} + + +static boolean +brw_layout_cubemap_idgng( struct brw_texture *tex ) +{ + GLuint align_h = 2, align_w = 4; + GLuint level; + GLuint x = 0; + GLuint y = 0; + GLuint width = tex->base.width[0]; + GLuint height = tex->base.height[0]; + GLuint qpitch = 0; + GLuint y_pitch = 0; + + tex->pitch = tex->base.width[0]; + brw_tex_alignment_unit(tex->base.format, &align_w, &align_h); + y_pitch = align(height, align_h); + + if (tex->compressed) { + tex->pitch = align(tex->base.width[0], align_w); + } + + if (tex->base.last_level != 0) { + GLuint mip1_width; + + if (tex->compressed) { + mip1_width = (align(minify(tex->base.width[0]), align_w) + + align(minify(minify(tex->base.width[0])), align_w)); + } else { + mip1_width = (align(minify(tex->base.width[0]), align_w) + + minify(minify(tex->base.width[0]))); + } + + if (mip1_width > tex->pitch) { + tex->pitch = mip1_width; + } + } + + tex->pitch = brw_tex_pitch_align(tex, tex->pitch); + + if (tex->compressed) { + qpitch = ((y_pitch + + align(minify(y_pitch), align_h) + + 11 * align_h) / 4) * tex->pitch * tex->cpp; + + tex->total_height = ((y_pitch + + align(minify(y_pitch), align_h) + + 11 * align_h) / 4) * 6; + } else { + qpitch = (y_pitch + + align(minify(y_pitch), align_h) + + 11 * align_h) * tex->pitch * tex->cpp; + + 
tex->total_height = (y_pitch + + align(minify(y_pitch), align_h) + + 11 * align_h) * 6; + } + + for (level = 0; level <= tex->base.last_level; level++) { + GLuint img_height; + GLuint nr_images = 6; + GLuint q = 0; + + brw_tex_set_level_info(tex, level, nr_images, x, y, width, height, 1); + + for (q = 0; q < nr_images; q++) + brw_tex_set_image_offset(tex, level, q, x, y, q * qpitch); + + if (tex->compressed) + img_height = MAX2(1, height/4); + else + img_height = align(height, align_h); + + if (level == 1) { + x += align(width, align_w); + } + else { + y += img_height; + } + + width = minify(width); + height = minify(height); + } + + return TRUE; +} + + +static boolean +brw_layout_3d_cube( struct brw_texture *tex ) +{ + GLuint width = tex->base.width[0]; + GLuint height = tex->base.height[0]; + GLuint depth = tex->base.depth[0]; + GLuint pack_x_pitch, pack_x_nr; + GLuint pack_y_pitch; + GLuint level; + GLuint align_h = 2; + GLuint align_w = 4; + + tex->total_height = 0; + brw_tex_alignment_unit(tex->base.format, &align_w, &align_h); + + if (tex->compressed) { + tex->pitch = align(width, align_w); + pack_y_pitch = (height + 3) / 4; + } else { + tex->pitch = brw_tex_pitch_align(tex, tex->base.width[0]); + pack_y_pitch = align(tex->base.height[0], align_h); + } + + pack_x_pitch = width; + pack_x_nr = 1; + + for (level = 0 ; level <= tex->base.last_level ; level++) { + GLuint nr_images = tex->base.target == PIPE_TEXTURE_3D ? 
depth : 6; + GLint x = 0; + GLint y = 0; + GLint q, j; + + brw_tex_set_level_info(tex, level, nr_images, + 0, tex->total_height, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + brw_tex_set_image_offset(tex, level, q, x, y, 0); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + tex->total_height += y; + width = minify(width); + height = minify(height); + depth = minify(depth); + + if (tex->compressed) { + pack_y_pitch = (height + 3) / 4; + + if (pack_x_pitch > align(width, align_w)) { + pack_x_pitch = align(width, align_w); + pack_x_nr <<= 1; + } + } else { + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr <= tex->pitch); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + pack_y_pitch = align(pack_y_pitch, align_h); + } + } + } + + /* The 965's sampler lays cachelines out according to how accesses + * in the texture surfaces run, so they may be "vertical" through + * memory. As a result, the docs say in Surface Padding Requirements: + * Sampling Engine Surfaces that two extra rows of padding are required. 
+ */ + if (tex->base.target == PIPE_TEXTURE_CUBE) + tex->total_height += 2; + + return TRUE; +} + + + +GLboolean brw_texture_layout(struct brw_screen *brw_screen, + struct brw_texture *tex ) +{ + switch (tex->base.target) { + case PIPE_TEXTURE_CUBE: + if (brw_screen->chipset.is_igdng) + brw_layout_cubemap_idgng( tex ); + else + brw_layout_3d_cube( tex ); + break; + + case PIPE_TEXTURE_3D: + brw_layout_3d_cube( tex ); + break; + + default: + brw_layout_2d( tex ); + break; + } + + if (BRW_DEBUG & DEBUG_TEXTURE) + debug_printf("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + tex->pitch, + tex->total_height, + tex->cpp, + tex->pitch * tex->total_height * tex->cpp ); + + return GL_TRUE; +} diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 3d069add6f..d527f22a8d 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -196,185 +196,6 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) } -GLboolean brw_miptree_layout(struct brw_context *brw, - struct intel_mipmap_tree *mt, - uint32_t tiling) -{ - /* XXX: these vary depending on image format: */ - /* GLint align_w = 4; */ - - switch (mt->target) { - case GL_TEXTURE_CUBE_MAP: - if (IS_IGDNG(brw->brw_screen->pci_id)) { - GLuint align_h = 2, align_w = 4; - GLuint level; - GLuint x = 0; - GLuint y = 0; - GLuint width = mt->width0; - GLuint height = mt->height0; - GLuint qpitch = 0; - GLuint y_pitch = 0; - - mt->pitch = mt->width0; - intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); - y_pitch = ALIGN(height, align_h); - - if (mt->compressed) { - mt->pitch = ALIGN(mt->width0, align_w); - } - - if (mt->last_level != 0) { - GLuint mip1_width; - - if (mt->compressed) { - mip1_width = ALIGN(minify(mt->width0), align_w) - + ALIGN(minify(minify(mt->width0)), align_w); - } else { - mip1_width = ALIGN(minify(mt->width0), align_w) - + minify(minify(mt->width0)); - } - - if 
(mip1_width > mt->pitch) { - mt->pitch = mip1_width; - } - } - - mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch); - - if (mt->compressed) { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp; - mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6; - } else { - qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp; - mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6; - } - - for (level = 0; level <= mt->last_level; level++) { - GLuint img_height; - GLuint nr_images = 6; - GLuint q = 0; - - intel_miptree_set_level_info(mt, level, nr_images, x, y, width, - height, 1); - - for (q = 0; q < nr_images; q++) - intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch); - - if (mt->compressed) - img_height = MAX2(1, height/4); - else - img_height = ALIGN(height, align_h); - - if (level == 1) { - x += ALIGN(width, align_w); - } - else { - y += img_height; - } - - width = minify(width); - height = minify(height); - } - - break; - } - - case GL_TEXTURE_3D: { - GLuint width = mt->width0; - GLuint height = mt->height0; - GLuint depth = mt->depth0; - GLuint pack_x_pitch, pack_x_nr; - GLuint pack_y_pitch; - GLuint level; - GLuint align_h = 2; - GLuint align_w = 4; - - mt->total_height = 0; - intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); - - if (mt->compressed) { - mt->pitch = ALIGN(width, align_w); - pack_y_pitch = (height + 3) / 4; - } else { - mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0); - pack_y_pitch = ALIGN(mt->height0, align_h); - } - - pack_x_pitch = width; - pack_x_nr = 1; - - for (level = 0 ; level <= mt->last_level ; level++) { - GLuint nr_images = mt->target == GL_TEXTURE_3D ? 
depth : 6; - GLint x = 0; - GLint y = 0; - GLint q, j; - - intel_miptree_set_level_info(mt, level, nr_images, - 0, mt->total_height, - width, height, depth); - - for (q = 0; q < nr_images;) { - for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { - intel_miptree_set_image_offset(mt, level, q, x, y); - x += pack_x_pitch; - } - - x = 0; - y += pack_y_pitch; - } - - - mt->total_height += y; - width = minify(width); - height = minify(height); - depth = minify(depth); - - if (mt->compressed) { - pack_y_pitch = (height + 3) / 4; - - if (pack_x_pitch > ALIGN(width, align_w)) { - pack_x_pitch = ALIGN(width, align_w); - pack_x_nr <<= 1; - } - } else { - if (pack_x_pitch > 4) { - pack_x_pitch >>= 1; - pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= mt->pitch); - } - - if (pack_y_pitch > 2) { - pack_y_pitch >>= 1; - pack_y_pitch = ALIGN(pack_y_pitch, align_h); - } - } - - } - /* The 965's sampler lays cachelines out according to how accesses - * in the texture surfaces run, so they may be "vertical" through - * memory. As a result, the docs say in Surface Padding Requirements: - * Sampling Engine Surfaces that two extra rows of padding are required. - * We don't know of similar requirements for pre-965, but given that - * those docs are silent on padding requirements in general, let's play - * it safe. 
- */ - if (mt->target == GL_TEXTURE_CUBE_MAP) - mt->total_height += 2; - break; - } - - default: - i945_miptree_layout_2d(intel, mt, tiling); - break; - } - DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, - mt->pitch, - mt->total_height, - mt->cpp, - mt->pitch * mt->total_height * mt->cpp ); - - return GL_TRUE; -} static void brw_create_texture( struct pipe_screen *screen, @@ -382,6 +203,21 @@ static void brw_create_texture( struct pipe_screen *screen, { + tex->compressed = pf_is_compressed(tex->base.format); + + if (intel->use_texture_tiling && compress_byte == 0 && + intel->intelScreen->kernel_exec_fencing) { + if (IS_965(intel->intelScreen->deviceID) && + (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL_EXT)) + tiling = I915_TILING_Y; + else + tiling = I915_TILING_X; + } else + tiling = I915_TILING_NONE; + + + key.format = tex->base.format; key.pitch = tex->pitch; key.depth = tex->base.depth[0]; @@ -389,7 +225,7 @@ static void brw_create_texture( struct pipe_screen *screen, key.offset = 0; key.target = tex->brw_target; /* translated to BRW enum */ - //key.depthmode = 0; /* XXX: add this to gallium? or the state tracker? */ + //key.depthmode = 0; /* XXX: add this to gallium? or handle in the state tracker? */ key.last_level = tex->base.last_level; key.width = tex->base.depth[0]; key.height = tex->base.height[0]; diff --git a/src/gallium/drivers/i965/brw_wm_constant_buffer.c b/src/gallium/drivers/i965/brw_wm_constant_buffer.c new file mode 100644 index 0000000000..7d2533b104 --- /dev/null +++ b/src/gallium/drivers/i965/brw_wm_constant_buffer.c @@ -0,0 +1,151 @@ +/* XXX: Constant buffers disabled + */ + + +/** + * Create the constant buffer surface. Vertex/fragment shader constants will be + * read from this buffer with Data Port Read instructions/messages. 
+ */ +struct brw_winsys_buffer * +brw_create_constant_surface( struct brw_context *brw, + struct brw_surface_key *key ) +{ + const GLint w = key->width - 1; + struct brw_winsys_buffer *bo; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = BRW_SURFACE_BUFFER; + surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + assert(key->bo); + surf.ss1.base_addr = key->bo->offset; /* reloc */ + + surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ + surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ + surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ + surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ + + bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 1 : 0, + &surf, sizeof(surf), + NULL, NULL); + + if (key->bo) { + /* Emit relocation to surface contents */ + brw->sws->bo_emit_reloc(bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + } + + return bo; +} + + + +/** + * Update the surface state for a WM constant buffer. + * The constant buffer will be (re)allocated here if needed. + */ +static void +brw_update_wm_constant_surface( struct brw_context *brw, + GLuint surf) +{ + struct brw_surface_key key; + struct brw_fragment_shader *fp = brw->curr.fragment_shader; + struct pipe_buffer *cbuf = brw->curr.fragment_constants; + int pitch = cbuf->size / (4 * sizeof(float)); + + /* If we're in this state update atom, we need to update WM constants, so + * free the old buffer and create a new one for the new contents. + */ + brw->sws->bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. 
+ */ + if (cbuf == NULL) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + return; + } + + memset(&key, 0, sizeof(key)); + + key.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + key.ss0.surface_type = BRW_SURFACE_BUFFER; + key.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + key.bo = brw_buffer(cbuf)->bo; + + key.ss2.width = (pitch-1) & 0x7f; /* bits 6:0 of size or width */ + key.ss2.height = ((pitch-1) >> 7) & 0x1fff; /* bits 19:7 of size or width */ + key.ss3.depth = ((pitch-1) >> 20) & 0x7f; /* bits 26:20 of size or width */ + key.ss3.pitch = (pitch * 4 * sizeof(float)) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ + + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + brw->sws->bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, 1, + NULL); + if (brw->wm.surf_bo[surf] == NULL) { + brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; +} + +/** + * Updates surface / buffer for fragment shader constant buffer, if + * one is required. + * + * This consumes the state updates for the constant buffer, and produces + * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for + * inclusion in the binding table. + */ +static void prepare_wm_constant_surface(struct brw_context *brw ) +{ + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; + + drm_intel_bo_unreference(fp->const_buffer); + fp->const_buffer = brw_wm_update_constant_buffer(brw); + + /* If there's no constant buffer, then no surface BO is needed to point at + * it. 
+ */ + if (fp->const_buffer == 0) { + if (brw->wm.surf_bo[surf] != NULL) { + drm_intel_bo_unreference(brw->wm.surf_bo[surf]); + brw->wm.surf_bo[surf] = NULL; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + } + return; + } + + brw_update_wm_constant_surface(ctx, surf); +} + +const struct brw_tracked_state brw_wm_constant_surface = { + .dirty = { + .mesa = (_NEW_PROGRAM_CONSTANTS), + .brw = (BRW_NEW_FRAGMENT_PROGRAM), + .cache = 0 + }, + .prepare = prepare_wm_constant_surface, +}; diff --git a/src/gallium/drivers/i965/intel_tex_layout.c b/src/gallium/drivers/i965/intel_tex_layout.c deleted file mode 100644 index 7e0ca553f2..0000000000 --- a/src/gallium/drivers/i965/intel_tex_layout.c +++ /dev/null @@ -1,137 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - /* - * Authors: - * Keith Whitwell - * Michel Dänzer - */ - -#include "intel_tex_layout.h" - -void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h) -{ - switch (internalFormat) { - case GL_COMPRESSED_RGB_FXT1_3DFX: - case GL_COMPRESSED_RGBA_FXT1_3DFX: - *w = 8; - *h = 4; - break; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - *w = 4; - *h = 4; - break; - - default: - *w = 4; - *h = 2; - break; - } -} - -void i945_miptree_layout_2d( struct intel_context *intel, - struct intel_mipmap_tree *mt, - uint32_t tiling ) -{ - GLuint align_h = 2, align_w = 4; - GLuint level; - GLuint x = 0; - GLuint y = 0; - GLuint width = mt->width0; - GLuint height = mt->height0; - - mt->pitch = mt->width0; - intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h); - - if (mt->compressed) { - mt->pitch = ALIGN(mt->width0, align_w); - } - - /* May need to adjust pitch to accomodate the placement of - * the 2nd mipmap. This occurs when the alignment - * constraints of mipmap placement push the right edge of the - * 2nd mipmap out past the width of its parent. 
- */ - if (mt->last_level) { - GLuint mip1_width; - - if (mt->compressed) { - mip1_width = ALIGN(minify(mt->width0), align_w) - + ALIGN(minify(minify(mt->width0)), align_w); - } else { - mip1_width = ALIGN(minify(mt->width0), align_w) - + minify(minify(mt->width0)); - } - - if (mip1_width > mt->pitch) { - mt->pitch = mip1_width; - } - } - - /* Pitch must be a whole number of dwords, even though we - * express it in texels. - */ - mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch); - mt->total_height = 0; - - for ( level = 0 ; level <= mt->last_level ; level++ ) { - GLuint img_height; - - intel_miptree_set_level_info(mt, level, 1, x, y, width, - height, 1); - - if (mt->compressed) - img_height = MAX2(1, height/4); - else - img_height = ALIGN(height, align_h); - - - /* Because the images are packed better, the final offset - * might not be the maximal one: - */ - mt->total_height = MAX2(mt->total_height, y + img_height); - - /* Layout_below: step right after second mipmap. 
- */ - if (level == 1) { - x += ALIGN(width, align_w); - } - else { - y += img_height; - } - - width = minify(width); - height = minify(height); - } -} -- cgit v1.2.3 From 6981bbfabeeaf60111d737dd0d5a93496fd16758 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 1 Nov 2009 15:59:21 +0000 Subject: i965g: more files compiling --- src/gallium/drivers/i965/Makefile | 3 +- src/gallium/drivers/i965/brw_context.h | 8 +- src/gallium/drivers/i965/brw_pipe_blend.c | 2 + src/gallium/drivers/i965/brw_pipe_rast.c | 199 ++++++++++++++++------- src/gallium/drivers/i965/brw_screen_tex_layout.c | 29 ++++ src/gallium/drivers/i965/brw_screen_texture.c | 157 ++++++++++-------- src/gallium/drivers/i965/brw_winsys.h | 8 +- src/gallium/drivers/i965/brw_wm_sampler_state.c | 10 +- 8 files changed, 269 insertions(+), 147 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index d88f34cb7e..48950544c9 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -31,6 +31,7 @@ C_SOURCES = \ brw_pipe_query.c \ brw_pipe_shader.c \ brw_pipe_flush.c \ + brw_pipe_rast.c \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ @@ -57,8 +58,8 @@ C_SOURCES = \ brw_wm_state.c \ brw_wm_surface_state.c \ brw_screen_tex_layout.c \ - brw_screen_surface.c \ brw_screen_texture.c \ + brw_screen_surface.c \ brw_batchbuffer.c include ../../Makefile.template diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 3e9315c41f..b94c511499 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -102,7 +102,7 @@ * * CS - Clipper. Mesa's clipping algorithms are imported to run on * this unit. 
The fixed function part performs cliptesting against - * the 6 fixed clipplanes and makes descisions on whether or not the + * the 6 fixed clipplanes and makes decisions on whether or not the * incoming primitive needs to be passed to a thread for clipping. * User clip planes are handled via cooperation with the VS thread. * @@ -123,8 +123,6 @@ struct brw_context; struct brw_depth_stencil_state { - //struct pipe_depth_stencil_alpha_state templ; /* for draw module */ - /* Precalculated hardware state: */ struct brw_cc0 cc0; @@ -138,8 +136,6 @@ struct brw_depth_stencil_state { struct brw_blend_state { - //struct pipe_depth_stencil_alpha_state templ; /* for draw module */ - /* Precalculated hardware state: */ struct brw_cc2 cc2; @@ -181,7 +177,7 @@ struct brw_fragment_shader { struct brw_sampler { - struct pipe_sampler_state templ; + float border_color[4]; struct brw_ss0 ss0; struct brw_ss1 ss1; struct brw_ss3 ss3; diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c index cc9ee2e8db..f6da9254ef 100644 --- a/src/gallium/drivers/i965/brw_pipe_blend.c +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -111,6 +111,8 @@ static void *brw_create_blend_state( struct pipe_context *pipe, const struct pipe_blend_state *templ ) { struct brw_blend_state *blend = CALLOC_STRUCT(brw_blend_state); + if (blend == NULL) + return NULL; if (templ->logicop_enable) { blend->cc2.logicop_enable = 1; diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c index 51159bf147..27c568de0a 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.c +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -1,84 +1,159 @@ -static void -calculate_clip_key_rast() -{ - if (BRW_IS_IGDNG(brw)) - key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP; - else - key.clip_mode = BRW_CLIPMODE_NORMAL; +#include "util/u_memory.h" +#include "pipe/p_defines.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_pipe_rast.h" +#include 
"brw_wm.h" - key.do_flat_shading = brw->rast->templ.flatshade; - if (key.primitive == PIPE_PRIM_TRIANGLES) { - if (brw->rast->templ.cull_mode = PIPE_WINDING_BOTH) - key.clip_mode = BRW_CLIPMODE_REJECT_ALL; - else { - key.fill_ccw = CLIP_CULL; - key.fill_cw = CLIP_CULL; - - if (!(brw->rast->templ.cull_mode & PIPE_WINDING_CCW)) { - key.fill_ccw = translate_fill(brw->rast.fill_ccw); - } - - if (!(brw->rast->templ.cull_mode & PIPE_WINDING_CW)) { - key.fill_cw = translate_fill(brw->rast.fill_cw); - } - - if (key.fill_cw != CLIP_FILL || - key.fill_ccw != CLIP_FILL) { - key.do_unfilled = 1; - key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; - } - - key.offset_ccw = brw->rast.templ.offset_ccw; - key.offset_cw = brw->rast.templ.offset_cw; - - if (brw->rast.templ.light_twoside && - key.fill_cw != CLIP_CULL) - key.copy_bfc_cw = 1; - - if (brw->rast.templ.light_twoside && - key.fill_ccw != CLIP_CULL) - key.copy_bfc_ccw = 1; - } - } +static unsigned translate_fill( unsigned fill ) +{ + switch (fill) { + case PIPE_POLYGON_MODE_FILL: + return CLIP_FILL; + case PIPE_POLYGON_MODE_LINE: + return CLIP_LINE; + case PIPE_POLYGON_MODE_POINT: + return CLIP_POINT; + default: + assert(0); + return CLIP_FILL; } } +/* Calculates the key for triangle-mode clipping. Non-triangle + * clipping keys use much less information and are computed on the + * fly. 
+ */ static void -calculate_line_stipple_rast() +calculate_clip_key_rast( const struct brw_context *brw, + const struct pipe_rasterizer_state *templ, + const struct brw_rasterizer_state *rast, + struct brw_clip_prog_key *key) { - GLfloat tmp; - GLint tmpi; + memset(key, 0, sizeof *key); - memset(&bls, 0, sizeof(bls)); - bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; - bls.header.length = sizeof(bls)/4 - 2; - bls.bits0.pattern = brw->curr.rast.line_stipple_pattern; - bls.bits1.repeat_count = brw->curr.rast.line_stipple_factor + 1; + if (brw->chipset.is_igdng) + key->clip_mode = BRW_CLIPMODE_KERNEL_CLIP; + else + key->clip_mode = BRW_CLIPMODE_NORMAL; - tmp = 1.0 / (GLfloat) bls.bits1.repeat_count; - tmpi = tmp * (1<<13); + key->do_flat_shading = templ->flatshade; - bls.bits1.inverse_repeat_count = tmpi; + if (templ->cull_mode == PIPE_WINDING_BOTH) { + key->clip_mode = BRW_CLIPMODE_REJECT_ALL; + return; + } -} + key->fill_ccw = CLIP_CULL; + key->fill_cw = CLIP_CULL; + if (!(templ->cull_mode & PIPE_WINDING_CCW)) { + key->fill_ccw = translate_fill(templ->fill_ccw); + } + + if (!(templ->cull_mode & PIPE_WINDING_CW)) { + key->fill_cw = translate_fill(templ->fill_cw); + } + + if (key->fill_cw != CLIP_FILL || + key->fill_ccw != CLIP_FILL) { + key->do_unfilled = 1; + key->clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + } + + key->offset_ccw = templ->offset_ccw; + key->offset_cw = templ->offset_cw; + + if (templ->light_twoside && key->fill_cw != CLIP_CULL) + key->copy_bfc_cw = 1; + + if (templ->light_twoside && key->fill_ccw != CLIP_CULL) + key->copy_bfc_ccw = 1; +} static void -calculate_wm_lookup() +calculate_line_stipple_rast( const struct pipe_rasterizer_state *templ, + struct brw_line_stipple *bls ) { - if (rast->fill_cw == PIPE_POLYGON_MODE_LINE && - rast->fill_ccw == PIPE_POLYGON_MODE_LINE) { - line_aa = AA_ALWAYS; - } - else if (rast->fill_cw == PIPE_POLYGON_MODE_LINE || - rast->fill_ccw == PIPE_POLYGON_MODE_LINE) { - line_aa = AA_SOMETIMES; + GLfloat tmp = 1.0f / 
(templ->line_stipple_factor + 1); + GLint tmpi = tmp * (1<<13); + + bls->header.opcode = CMD_LINE_STIPPLE_PATTERN; + bls->header.length = sizeof(*bls)/4 - 2; + bls->bits0.pattern = templ->line_stipple_pattern; + bls->bits1.repeat_count = templ->line_stipple_factor + 1; + bls->bits1.inverse_repeat_count = tmpi; +} + +static void *brw_create_rasterizer_state( struct pipe_context *pipe, + const struct pipe_rasterizer_state *templ ) +{ + struct brw_context *brw = brw_context(pipe); + struct brw_rasterizer_state *rast; + + rast = CALLOC_STRUCT(brw_rasterizer_state); + if (rast == NULL) + return NULL; + + rast->templ = *templ; + + calculate_clip_key_rast( brw, templ, rast, &rast->clip_key ); + + if (templ->line_stipple_enable) + calculate_line_stipple_rast( templ, &rast->bls ); + + /* Calculate lookup value for WM IZ table. + */ + if (templ->line_smooth) { + if (templ->fill_cw == PIPE_POLYGON_MODE_LINE && + templ->fill_ccw == PIPE_POLYGON_MODE_LINE) { + rast->unfilled_aa_line = AA_ALWAYS; + } + else if (templ->fill_cw == PIPE_POLYGON_MODE_LINE || + templ->fill_ccw == PIPE_POLYGON_MODE_LINE) { + rast->unfilled_aa_line = AA_SOMETIMES; + } + else { + rast->unfilled_aa_line = AA_NEVER; + } } else { - line_aa = AA_NEVER; + rast->unfilled_aa_line = AA_NEVER; } + + return (void *)rast; +} + + +static void brw_bind_rasterizer_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + brw->curr.rast = (const struct brw_rasterizer_state *)cso; + brw->state.dirty.mesa |= PIPE_NEW_RAST; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, + void *cso) +{ + struct brw_context *brw = brw_context(pipe); + assert((const void *)cso != (const void *)brw->curr.rast); + FREE(cso); +} + + + +void brw_pipe_rast_init( struct brw_context *brw ) +{ + brw->base.create_rasterizer_state = brw_create_rasterizer_state; + brw->base.bind_rasterizer_state = brw_bind_rasterizer_state; + brw->base.delete_rasterizer_state = 
brw_delete_rasterizer_state; +} + +void brw_pipe_rast_cleanup( struct brw_context *brw ) +{ } diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c index 8377d30564..bcdf8d8074 100644 --- a/src/gallium/drivers/i965/brw_screen_tex_layout.c +++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c @@ -1,3 +1,29 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ #include "pipe/p_format.h" @@ -7,6 +33,9 @@ #include "brw_screen.h" #include "brw_debug.h" +/* Code to layout images in a mipmap tree for i965. 
+ */ + static int brw_tex_pitch_align (struct brw_texture *tex, int pitch) diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index d527f22a8d..989013953b 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -29,12 +29,12 @@ * Keith Whitwell */ -/* Code to layout images in a mipmap tree for i965. - */ +#include "util/u_memory.h" -#include "brw_tex_layout.h" - -#define FILE_DEBUG_FLAG DEBUG_MIPTREE +#include "brw_screen.h" +#include "brw_defines.h" +#include "brw_structs.h" +#include "brw_winsys.h" @@ -176,94 +176,113 @@ static GLuint translate_tex_format( enum pipe_format pf ) } } -static void -brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) -{ - switch (tiling) { - case BRW_TILING_NONE: - surf->ss3.tiled_surface = 0; - surf->ss3.tile_walk = 0; - break; - case BRW_TILING_X: - surf->ss3.tiled_surface = 1; - surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR; - break; - case BRW_TILING_Y: - surf->ss3.tiled_surface = 1; - surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR; - break; - } -} -static void brw_create_texture( struct pipe_screen *screen, - const pipe_texture *templ ) +static struct pipe_texture *brw_create_texture( struct pipe_screen *screen, + const struct pipe_texture *templ ) { + struct brw_screen *bscreen = brw_screen(screen); + struct brw_texture *tex; + + tex = CALLOC_STRUCT(brw_texture); + if (tex == NULL) + return NULL; tex->compressed = pf_is_compressed(tex->base.format); - if (intel->use_texture_tiling && compress_byte == 0 && - intel->intelScreen->kernel_exec_fencing) { - if (IS_965(intel->intelScreen->deviceID) && - (base_format == GL_DEPTH_COMPONENT || - base_format == GL_DEPTH_STENCIL_EXT)) - tiling = I915_TILING_Y; + /* XXX: No tiling with compressed textures?? 
+ */ + if (tex->compressed == 0 + /* && bscreen->use_texture_tiling */ + /* && bscreen->kernel_exec_fencing */) + { + if (bscreen->chipset.is_965 && + pf_is_depth_or_stencil(templ->format)) + tex->tiling = BRW_TILING_Y; else - tiling = I915_TILING_X; - } else - tiling = I915_TILING_NONE; + tex->tiling = BRW_TILING_X; + } + else { + tex->tiling = BRW_TILING_NONE; + } + memcpy(&tex->base, templ, sizeof *templ); - key.format = tex->base.format; - key.pitch = tex->pitch; - key.depth = tex->base.depth[0]; - key.bo = tex->buffer; - key.offset = 0; + if (!brw_texture_layout( bscreen, tex )) + goto fail; - key.target = tex->brw_target; /* translated to BRW enum */ - //key.depthmode = 0; /* XXX: add this to gallium? or handle in the state tracker? */ - key.last_level = tex->base.last_level; - key.width = tex->base.depth[0]; - key.height = tex->base.height[0]; - key.cpp = tex->cpp; - key.tiling = tex->tiling; + tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); + tex->ss.ss0.surface_format = translate_tex_format(tex->base.format); + /* This is ok for all textures with channel width 8bit or less: + */ +/* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + tex->ss.ss1.base_addr = tex->bo->offset; /* reloc */ + tex->ss.ss2.mip_count = tex->base.last_level; + tex->ss.ss2.width = tex->base.width[0] - 1; + tex->ss.ss2.height = tex->base.height[0] - 1; + switch (tex->tiling) { + case BRW_TILING_NONE: + tex->ss.ss3.tiled_surface = 0; + tex->ss.ss3.tile_walk = 0; + break; + case BRW_TILING_X: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + break; + case BRW_TILING_Y: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR; + break; + } - surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf.ss0.surface_type = translate_tex_target(key->target); - surf.ss0.surface_format = translate_tex_format(key->format /* , 
key->depthmode */ ); + tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; + tex->ss.ss3.depth = tex->base.depth[0] - 1; - /* This is ok for all textures with channel width 8bit or less: - */ -/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - assert(key->bo); - surf.ss1.base_addr = key->bo->offset; /* reloc */ - surf.ss2.mip_count = key->last_level; - surf.ss2.width = key->width - 1; - surf.ss2.height = key->height - 1; - brw_set_surface_tiling(&surf, key->tiling); - surf.ss3.pitch = (key->pitch * key->cpp) - 1; - surf.ss3.depth = key->depth - 1; - - surf.ss4.min_lod = 0; + tex->ss.ss4.min_lod = 0; - if (key->target == PIPE_TEXTURE_CUBE) { - surf.ss0.cube_pos_x = 1; - surf.ss0.cube_pos_y = 1; - surf.ss0.cube_pos_z = 1; - surf.ss0.cube_neg_x = 1; - surf.ss0.cube_neg_y = 1; - surf.ss0.cube_neg_z = 1; + if (tex->base.target == PIPE_TEXTURE_CUBE) { + tex->ss.ss0.cube_pos_x = 1; + tex->ss.ss0.cube_pos_y = 1; + tex->ss.ss0.cube_pos_z = 1; + tex->ss.ss0.cube_neg_x = 1; + tex->ss.ss0.cube_neg_y = 1; + tex->ss.ss0.cube_neg_z = 1; } + return &tex->base; + +fail: + bscreen->sws->bo_unreference(tex->bo); + FREE(tex); + return NULL; } +static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen, + const struct pipe_texture *templ, + const unsigned *stride, + struct pipe_buffer *buffer) +{ + return NULL; +} + +static void brw_texture_destroy(struct pipe_texture *pt) +{ + //bscreen->sws->bo_unreference(tex->bo); + FREE(pt); +} + + + + + diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index 33032276bc..66a94b4b00 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -243,10 +243,10 @@ boolean brw_get_texture_buffer_brw(struct pipe_texture *texture, * * TODO UGLY */ -struct pipe_texture * brw_texture_blanket(struct pipe_screen *screen, - struct pipe_texture *tmplt, - unsigned pitch, - struct brw_winsys_buffer *buffer); +struct pipe_texture * 
brw_texture_blanket_ws(struct pipe_screen *screen, + const struct pipe_texture *tmplt, + const unsigned *stride, + struct brw_winsys_buffer *buffer); diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c index 55698a58bb..ddd88d6e22 100644 --- a/src/gallium/drivers/i965/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -126,10 +126,10 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw) if (pf_is_depth_or_stencil(tex->base.format)) { float bordercolor[4] = { - sampler->templ.border_color[0], - sampler->templ.border_color[0], - sampler->templ.border_color[0], - sampler->templ.border_color[0] + sampler->border_color[0], + sampler->border_color[0], + sampler->border_color[0], + sampler->border_color[0] }; /* GL specs that border color for depth textures is taken from the * R channel, while the hardware uses A. Spam R into all the @@ -137,7 +137,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw) */ brw->wm.sdc_bo[i] = upload_default_color(brw, bordercolor); } else { - brw->wm.sdc_bo[i] = upload_default_color(brw, sampler->templ.border_color); + brw->wm.sdc_bo[i] = upload_default_color(brw, sampler->border_color); } } } -- cgit v1.2.3 From 4a3e002cf9bad3e7314653abbc740624c2c8b31a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 1 Nov 2009 17:18:56 +0000 Subject: i965g: more files compiling --- src/gallium/drivers/i965/brw_defines.h | 1 + src/gallium/drivers/i965/brw_screen.h | 42 +++- src/gallium/drivers/i965/brw_screen_surface.c | 304 +++++++++++++------------- src/gallium/drivers/i965/brw_screen_texture.c | 24 +- 4 files changed, 205 insertions(+), 166 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h index 92c6b6edc3..e201ce4d7c 100644 --- a/src/gallium/drivers/i965/brw_defines.h +++ b/src/gallium/drivers/i965/brw_defines.h @@ -417,6 +417,7 @@ #define 
BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D #define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E #define BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F +#define BRW_SURFACEFORMAT_INVALID 0xFFF #define BRW_SURFACERETURNFORMAT_FLOAT32 0 #define BRW_SURFACERETURNFORMAT_S1 1 diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index bd04e689d9..11b480b1ac 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -69,17 +69,35 @@ struct brw_buffer #define BRW_TILING_Y 1 #define BRW_TILING_X 2 +union brw_surface_id { + struct { + unsigned face:3; + unsigned zslice:13; + unsigned level:16; + } bits; + unsigned value; +}; + + +struct brw_surface +{ + struct pipe_surface base; + union brw_surface_id id; + struct brw_surface_state ss; + struct brw_winsys_buffer *bo; + struct brw_surface *next, *prev; +}; + + struct brw_texture { struct pipe_texture base; - struct brw_winsys_buffer *bo; struct brw_surface_state ss; unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; boolean compressed; @@ -88,15 +106,11 @@ struct brw_texture unsigned tiling; unsigned cpp; unsigned total_height; + + struct brw_surface views[2]; }; -struct brw_surface -{ - struct pipe_surface base; - struct brw_surface_state ss; - struct brw_winsys_buffer *bo; -}; /* * Cast wrappers @@ -125,6 +139,12 @@ brw_buffer(struct pipe_buffer *buffer) return (struct brw_buffer *)buffer; } +static INLINE struct brw_texture * +brw_texture(struct pipe_texture *texture) +{ + return (struct brw_texture *)texture; +} + /* Pipe buffer helpers */ @@ -146,6 +166,12 @@ brw_surface_pitch( const struct pipe_surface *surface ); GLboolean brw_texture_layout(struct brw_screen *brw_screen, struct brw_texture *tex ); +void brw_update_texture( struct brw_screen *brw_screen, + struct brw_texture *tex ); + + +void brw_screen_tex_init( struct brw_screen *brw_screen ); +void 
brw_screen_tex_surface_init( struct brw_screen *brw_screen ); diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c index 01d4b2d2b1..b4ad91278b 100644 --- a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -1,135 +1,51 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + +#include "util/u_memory.h" +#include "util/u_simple_list.h" #include "pipe/p_screen.h" #include "brw_screen.h" +#include "brw_defines.h" - -/** - * Sets up a surface state structure to point at the given region. 
- * While it is only used for the front/back buffer currently, it should be - * usable for further buffers when doing ARB_draw_buffer support. - */ -static void -brw_update_renderbuffer_surface(struct brw_context *brw, - struct gl_renderbuffer *rb, - unsigned int unit) -{ - struct brw_winsys_buffer *region_bo = NULL; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - struct intel_region *region = irb ? irb->region : NULL; - struct { - unsigned int surface_type; - unsigned int surface_format; - unsigned int width, height, pitch, cpp; - GLubyte color_mask[4]; - GLboolean color_blend; - uint32_t tiling; - uint32_t draw_offset; - } key; - - memset(&key, 0, sizeof(key)); - - if (region != NULL) { - region_bo = region->buffer; - - key.surface_type = BRW_SURFACE_2D; - switch (irb->texformat->MesaFormat) { - case PIPE_FORMAT_ARGB8888: - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; - case PIPE_FORMAT_RGB565: - key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - break; - case PIPE_FORMAT_ARGB1555: - key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - break; - case PIPE_FORMAT_ARGB4444: - key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - break; - default: - debug_printf("Bad renderbuffer format: %d\n", - irb->texformat->MesaFormat); - assert(0); - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - return; - } - key.tiling = region->tiling; - if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) { - key.width = rb->Width; - key.height = rb->Height; - } else { - key.width = region->width; - key.height = region->height; - } - key.pitch = region->pitch; - key.cpp = region->cpp; - key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */ - } - - memcpy(key.color_mask, ctx->Color.ColorMask, - sizeof(key.color_mask)); - - key.color_blend = (!ctx->Color._LogicOpEnabled && - ctx->Color.BlendEnabled); - - brw->sws->bo_unreference(brw->wm.surf_bo[unit]); - brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, - 
BRW_SS_SURFACE, - &key, sizeof(key), - &region_bo, 1, - NULL); - - if (brw->wm.surf_bo[unit] == NULL) { - struct brw_surface_state surf; - - memset(&surf, 0, sizeof(surf)); - - surf.ss0.surface_format = key.surface_format; - surf.ss0.surface_type = key.surface_type; - if (key.tiling == I915_TILING_NONE) { - surf.ss1.base_addr = key.draw_offset; - } else { - uint32_t tile_offset = key.draw_offset % 4096; - - surf.ss1.base_addr = key.draw_offset - tile_offset; - - assert(BRW_IS_G4X(brw) || tile_offset == 0); - if (BRW_IS_G4X(brw)) { - if (key.tiling == I915_TILING_X) { - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. - */ - surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4; - surf.ss5.y_offset = tile_offset / 512 / 2; - } else { - surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4; - surf.ss5.y_offset = tile_offset / 128 / 2; - } - } - } - - if (region_bo != NULL) - surf.ss1.base_addr += region_bo->offset; /* reloc */ - - surf.ss2.width = key.width - 1; - surf.ss2.height = key.height - 1; - brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.pitch * key.cpp) - 1; - -} - - - -struct brw_surface_id { - unsigned face:3; - unsigned zslice:13; - unsigned level:16; +enum { + BRW_VIEW_LINEAR, + BRW_VIEW_IN_PLACE }; + static boolean need_linear_view( struct brw_screen *brw_screen, struct brw_texture *brw_texture, - unsigned face, - unsigned level, - unsigned zslice ) + union brw_surface_id id, + unsigned usage ) { #if 0 /* XXX: what about IDGNG? 
@@ -178,71 +94,155 @@ static boolean need_linear_view( struct brw_screen *brw_screen, /* Look at all texture views and figure out if any of them need to be * back-copied into the texture for sampling */ -void brw_update_texture( struct pipe_screen *screen, - struct pipe_texture *texture ) +void brw_update_texture( struct brw_screen *brw_screen, + struct brw_texture *tex ) { /* currently nothing to do */ } -static struct pipe_surface *create_linear_view( struct brw_screen *brw_screen, - struct brw_texture *brw_tex, - struct brw_surface_id id ) +/* Create a new surface with linear layout to serve as a render-target + * where it would be illegal (perhaps due to tiling constraints) to do + * this in-place. + * + * Currently not implemented, not sure if it's needed. + */ +static struct brw_surface *create_linear_view( struct brw_screen *brw_screen, + struct brw_texture *tex, + union brw_surface_id id, + unsigned usage ) { - + return NULL; } -static struct pipe_surface *create_in_place_view( struct brw_screen *brw_screen, - struct brw_texture *brw_tex, - struct brw_surface_id id ) + +/* Create a pipe_surface that just points directly into the existing + * texture's storage. 
+ */ +static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, + struct brw_texture *tex, + union brw_surface_id id, + unsigned usage ) { - struct brw_surface *surface = CALLOC_STRUCT(brw_surface); + struct brw_surface *surface; + + surface = CALLOC_STRUCT(brw_surface); + if (surface == NULL) + return NULL; + + /* XXX: ignoring render-to-slice-of-3d-texture + */ + assert(id.bits.zslice == 0); + + surface->base.format = tex->base.format; + surface->base.width = tex->base.width[id.bits.level]; + surface->base.height = tex->base.height[id.bits.level]; + surface->base.offset = tex->image_offset[id.bits.level][id.bits.face]; + surface->base.usage = usage; + surface->base.zslice = id.bits.zslice; + surface->base.face = id.bits.face; + surface->base.level = id.bits.level; surface->id = id; - + + pipe_texture_reference( &surface->base.texture, &tex->base ); + + surface->ss.ss0.surface_format = tex->ss.ss0.surface_format; + surface->ss.ss0.surface_type = BRW_SURFACE_2D; + + if (tex->tiling == BRW_TILING_NONE) { + surface->ss.ss1.base_addr = surface->base.offset; + } else { + uint32_t tile_offset = surface->base.offset % 4096; + + surface->ss.ss1.base_addr = surface->base.offset - tile_offset; + + if (brw_screen->chipset.is_g4x) { + if (tex->tiling == BRW_TILING_X) { + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. 
+ */ + surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4; + surface->ss.ss5.y_offset = tile_offset / 512 / 2; + } else { + surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4; + surface->ss.ss5.y_offset = tile_offset / 128 / 2; + } + } + else { + assert(tile_offset == 0); + } + } + +#if 0 + if (region_bo != NULL) + surface->ss.ss1.base_addr += region_bo->offset; /* reloc */ +#endif + + surface->ss.ss2.width = surface->base.width - 1; + surface->ss.ss2.height = surface->base.height - 1; + surface->ss.ss3.tiled_surface = tex->ss.ss3.tiled_surface; + surface->ss.ss3.tile_walk = tex->ss.ss3.tile_walk; + surface->ss.ss3.pitch = tex->ss.ss3.pitch; + + return surface; } /* Get a surface which is view into a texture */ -struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen, - struct pipe_texture *texture, - unsigned face, unsigned level, - unsigned zslice, - unsigned usage ) +static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, + unsigned zslice, + unsigned usage ) { + struct brw_texture *tex = brw_texture(pt); struct brw_screen *bscreen = brw_screen(screen); - struct brw_surface_id id; + struct brw_surface *surface; + union brw_surface_id id; + int type; - id.face = face; - id.level = level; - id.zslice = zslice; + id.bits.face = face; + id.bits.level = level; + id.bits.zslice = zslice; - if (need_linear_view(brw_screen, brw_tex, id)) + if (need_linear_view(bscreen, tex, id, usage)) type = BRW_VIEW_LINEAR; else type = BRW_VIEW_IN_PLACE; - foreach (surface, texture->views[type]) { + foreach (surface, &tex->views[type]) { if (id.value == surface->id.value) - return surface; + return &surface->base; } switch (type) { case BRW_VIEW_LINEAR: - surface = create_linear_view( texture, id, type ); + surface = create_linear_view( bscreen, tex, id, usage ); break; case BRW_VIEW_IN_PLACE: - surface = create_in_place_view( texture, id, type ); + surface = 
create_in_place_view( bscreen, tex, id, usage ); break; default: return NULL; } - insert_at_head( texture->views[type], surface ); - return surface; + insert_at_head( &tex->views[type], surface ); + return &surface->base; +} + + +static void brw_tex_surface_destroy( struct pipe_surface *surface ) +{ + /* Unreference texture, shared buffer: + */ + + FREE(surface); } -void brw_tex_surface_destroy( struct pipe_surface *surface ) +void brw_screen_tex_surface_init( struct brw_screen *brw_screen ) { + brw_screen->base.get_tex_surface = brw_get_tex_surface; + brw_screen->base.tex_surface_destroy = brw_tex_surface_destroy; } diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 989013953b..3fd486986f 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -171,8 +171,7 @@ static GLuint translate_tex_format( enum pipe_format pf ) return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; default: - assert(0); - return 0; + return BRW_SURFACEFORMAT_INVALID; } } @@ -180,7 +179,7 @@ static GLuint translate_tex_format( enum pipe_format pf ) -static struct pipe_texture *brw_create_texture( struct pipe_screen *screen, +static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, const struct pipe_texture *templ ) { @@ -218,6 +217,7 @@ static struct pipe_texture *brw_create_texture( struct pipe_screen *screen, tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); tex->ss.ss0.surface_format = translate_tex_format(tex->base.format); + assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID); /* This is ok for all textures with channel width 8bit or less: */ @@ -281,8 +281,20 @@ static void brw_texture_destroy(struct pipe_texture *pt) } +static boolean brw_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + 
unsigned geom_flags ) +{ + return translate_tex_format(format) != BRW_SURFACEFORMAT_INVALID; +} - - - +void brw_screen_tex_init( struct brw_screen *brw_screen ) +{ + brw_screen->base.is_format_supported = brw_is_format_supported; + brw_screen->base.texture_create = brw_texture_create; + brw_screen->base.texture_destroy = brw_texture_destroy; + brw_screen->base.texture_blanket = brw_texture_blanket; +} -- cgit v1.2.3 From efda453d646c767fbf0f8e85aa09178095ab04d6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 1 Nov 2009 17:55:16 +0000 Subject: i965g: the whole drivers/i965 directory is compiling That was a lot more work than I expected. Still the winsys to go, then the small matter of making it work and re-enabling the missing functionality. --- src/gallium/drivers/i965/brw_batchbuffer.c | 85 ++++++++++++------------ src/gallium/drivers/i965/brw_batchbuffer.h | 64 ++++++++++++++---- src/gallium/drivers/i965/brw_winsys.h | 102 +++++------------------------ 3 files changed, 108 insertions(+), 143 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index 1cffc0ab39..080c92046b 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -25,38 +25,42 @@ * **************************************************************************/ +#include "util/u_memory.h" + +#include "pipe/p_error.h" + #include "brw_batchbuffer.h" -#include "brw_decode.h" +//#include "brw_decode.h" #include "brw_reg.h" #include "brw_winsys.h" +#include "brw_debug.h" +#include "brw_structs.h" +#define BATCH_SIZE (32*1024) +#define USE_LOCAL_BUFFER 1 +#define ALWAYS_EMIT_MI_FLUSH 1 void brw_batchbuffer_reset(struct brw_batchbuffer *batch) { - struct intel_context *intel = batch->intel; - if (batch->buf != NULL) { - brw->sws->bo_unreference(batch->buf); + batch->sws->bo_unreference(batch->buf); batch->buf = NULL; } - if (!batch->buffer && 
intel->ttm == GL_TRUE) - batch->buffer = malloc (intel->maxBatchSize); + if (USE_LOCAL_BUFFER && !batch->buffer) + batch->buffer = MALLOC(BATCH_SIZE); batch->buf = batch->sws->bo_alloc(batch->sws, BRW_BUFFER_TYPE_BATCH, - intel->maxBatchSize, 4096); + BATCH_SIZE, 4096); if (batch->buffer) batch->map = batch->buffer; - else { - batch->sws->bo_map(batch->buf, GL_TRUE); - batch->map = batch->buf->virtual; - } - batch->size = intel->maxBatchSize; + else + batch->map = batch->sws->bo_map(batch->buf, GL_TRUE); + + batch->size = BATCH_SIZE; batch->ptr = batch->map; - batch->dirty_state = ~0; - batch->cliprect_mode = IGNORE_CLIPRECTS; } struct brw_batchbuffer * @@ -74,79 +78,74 @@ void brw_batchbuffer_free(struct brw_batchbuffer *batch) { if (batch->map) { - dri_bo_unmap(batch->buf); + batch->sws->bo_unmap(batch->buf); batch->map = NULL; } - brw->sws->bo_unreference(batch->buf); + + batch->sws->bo_unreference(batch->buf); batch->buf = NULL; + + FREE(batch->buffer); FREE(batch); } void -_brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, - int line) +_brw_batchbuffer_flush(struct brw_batchbuffer *batch, + const char *file, + int line) { - struct intel_context *intel = batch->intel; GLuint used = batch->ptr - batch->map; if (used == 0) return; - if (intel->first_post_swapbuffers_batch == NULL) { - intel->first_post_swapbuffers_batch = intel->batch->buf; - batch->sws->bo_reference(intel->first_post_swapbuffers_batch); - } - - if (intel->first_post_swapbuffers_batch == NULL) { - intel->first_post_swapbuffers_batch = intel->batch->buf; - batch->sws->bo_reference(intel->first_post_swapbuffers_batch); - } - + /* Post-swap throttling done by the state tracker. 
+ */ if (BRW_DEBUG & DEBUG_BATCH) - debug_printf("%s:%d: Batchbuffer flush with %db used\n", file, line, - used); + debug_printf("%s:%d: Batchbuffer flush with %db used\n", + file, line, used); -#if 0 - if (intel->always_flush_cache || 1) { - *(GLuint *) (batch->ptr) = ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); + if (ALWAYS_EMIT_MI_FLUSH) { + *(GLuint *) (batch->ptr) = ((MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); batch->ptr += 4; used = batch->ptr - batch->map; } -#endif - - /* Round batchbuffer usage to 2 DWORDs. */ + /* Round batchbuffer usage to 2 DWORDs. + */ if ((used & 4) == 0) { *(GLuint *) (batch->ptr) = 0; /* noop */ batch->ptr += 4; used = batch->ptr - batch->map; } - /* Mark the end of the buffer. */ - *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */ + /* Mark the end of the buffer. + */ + *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; batch->ptr += 4; used = batch->ptr - batch->map; batch->sws->bo_unmap(batch->buf); - batch->map = NULL; batch->ptr = NULL; batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 ); - + +#if 0 if (BRW_DEBUG & DEBUG_BATCH) { void *ptr = batch->sws->bo_map(batch->buf, GL_FALSE); intel_decode(ptr, used / 4, batch->buf->offset, - batch->chipset); + batch->chipset.pci_id); batch->sws->bo_unmap(batch->buf); } +#endif if (BRW_DEBUG & DEBUG_SYNC) { /* Abuse map/unmap to achieve wait-for-fence. 
@@ -214,7 +213,7 @@ brw_batchbuffer_data(struct brw_batchbuffer *batch, if (ret) return ret; - __memcpy(batch->ptr, data, bytes); + memcpy(batch->ptr, data, bytes); batch->ptr += bytes; return 0; } diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 25bb9cefca..d687b79f93 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -18,6 +18,43 @@ enum cliprect_mode { REFERENCES_CLIPRECTS }; + + + +struct brw_batchbuffer { + + struct brw_winsys_screen *sws; + struct brw_winsys_buffer *buf; + + /* Main-memory copy of the batch-buffer, built up incrementally & + * then copied as one to the true buffer. + * + * XXX: is this still necessary? + * XXX: if so, can this be hidden inside the GEM-specific winsys code? + */ + uint8_t *buffer; + + /** + * Values exported to speed up the writing the batchbuffer, + * instead of having to go trough a accesor function for + * each dword written. + */ + /*{@*/ + uint8_t *map; + uint8_t *ptr; + size_t size; + struct { + uint8_t *end_ptr; + } emit; + + + size_t relocs; + size_t max_relocs; + /*@}*/ +}; + +struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws ); + void brw_batchbuffer_free(struct brw_batchbuffer *batch); void _brw_batchbuffer_flush(struct brw_batchbuffer *batch, @@ -83,26 +120,27 @@ brw_batchbuffer_require_space(struct brw_batchbuffer *batch, /* Here are the crusty old macros, to be removed: */ #define BEGIN_BATCH(n, cliprect_mode) do { \ - brw_batchbuffer_require_space(brw->batch, (n)*4); \ -} while (0) + brw_batchbuffer_require_space(brw->batch, (n)*4); \ + } while (0) #define OUT_BATCH(d) brw_batchbuffer_emit_dword(brw->batch, d) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ - assert((unsigned) (delta) < buf->size); \ - brw_batchbuffer_emit_reloc(brw->batch, buf, \ - read_domains, write_domain, delta); \ -} while (0) + assert((unsigned) (delta) < buf->size); \ + 
brw_batchbuffer_emit_reloc(brw->batch, buf, \ + read_domains, write_domain, delta); \ + } while (0) #ifdef DEBUG #define ADVANCE_BATCH() do { \ - unsigned int _n = brw->batch->ptr - brw->batch->emit.end_ptr; \ - if (_n != 0) { \ - debug_printf("%s: %d too many bytes emitted to batch\n", __FUNCTION__, _n); \ - abort(); \ - } \ - brw->batch->emit.end_ptr = NULL; \ -} while(0) + unsigned int _n = brw->batch->ptr - brw->batch->emit.end_ptr; \ + if (_n != 0) { \ + debug_printf("%s: %d too many bytes emitted to batch\n", \ + __FUNCTION__, _n); \ + abort(); \ + } \ + brw->batch->emit.end_ptr = NULL; \ + } while(0) #else #define ADVANCE_BATCH() #endif diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index 66a94b4b00..bc3d31196c 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -72,66 +72,8 @@ enum brw_buffer_type BRW_BUFFER_TYPE_STATE_CACHE, }; - -/* AKA winsys context: - */ -struct brw_batchbuffer { - - struct brw_winsys *iws; - struct brw_winsys_buffer *buf; - - /** - * Values exported to speed up the writing the batchbuffer, - * instead of having to go trough a accesor function for - * each dword written. - */ - /*{@*/ - uint8_t *map; - uint8_t *ptr; - size_t size; - struct { - uint8_t *end_ptr; - } emit; - - - size_t relocs; - size_t max_relocs; - /*@}*/ -}; - struct brw_winsys_screen { - /** - * Batchbuffer functions. - */ - /*@{*/ - /** - * Create a new batchbuffer. - */ - struct brw_batchbuffer *(*batchbuffer_create)(struct brw_winsys_screen *iws); - - /** - * Emit a relocation to a buffer. - * Target position in batchbuffer is the same as ptr. - */ - int (*batchbuffer_reloc)(struct brw_batchbuffer *batch, - unsigned offset, - struct brw_winsys_buffer *reloc, - unsigned pre_add, - enum brw_buffer_usage usage); - - /** - * Flush a bufferbatch. - */ - void (*batchbuffer_flush)(struct brw_batchbuffer *batch, - struct pipe_fence_handle **fence); - - /** - * Destroy a batchbuffer. 
- */ - void (*batchbuffer_destroy)(struct brw_batchbuffer *batch); - /*@}*/ - /** * Buffer functions. @@ -150,12 +92,21 @@ struct brw_winsys_screen { */ void (*bo_reference)( struct brw_winsys_buffer *buffer ); void (*bo_unreference)( struct brw_winsys_buffer *buffer ); - void (*bo_emit_reloc)( struct brw_winsys_buffer *buffer, - unsigned domain, - unsigned a, - unsigned b, - unsigned offset, - struct brw_winsys_buffer *b2); + + /* XXX: parameter names!! + */ + int (*bo_emit_reloc)( struct brw_winsys_buffer *buffer, + unsigned domain, + unsigned a, + unsigned b, + unsigned offset, + struct brw_winsys_buffer *b2); + + int (*bo_exec)( struct brw_winsys_buffer *buffer, + unsigned bytes_used, + void *foo, + int a, + int b ); void (*bo_subdata)(struct brw_winsys_buffer *buffer, size_t offset, @@ -186,29 +137,6 @@ struct brw_winsys_screen { /*@}*/ - /** - * Fence functions. - */ - /*@{*/ - /** - * Reference fence and set ptr to fence. - */ - void (*fence_reference)(struct brw_winsys *iws, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence); - - /** - * Check if a fence has finished. - */ - int (*fence_signalled)(struct brw_winsys *iws, - struct pipe_fence_handle *fence); - - /** - * Wait on a fence to finish. - */ - int (*fence_finish)(struct brw_winsys *iws, - struct pipe_fence_handle *fence); - /*@}*/ /** -- cgit v1.2.3 From 15a8ac2c9d6ed13468ef88f3f3bd3ccf4ee2fd0e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 1 Nov 2009 19:30:53 +0000 Subject: i965g: driver and winsys compile A milestone of sorts. Still a long way from something working -- the old one compiled too, at least some of the time... 
--- src/gallium/drivers/i965/brw_batchbuffer.c | 11 +- src/gallium/drivers/i965/brw_cc.c | 2 +- src/gallium/drivers/i965/brw_clip_state.c | 2 +- src/gallium/drivers/i965/brw_gs_state.c | 2 +- src/gallium/drivers/i965/brw_screen_texture.c | 6 +- src/gallium/drivers/i965/brw_sf_state.c | 4 +- src/gallium/drivers/i965/brw_state_dump.c | 6 +- src/gallium/drivers/i965/brw_vs_state.c | 2 +- src/gallium/drivers/i965/brw_winsys.h | 39 +-- src/gallium/drivers/i965/brw_wm_sampler_state.c | 2 +- src/gallium/drivers/i965/brw_wm_state.c | 6 +- src/gallium/drivers/i965/brw_wm_surface_state.c | 4 +- src/gallium/winsys/drm/i965/gem/Makefile | 4 +- src/gallium/winsys/drm/i965/gem/SConscript | 2 - src/gallium/winsys/drm/i965/gem/i965_drm_api.c | 105 ++++---- .../winsys/drm/i965/gem/i965_drm_batchbuffer.c | 244 ------------------ src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c | 287 ++++++++++++++------- src/gallium/winsys/drm/i965/gem/i965_drm_fence.c | 81 ------ src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h | 50 ++-- 19 files changed, 308 insertions(+), 551 deletions(-) delete mode 100644 src/gallium/winsys/drm/i965/gem/i965_drm_batchbuffer.c delete mode 100644 src/gallium/winsys/drm/i965/gem/i965_drm_fence.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index 080c92046b..72650cdb5d 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -36,7 +36,6 @@ #include "brw_debug.h" #include "brw_structs.h" -#define BATCH_SIZE (32*1024) #define USE_LOCAL_BUFFER 1 #define ALWAYS_EMIT_MI_FLUSH 1 @@ -49,17 +48,17 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch) } if (USE_LOCAL_BUFFER && !batch->buffer) - batch->buffer = MALLOC(BATCH_SIZE); + batch->buffer = MALLOC(BRW_BATCH_SIZE); batch->buf = batch->sws->bo_alloc(batch->sws, BRW_BUFFER_TYPE_BATCH, - BATCH_SIZE, 4096); + BRW_BATCH_SIZE, 4096); if (batch->buffer) batch->map = 
batch->buffer; else batch->map = batch->sws->bo_map(batch->buf, GL_TRUE); - batch->size = BATCH_SIZE; + batch->size = BRW_BATCH_SIZE; batch->ptr = batch->map; } @@ -132,7 +131,7 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, batch->map = NULL; batch->ptr = NULL; - batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 ); + batch->sws->bo_exec(batch->buf, used ); #if 0 if (BRW_DEBUG & DEBUG_BATCH) { @@ -196,7 +195,7 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, * the buffer doesn't move and we can short-circuit the relocation processing * in the kernel */ - brw_batchbuffer_emit_dword (batch, buffer->offset + delta); + brw_batchbuffer_emit_dword (batch, buffer->offset[0] + delta); return 0; } diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index bdd6418ae1..cf3791e11e 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -137,7 +137,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) cc.cc3 = key->cc3; /* CACHE_NEW_CC_VP */ - cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ + cc.cc4.cc_viewport_state_offset = *(brw->cc.vp_bo->offset) >> 5; /* reloc */ cc.cc5 = key->cc5; cc.cc6 = key->cc6; diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index bf4e6f5103..31e2e0bc17 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -83,7 +83,7 @@ clip_unit_create_from_key(struct brw_context *brw, clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; /* reloc */ - clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; + clip.thread0.kernel_start_pointer = *(brw->clip.prog_bo->offset) >> 6; clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; clip.thread1.single_program_flow = 1; diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index 15a66c9741..9046969394 
100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -79,7 +79,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; if (key->prog_active) /* reloc */ - gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; + gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset[0] >> 6; gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; gs.thread1.single_program_flow = 1; diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 3fd486986f..48b3451bfc 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -222,7 +222,11 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, /* This is ok for all textures with channel width 8bit or less: */ /* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - tex->ss.ss1.base_addr = tex->bo->offset; /* reloc */ + + + /* XXX: what happens when tex->bo->offset changes??? 
+ */ + tex->ss.ss1.base_addr = tex->bo->offset[0]; /* reloc */ tex->ss.ss2.mip_count = tex->base.last_level; tex->ss.ss2.width = tex->base.width[0] - 1; tex->ss.ss2.height = tex->base.height[0] - 1; diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index fbc9f15eb4..4ab5709d53 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -138,7 +138,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, memset(&sf, 0, sizeof(sf)); sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; - sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */ + sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset[0] >> 6; /* reloc */ sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -171,7 +171,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ - sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */ + sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset[0] >> 5; /* reloc */ sf.sf5.viewport_transform = 1; diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c index 72604304d4..345e42a6b2 100644 --- a/src/gallium/drivers/i965/brw_state_dump.c +++ b/src/gallium/drivers/i965/brw_state_dump.c @@ -67,7 +67,7 @@ state_struct_out(struct brw_winsys_screen *sws, data = sws->bo_map(buffer, GL_FALSE); for (i = 0; i < state_size / 4; i++) { - state_out(name, data, buffer->offset, i, + state_out(name, data, buffer->offset[0], i, "dword %d\n", i); } sws->bo_unmap(buffer); @@ -115,7 +115,7 @@ static void dump_wm_surface_state(struct brw_context *brw) continue; } surf = (struct brw_surface_state *)brw->sws->bo_map(surf_bo, GL_FALSE); - surfoff = surf_bo->offset; + surfoff = surf_bo->offset[0]; sprintf(name, "WM SS%d", i); state_out(name, surf, surfoff, 0, "%s %s\n", @@ -145,7 +145,7 @@ 
static void dump_sf_viewport_state(struct brw_context *brw) return; vp = (struct brw_sf_viewport *)brw->sws->bo_map(brw->sf.vp_bo, GL_FALSE); - vp_off = brw->sf.vp_bo->offset; + vp_off = brw->sf.vp_bo->offset[0]; state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index 549696f7ae..6a2395dd96 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -87,7 +87,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) memset(&vs, 0, sizeof(vs)); - vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ + vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset[0] >> 6; /* reloc */ vs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index bc3d31196c..d19cd5d248 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -31,12 +31,15 @@ struct brw_winsys; struct pipe_fence_handle; -/* This currently just wraps dri_bo: +/* Not sure why the winsys needs this: + */ +#define BRW_BATCH_SIZE (32*1024) + + +/* Need a tiny bit of information inside the abstract buffer struct: */ struct brw_winsys_buffer { - struct brw_winsys_screen *sws; - void *bo; - unsigned offset; + unsigned *offset; unsigned size; }; @@ -70,6 +73,8 @@ enum brw_buffer_type BRW_BUFFER_TYPE_WM_SCRATCH, BRW_BUFFER_TYPE_BATCH, BRW_BUFFER_TYPE_STATE_CACHE, + + BRW_BUFFER_TYPE_MAX /* Count of possible values */ }; struct brw_winsys_screen { @@ -103,12 +108,9 @@ struct brw_winsys_screen { struct brw_winsys_buffer *b2); int (*bo_exec)( struct brw_winsys_buffer *buffer, - unsigned 
bytes_used, - void *foo, - int a, - int b ); + unsigned bytes_used ); - void (*bo_subdata)(struct brw_winsys_buffer *buffer, + int (*bo_subdata)(struct brw_winsys_buffer *buffer, size_t offset, size_t size, const void *data); @@ -142,14 +144,14 @@ struct brw_winsys_screen { /** * Destroy the winsys. */ - void (*destroy)(struct brw_winsys *iws); + void (*destroy)(struct brw_winsys_screen *iws); }; /** * Create brw pipe_screen. */ -struct pipe_screen *brw_create_screen(struct brw_winsys *iws, unsigned pci_id); +struct pipe_screen *brw_create_screen(struct brw_winsys_screen *iws, unsigned pci_id); /** * Create a brw pipe_context. @@ -162,19 +164,20 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen); * TODO UGLY */ struct pipe_texture; -boolean brw_get_texture_buffer_brw(struct pipe_texture *texture, - struct brw_winsys_buffer **buffer, - unsigned *stride); +boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, + struct brw_winsys_buffer **buffer, + unsigned *stride); /** * Wrap a brw_winsys buffer with a texture blanket. 
* * TODO UGLY */ -struct pipe_texture * brw_texture_blanket_ws(struct pipe_screen *screen, - const struct pipe_texture *tmplt, - const unsigned *stride, - struct brw_winsys_buffer *buffer); +struct pipe_texture * +brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, + const struct pipe_texture *template, + const unsigned pitch, + struct brw_winsys_buffer *buffer); diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c index ddd88d6e22..d43968c85a 100644 --- a/src/gallium/drivers/i965/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -81,7 +81,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, entry->ss0 = sampler->ss0; entry->ss1 = sampler->ss1; - entry->ss2.default_color_pointer = brw->wm.sdc_bo[i]->offset >> 5; /* reloc */ + entry->ss2.default_color_pointer = brw->wm.sdc_bo[i]->offset[0] >> 5; /* reloc */ entry->ss3 = sampler->ss3; /* Cube-maps on 965 and later must use the same wrap mode for all 3 diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index f161de9b40..5cfa8fe2d1 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -148,7 +148,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, memset(&wm, 0, sizeof(wm)); wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; - wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ + wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset[0] >> 6; /* reloc */ wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -159,7 +159,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, if (key->total_scratch != 0) { wm.thread2.scratch_space_base_pointer = - brw->wm.scratch_bo->offset >> 10; /* reloc */ + brw->wm.scratch_bo->offset[0] >> 10; /* reloc */ wm.thread2.per_thread_scratch_space 
= key->total_scratch / 1024 - 1; } else { wm.thread2.scratch_space_base_pointer = 0; @@ -179,7 +179,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, if (brw->wm.sampler_bo != NULL) { /* reloc */ - wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; + wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset[0] >> 5; } else { wm.wm4.sampler_state_pointer = 0; } diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index 88485c76cb..f55a6c4af2 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -118,7 +118,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, */ brw->sws->bo_emit_reloc(brw->wm.surf_bo[unit], I915_GEM_DOMAIN_RENDER, 0, - ss.ss1.base_addr - surface->bo->offset, /* XXX */ + ss.ss1.base_addr - surface->bo->offset[0], /* XXX */ offsetof(struct brw_surface_state, ss1), surface->bo); } @@ -150,7 +150,7 @@ brw_wm_get_binding_table(struct brw_context *brw) int i; for (i = 0; i < brw->wm.nr_surfaces; i++) - data[i] = brw->wm.surf_bo[i]->offset; + data[i] = brw->wm.surf_bo[i]->offset[0]; bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, diff --git a/src/gallium/winsys/drm/i965/gem/Makefile b/src/gallium/winsys/drm/i965/gem/Makefile index 74d81b4bc8..6a7497b6be 100644 --- a/src/gallium/winsys/drm/i965/gem/Makefile +++ b/src/gallium/winsys/drm/i965/gem/Makefile @@ -1,12 +1,10 @@ TOP = ../../../../../.. 
include $(TOP)/configs/current -LIBNAME = inteldrm +LIBNAME = i965drm C_SOURCES = \ - i965_drm_batchbuffer.c \ i965_drm_buffer.c \ - i965_drm_fence.c \ i965_drm_api.c LIBRARY_INCLUDES = $(shell pkg-config libdrm --cflags-only-I) diff --git a/src/gallium/winsys/drm/i965/gem/SConscript b/src/gallium/winsys/drm/i965/gem/SConscript index 9f1391caff..6256ec6eaf 100644 --- a/src/gallium/winsys/drm/i965/gem/SConscript +++ b/src/gallium/winsys/drm/i965/gem/SConscript @@ -4,9 +4,7 @@ env = drienv.Clone() i965drm_sources = [ 'i965_drm_api.c', - 'i965_drm_batchbuffer.c', 'i965_drm_buffer.c', - 'i965_drm_fence.c', ] i965drm = env.ConvenienceLibrary( diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c index de68cb3551..8b9c777a6f 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c @@ -1,11 +1,12 @@ +#include #include "state_tracker/drm_api.h" #include "i965_drm_winsys.h" #include "util/u_memory.h" -#include "brw/brw_context.h" /* XXX: shouldn't be doing this */ -#include "brw/brw_screen.h" /* XXX: shouldn't be doing this */ +#include "i965/brw_context.h" /* XXX: shouldn't be doing this */ +#include "i965/brw_screen.h" /* XXX: shouldn't be doing this */ #include "trace/tr_drm.h" @@ -15,7 +16,7 @@ static void -i965_drm_get_device_id(unsigned int *device_id) +i965_libdrm_get_device_id(unsigned int *device_id) { char path[512]; FILE *file; @@ -36,29 +37,28 @@ i965_drm_get_device_id(unsigned int *device_id) fclose(file); } -static struct i965_buffer * -i965_drm_buffer_from_handle(struct i965_drm_winsys *idws, +static struct i965_libdrm_buffer * +i965_libdrm_buffer_from_handle(struct i965_libdrm_winsys *idws, const char* name, unsigned handle) { - struct i965_drm_buffer *buf = CALLOC_STRUCT(i965_drm_buffer); + struct i965_libdrm_buffer *buf = CALLOC_STRUCT(i965_libdrm_buffer); uint32_t tile = 0, swizzle = 0; if (!buf) return NULL; - buf->magic = 0xDEAD1337; - buf->bo 
= drm_i965_bo_gem_create_from_name(idws->pools.gem, name, handle); + buf->bo = drm_intel_bo_gem_create_from_name(idws->gem, name, handle); buf->flinked = TRUE; buf->flink = handle; if (!buf->bo) goto err; - drm_i965_bo_get_tiling(buf->bo, &tile, &swizzle); - if (tile != I965_TILE_NONE) + drm_intel_bo_get_tiling(buf->bo, &tile, &swizzle); + if (tile != 0) buf->map_gtt = TRUE; - return (struct i965_buffer *)buf; + return buf; err: FREE(buf); @@ -72,38 +72,43 @@ err: static struct pipe_texture * -i965_drm_texture_from_shared_handle(struct drm_api *api, +i965_libdrm_texture_from_shared_handle(struct drm_api *api, struct pipe_screen *screen, - struct pipe_texture *templ, + struct pipe_texture *template, const char* name, unsigned pitch, unsigned handle) { - struct i965_drm_winsys *idws = i965_drm_winsys(i965_screen(screen)->iws); - struct i965_buffer *buffer; + /* XXX: this is silly -- there should be a way to get directly from + * the "drm_api" struct to ourselves, without peering into + * unrelated code: + */ + struct i965_libdrm_winsys *idws = i965_libdrm_winsys(brw_screen(screen)->sws); + struct i965_libdrm_buffer *buffer; - buffer = i965_drm_buffer_from_handle(idws, name, handle); + buffer = i965_libdrm_buffer_from_handle(idws, name, handle); if (!buffer) return NULL; - return i965_texture_blanket_i965(screen, templ, pitch, buffer); + return brw_texture_blanket_winsys_buffer(screen, template, pitch, &buffer->base); } + static boolean -i965_drm_shared_handle_from_texture(struct drm_api *api, +i965_libdrm_shared_handle_from_texture(struct drm_api *api, struct pipe_screen *screen, struct pipe_texture *texture, unsigned *pitch, unsigned *handle) { - struct i965_drm_buffer *buf = NULL; - struct i965_buffer *buffer = NULL; - if (!i965_get_texture_buffer_i965(texture, &buffer, pitch)) + struct i965_libdrm_buffer *buf = NULL; + struct brw_winsys_buffer *buffer = NULL; + if (!brw_texture_get_winsys_buffer(texture, &buffer, pitch)) return FALSE; - buf = 
i965_drm_buffer(buffer); + buf = i965_libdrm_buffer(buffer); if (!buf->flinked) { - if (drm_i965_bo_flink(buf->bo, &buf->flink)) + if (drm_intel_bo_flink(buf->bo, &buf->flink)) return FALSE; buf->flinked = TRUE; } @@ -114,36 +119,36 @@ i965_drm_shared_handle_from_texture(struct drm_api *api, } static boolean -i965_drm_local_handle_from_texture(struct drm_api *api, +i965_libdrm_local_handle_from_texture(struct drm_api *api, struct pipe_screen *screen, struct pipe_texture *texture, unsigned *pitch, unsigned *handle) { - struct i965_buffer *buffer = NULL; - if (!i965_get_texture_buffer_i965(texture, &buffer, pitch)) + struct brw_winsys_buffer *buffer = NULL; + if (!brw_texture_get_winsys_buffer(texture, &buffer, pitch)) return FALSE; - *handle = i965_drm_buffer(buffer)->bo->handle; + *handle = i965_libdrm_buffer(buffer)->bo->handle; return TRUE; } static void -i965_drm_winsys_destroy(struct i965_winsys *iws) +i965_libdrm_winsys_destroy(struct brw_winsys_screen *iws) { - struct i965_drm_winsys *idws = i965_drm_winsys(iws); + struct i965_libdrm_winsys *idws = i965_libdrm_winsys(iws); - drm_i965_bufmgr_destroy(idws->pools.gem); + drm_intel_bufmgr_destroy(idws->gem); FREE(idws); } static struct pipe_screen * -i965_drm_create_screen(struct drm_api *api, int drmFD, +i965_libdrm_create_screen(struct drm_api *api, int drmFD, struct drm_create_screen_arg *arg) { - struct i965_drm_winsys *idws; + struct i965_libdrm_winsys *idws; unsigned int deviceID; if (arg != NULL) { @@ -155,35 +160,31 @@ i965_drm_create_screen(struct drm_api *api, int drmFD, } } - idws = CALLOC_STRUCT(i965_drm_winsys); + idws = CALLOC_STRUCT(i965_libdrm_winsys); if (!idws) return NULL; - i965_drm_get_device_id(&deviceID); + i965_libdrm_get_device_id(&deviceID); - i965_drm_winsys_init_batchbuffer_functions(idws); - i965_drm_winsys_init_buffer_functions(idws); - i965_drm_winsys_init_fence_functions(idws); + i965_libdrm_winsys_init_buffer_functions(idws); idws->fd = drmFD; idws->id = deviceID; - 
idws->max_batch_size = 16 * 4096; - idws->base.destroy = i965_drm_winsys_destroy; + idws->base.destroy = i965_libdrm_winsys_destroy; - idws->pools.gem = drm_i965_bufmgr_gem_init(idws->fd, idws->max_batch_size); - drm_i965_bufmgr_gem_enable_reuse(idws->pools.gem); + idws->gem = drm_intel_bufmgr_gem_init(idws->fd, BRW_BATCH_SIZE); + drm_intel_bufmgr_gem_enable_reuse(idws->gem); - idws->softpipe = FALSE; idws->dump_cmd = debug_get_bool_option("I965_DUMP_CMD", FALSE); - return i965_create_screen(&idws->base, deviceID); + return brw_create_screen(&idws->base, deviceID); } static struct pipe_context * -i965_drm_create_context(struct drm_api *api, struct pipe_screen *screen) +i965_libdrm_create_context(struct drm_api *api, struct pipe_screen *screen) { - return i965_create_context(screen); + return brw_create_context(screen); } static void @@ -192,18 +193,18 @@ destroy(struct drm_api *api) } -struct drm_api i965_drm_api = +struct drm_api i965_libdrm_api = { - .create_context = i965_drm_create_context, - .create_screen = i965_drm_create_screen, - .texture_from_shared_handle = i965_drm_texture_from_shared_handle, - .shared_handle_from_texture = i965_drm_shared_handle_from_texture, - .local_handle_from_texture = i965_drm_local_handle_from_texture, + .create_context = i965_libdrm_create_context, + .create_screen = i965_libdrm_create_screen, + .texture_from_shared_handle = i965_libdrm_texture_from_shared_handle, + .shared_handle_from_texture = i965_libdrm_shared_handle_from_texture, + .local_handle_from_texture = i965_libdrm_local_handle_from_texture, .destroy = destroy, }; struct drm_api * drm_api_create() { - return trace_drm_create(&i965_drm_api); + return trace_drm_create(&i965_libdrm_api); } diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_batchbuffer.c b/src/gallium/winsys/drm/i965/gem/i965_drm_batchbuffer.c deleted file mode 100644 index 5b4dafc8e4..0000000000 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_batchbuffer.c +++ /dev/null @@ -1,244 +0,0 @@ - -#include 
"intel_drm_winsys.h" -#include "util/u_memory.h" - -#include "i915_drm.h" - -#define BATCH_RESERVED 16 - -#define INTEL_DEFAULT_RELOCS 100 -#define INTEL_MAX_RELOCS 400 - -#define INTEL_BATCH_NO_CLIPRECTS 0x1 -#define INTEL_BATCH_CLIPRECTS 0x2 - -#undef INTEL_RUN_SYNC -#undef INTEL_MAP_BATCHBUFFER -#undef INTEL_MAP_GTT -#define INTEL_ALWAYS_FLUSH - -struct intel_drm_batchbuffer -{ - struct intel_batchbuffer base; - - size_t actual_size; - - drm_intel_bo *bo; -}; - -static INLINE struct intel_drm_batchbuffer * -intel_drm_batchbuffer(struct intel_batchbuffer *batch) -{ - return (struct intel_drm_batchbuffer *)batch; -} - -static void -intel_drm_batchbuffer_reset(struct intel_drm_batchbuffer *batch) -{ - struct intel_drm_winsys *idws = intel_drm_winsys(batch->base.iws); - int ret; - - if (batch->bo) - drm_intel_bo_unreference(batch->bo); - batch->bo = drm_intel_bo_alloc(idws->pools.gem, - "gallium3d_batchbuffer", - batch->actual_size, - 4096); - -#ifdef INTEL_MAP_BATCHBUFFER -#ifdef INTEL_MAP_GTT - ret = drm_intel_gem_bo_map_gtt(batch->bo); -#else - ret = drm_intel_bo_map(batch->bo, TRUE); -#endif - assert(ret == 0); - batch->base.map = batch->bo->virtual; -#else - (void)ret; -#endif - - memset(batch->base.map, 0, batch->actual_size); - batch->base.ptr = batch->base.map; - batch->base.size = batch->actual_size - BATCH_RESERVED; - batch->base.relocs = 0; -} - -static struct intel_batchbuffer * -intel_drm_batchbuffer_create(struct intel_winsys *iws) -{ - struct intel_drm_winsys *idws = intel_drm_winsys(iws); - struct intel_drm_batchbuffer *batch = CALLOC_STRUCT(intel_drm_batchbuffer); - - batch->actual_size = idws->max_batch_size; - -#ifdef INTEL_MAP_BATCHBUFFER - batch->base.map = NULL; -#else - batch->base.map = MALLOC(batch->actual_size); -#endif - batch->base.ptr = NULL; - batch->base.size = 0; - - batch->base.relocs = 0; - batch->base.max_relocs = 300;/*INTEL_DEFAULT_RELOCS;*/ - - batch->base.iws = iws; - - intel_drm_batchbuffer_reset(batch); - - return 
&batch->base; -} - -static int -intel_drm_batchbuffer_reloc(struct intel_batchbuffer *ibatch, - struct intel_buffer *buffer, - enum intel_buffer_usage usage, - unsigned pre_add) -{ - struct intel_drm_batchbuffer *batch = intel_drm_batchbuffer(ibatch); - unsigned write_domain = 0; - unsigned read_domain = 0; - unsigned offset; - int ret = 0; - - assert(batch->base.relocs < batch->base.max_relocs); - - if (usage == INTEL_USAGE_SAMPLER) { - write_domain = 0; - read_domain = I915_GEM_DOMAIN_SAMPLER; - - } else if (usage == INTEL_USAGE_RENDER) { - write_domain = I915_GEM_DOMAIN_RENDER; - read_domain = I915_GEM_DOMAIN_RENDER; - - } else if (usage == INTEL_USAGE_2D_TARGET) { - write_domain = I915_GEM_DOMAIN_RENDER; - read_domain = I915_GEM_DOMAIN_RENDER; - - } else if (usage == INTEL_USAGE_2D_SOURCE) { - write_domain = 0; - read_domain = I915_GEM_DOMAIN_RENDER; - - } else if (usage == INTEL_USAGE_VERTEX) { - write_domain = 0; - read_domain = I915_GEM_DOMAIN_VERTEX; - - } else { - assert(0); - return -1; - } - - offset = (unsigned)(batch->base.ptr - batch->base.map); - - ret = drm_intel_bo_emit_reloc(batch->bo, offset, - intel_bo(buffer), pre_add, - read_domain, - write_domain); - - ((uint32_t*)batch->base.ptr)[0] = intel_bo(buffer)->offset + pre_add; - batch->base.ptr += 4; - - if (!ret) - batch->base.relocs++; - - return ret; -} - -static void -intel_drm_batchbuffer_flush(struct intel_batchbuffer *ibatch, - struct pipe_fence_handle **fence) -{ - struct intel_drm_batchbuffer *batch = intel_drm_batchbuffer(ibatch); - unsigned used = 0; - int ret = 0; - int i; - - assert(intel_batchbuffer_space(ibatch) >= 0); - - used = batch->base.ptr - batch->base.map; - assert((used & 3) == 0); - - -#ifdef INTEL_ALWAYS_FLUSH - /* MI_FLUSH | FLUSH_MAP_CACHE */ - intel_batchbuffer_dword(ibatch, (0x4<<23)|(1<<0)); - used += 4; -#endif - - if ((used & 4) == 0) { - /* MI_NOOP */ - intel_batchbuffer_dword(ibatch, 0); - } - /* MI_BATCH_BUFFER_END */ - intel_batchbuffer_dword(ibatch, (0xA<<23)); 
- - used = batch->base.ptr - batch->base.map; - assert((used & 4) == 0); - -#ifdef INTEL_MAP_BATCHBUFFER -#ifdef INTEL_MAP_GTT - drm_intel_gem_bo_unmap_gtt(batch->bo); -#else - drm_intel_bo_unmap(batch->bo); -#endif -#else - drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map); -#endif - - /* Do the sending to HW */ - ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0); - assert(ret == 0); - - if (intel_drm_winsys(ibatch->iws)->dump_cmd) { - unsigned *ptr; - drm_intel_bo_map(batch->bo, FALSE); - ptr = (unsigned*)batch->bo->virtual; - - debug_printf("%s:\n", __func__); - for (i = 0; i < used / 4; i++, ptr++) { - debug_printf("\t%08x: %08x\n", i*4, *ptr); - } - - drm_intel_bo_unmap(batch->bo); - } else { -#ifdef INTEL_RUN_SYNC - drm_intel_bo_map(batch->bo, FALSE); - drm_intel_bo_unmap(batch->bo); -#endif - } - - if (fence) { - ibatch->iws->fence_reference(ibatch->iws, fence, NULL); - -#ifdef INTEL_RUN_SYNC - /* we run synced to GPU so just pass null */ - (*fence) = intel_drm_fence_create(NULL); -#else - (*fence) = intel_drm_fence_create(batch->bo); -#endif - } - - intel_drm_batchbuffer_reset(batch); -} - -static void -intel_drm_batchbuffer_destroy(struct intel_batchbuffer *ibatch) -{ - struct intel_drm_batchbuffer *batch = intel_drm_batchbuffer(ibatch); - - if (batch->bo) - drm_intel_bo_unreference(batch->bo); - -#ifndef INTEL_MAP_BATCHBUFFER - FREE(batch->base.map); -#endif - FREE(batch); -} - -void intel_drm_winsys_init_batchbuffer_functions(struct intel_drm_winsys *idws) -{ - idws->base.batchbuffer_create = intel_drm_batchbuffer_create; - idws->base.batchbuffer_reloc = intel_drm_batchbuffer_reloc; - idws->base.batchbuffer_flush = intel_drm_batchbuffer_flush; - idws->base.batchbuffer_destroy = intel_drm_batchbuffer_destroy; -} diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c index 4f123bae05..5dbfd2e6b0 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c +++ 
b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c @@ -3,48 +3,58 @@ #include "util/u_memory.h" #include "i915_drm.h" - -static struct intel_buffer * -intel_drm_buffer_create(struct intel_winsys *iws, - unsigned size, unsigned alignment, - enum intel_buffer_type type) +#include "intel_bufmgr.h" + +const char *names[BRW_BUFFER_TYPE_MAX] = { + "texture", + "scanout", + "vertex", + "curbe", + "query", + "shader_constants", + "wm_scratch", + "batch", + "state_cache", +}; + +static struct brw_winsys_buffer * +i965_libdrm_bo_alloc( struct brw_winsys_screen *sws, + enum brw_buffer_type type, + unsigned size, + unsigned alignment ) { - struct intel_drm_buffer *buf = CALLOC_STRUCT(intel_drm_buffer); - struct intel_drm_winsys *idws = intel_drm_winsys(iws); - drm_intel_bufmgr *pool; - char *name; + struct i965_libdrm_winsys *idws = i965_libdrm_winsys(sws); + struct i965_libdrm_buffer *buf; + buf = CALLOC_STRUCT(i965_libdrm_buffer); if (!buf) return NULL; - buf->magic = 0xDEAD1337; - buf->flinked = FALSE; - buf->flink = 0; - buf->map_gtt = FALSE; - - if (type == INTEL_NEW_TEXTURE) { - name = "gallium3d_texture"; - pool = idws->pools.gem; - } else if (type == INTEL_NEW_VERTEX) { - name = "gallium3d_vertex"; - pool = idws->pools.gem; + switch (type) { + case BRW_BUFFER_TYPE_TEXTURE: + break; + case BRW_BUFFER_TYPE_VERTEX: buf->map_gtt = TRUE; - } else if (type == INTEL_NEW_SCANOUT) { - name = "gallium3d_scanout"; - pool = idws->pools.gem; + break; + case BRW_BUFFER_TYPE_SCANOUT: buf->map_gtt = TRUE; - } else { - assert(0); - name = "gallium3d_unknown"; - pool = idws->pools.gem; + break; + default: + break; } - buf->bo = drm_intel_bo_alloc(pool, name, size, alignment); + buf->bo = drm_intel_bo_alloc(idws->gem, + names[type], + size, + alignment); if (!buf->bo) goto err; - return (struct intel_buffer *)buf; + buf->base.offset = &buf->bo->offset; + buf->base.size = size; + + return &buf->base; err: assert(0); @@ -52,103 +62,186 @@ err: return NULL; } -static int 
-intel_drm_buffer_set_fence_reg(struct intel_winsys *iws, - struct intel_buffer *buffer, - unsigned stride, - enum intel_buffer_tile tile) + + + +/* Reference and unreference buffers: + */ +static void +i965_libdrm_bo_reference( struct brw_winsys_buffer *buffer ) { - struct intel_drm_buffer *buf = intel_drm_buffer(buffer); - assert(I915_TILING_NONE == INTEL_TILE_NONE); - assert(I915_TILING_X == INTEL_TILE_X); - assert(I915_TILING_Y == INTEL_TILE_Y); + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); - if (tile != INTEL_TILE_NONE) { - assert(buf->map_count == 0); - buf->map_gtt = TRUE; - } + /* I think we have to refcount ourselves and then just pass through + * the final dereference to the bo on destruction. + */ + buf->cheesy_refcount++; +} - return drm_intel_bo_set_tiling(buf->bo, &tile, stride); +static void +i965_libdrm_bo_unreference( struct brw_winsys_buffer *buffer ) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + + if (--buf->cheesy_refcount == 0) { + drm_intel_bo_unreference(buf->bo); + FREE(buffer); + } } -static void * -intel_drm_buffer_map(struct intel_winsys *iws, - struct intel_buffer *buffer, - boolean write) + /* XXX: parameter names!! 
+ */ +static int +i965_libdrm_bo_emit_reloc( struct brw_winsys_buffer *buffer, + unsigned domain, + unsigned a, + unsigned b, + unsigned offset, + struct brw_winsys_buffer *buffer2) { - struct intel_drm_buffer *buf = intel_drm_buffer(buffer); - drm_intel_bo *bo = intel_bo(buffer); - int ret = 0; + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + struct i965_libdrm_buffer *buf2 = i965_libdrm_buffer(buffer2); + int ret; - assert(bo); + ret = dri_bo_emit_reloc( buf->bo, domain, a, b, offset, buf2->bo ); + if (ret) + return -1; - if (buf->map_count) - goto out; + return 0; +} - if (buf->map_gtt) - ret = drm_intel_gem_bo_map_gtt(bo); - else - ret = drm_intel_bo_map(bo, write); +static int +i965_libdrm_bo_exec( struct brw_winsys_buffer *buffer, + unsigned bytes_used ) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + int ret; - buf->ptr = bo->virtual; + ret = dri_bo_exec(buf->bo, bytes_used, NULL, 0, 0); + if (ret) + return -1; + + return 0; +} + +static int +i965_libdrm_bo_subdata(struct brw_winsys_buffer *buffer, + size_t offset, + size_t size, + const void *data) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + int ret; - assert(ret == 0); -out: + /* XXX: use bo_map_gtt/memcpy/unmap_gtt under some circumstances??? 
+ */ + ret = drm_intel_bo_subdata(buf->bo, offset, size, (void*)data); if (ret) - return NULL; + return -1; + + return 0; +} - buf->map_count++; - return buf->ptr; + +static boolean +i965_libdrm_bo_is_busy(struct brw_winsys_buffer *buffer) +{ + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + + return drm_intel_bo_busy(buf->bo); } -static void -intel_drm_buffer_unmap(struct intel_winsys *iws, - struct intel_buffer *buffer) +static boolean +i965_libdrm_bo_references(struct brw_winsys_buffer *a, + struct brw_winsys_buffer *b) { - struct intel_drm_buffer *buf = intel_drm_buffer(buffer); + struct i965_libdrm_buffer *bufa = i965_libdrm_buffer(a); + struct i965_libdrm_buffer *bufb = i965_libdrm_buffer(b); - if (--buf->map_count) - return; + /* XXX: can't find this func: + */ + return drm_intel_bo_references(bufa->bo, bufb->bo); +} - if (buf->map_gtt) - drm_intel_gem_bo_unmap_gtt(intel_bo(buffer)); - else - drm_intel_bo_unmap(intel_bo(buffer)); +/* XXX: couldn't this be handled by returning true/false on + * bo_emit_reloc? + */ +static boolean +i965_libdrm_check_aperture_space( struct brw_winsys_screen *iws, + struct brw_winsys_buffer **buffers, + unsigned count ) +{ + static drm_intel_bo *bos[128]; + int i; + + if (count > Elements(bos)) { + assert(0); + return FALSE; + } + + for (i = 0; i < count; i++) + bos[i] = i965_libdrm_buffer(buffers[i])->bo; + + return dri_bufmgr_check_aperture_space(bos, count); } -static int -intel_drm_buffer_write(struct intel_winsys *iws, - struct intel_buffer *buffer, - size_t offset, - size_t size, - const void *data) +/** + * Map a buffer. 
+ */ +static void * +i965_libdrm_bo_map(struct brw_winsys_buffer *buffer, + boolean write) { - struct intel_drm_buffer *buf = intel_drm_buffer(buffer); + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + int ret; + + if (!buf->map_count) { + if (buf->map_gtt) { + ret = drm_intel_gem_bo_map_gtt(buf->bo); + if (ret) + return NULL; + } + else { + ret = drm_intel_bo_map(buf->bo, write); + if (ret) + return NULL; + } + } - return drm_intel_bo_subdata(buf->bo, offset, size, (void*)data); + buf->map_count++; + return buf->bo->virtual; } -static void -intel_drm_buffer_destroy(struct intel_winsys *iws, - struct intel_buffer *buffer) +/** + * Unmap a buffer. + */ +static void +i965_libdrm_bo_unmap(struct brw_winsys_buffer *buffer) { - drm_intel_bo_unreference(intel_bo(buffer)); + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); -#ifdef DEBUG - intel_drm_buffer(buffer)->magic = 0; - intel_drm_buffer(buffer)->bo = NULL; -#endif + if (--buf->map_count > 0) + return; - FREE(buffer); + if (buf->map_gtt) + drm_intel_gem_bo_unmap_gtt(buf->bo); + else + drm_intel_bo_unmap(buf->bo); } + void -intel_drm_winsys_init_buffer_functions(struct intel_drm_winsys *idws) +i965_libdrm_winsys_init_buffer_functions(struct i965_libdrm_winsys *idws) { - idws->base.buffer_create = intel_drm_buffer_create; - idws->base.buffer_set_fence_reg = intel_drm_buffer_set_fence_reg; - idws->base.buffer_map = intel_drm_buffer_map; - idws->base.buffer_unmap = intel_drm_buffer_unmap; - idws->base.buffer_write = intel_drm_buffer_write; - idws->base.buffer_destroy = intel_drm_buffer_destroy; + idws->base.bo_alloc = i965_libdrm_bo_alloc; + idws->base.bo_reference = i965_libdrm_bo_reference; + idws->base.bo_unreference = i965_libdrm_bo_unreference; + idws->base.bo_emit_reloc = i965_libdrm_bo_emit_reloc; + idws->base.bo_exec = i965_libdrm_bo_exec; + idws->base.bo_subdata = i965_libdrm_bo_subdata; + idws->base.bo_is_busy = i965_libdrm_bo_is_busy; + idws->base.bo_references = 
i965_libdrm_bo_references; + idws->base.check_aperture_space = i965_libdrm_check_aperture_space; + idws->base.bo_map = i965_libdrm_bo_map; + idws->base.bo_unmap = i965_libdrm_bo_unmap; } diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_fence.c b/src/gallium/winsys/drm/i965/gem/i965_drm_fence.c deleted file mode 100644 index e70bfe7b44..0000000000 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_fence.c +++ /dev/null @@ -1,81 +0,0 @@ - -#include "intel_drm_winsys.h" -#include "util/u_memory.h" -#include "pipe/p_refcnt.h" - -/** - * Because gem does not have fence's we have to create our own fences. - * - * They work by keeping the batchbuffer around and checking if that has - * been idled. If bo is NULL fence has expired. - */ -struct intel_drm_fence -{ - struct pipe_reference reference; - drm_intel_bo *bo; -}; - - -struct pipe_fence_handle * -intel_drm_fence_create(drm_intel_bo *bo) -{ - struct intel_drm_fence *fence = CALLOC_STRUCT(intel_drm_fence); - - pipe_reference_init(&fence->reference, 1); - /* bo is null if fence already expired */ - if (bo) { - drm_intel_bo_reference(bo); - fence->bo = bo; - } - - return (struct pipe_fence_handle *)fence; -} - -static void -intel_drm_fence_reference(struct intel_winsys *iws, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence) -{ - struct intel_drm_fence *old = (struct intel_drm_fence *)*ptr; - struct intel_drm_fence *f = (struct intel_drm_fence *)fence; - - if (pipe_reference((struct pipe_reference**)ptr, &f->reference)) { - if (old->bo) - drm_intel_bo_unreference(old->bo); - FREE(old); - } -} - -static int -intel_drm_fence_signalled(struct intel_winsys *iws, - struct pipe_fence_handle *fence) -{ - assert(0); - - return 0; -} - -static int -intel_drm_fence_finish(struct intel_winsys *iws, - struct pipe_fence_handle *fence) -{ - struct intel_drm_fence *f = (struct intel_drm_fence *)fence; - - /* fence already expired */ - if (!f->bo) - return 0; - - drm_intel_bo_wait_rendering(f->bo); - 
drm_intel_bo_unreference(f->bo); - f->bo = NULL; - - return 0; -} - -void -intel_drm_winsys_init_fence_functions(struct intel_drm_winsys *idws) -{ - idws->base.fence_reference = intel_drm_fence_reference; - idws->base.fence_signalled = intel_drm_fence_signalled; - idws->base.fence_finish = intel_drm_fence_finish; -} diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h index 9854756880..bfcd512cef 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h @@ -2,56 +2,45 @@ #ifndef INTEL_DRM_WINSYS_H #define INTEL_DRM_WINSYS_H -#include "i965/intel_batchbuffer.h" +#include "i965/brw_winsys.h" #include "drm.h" #include "intel_bufmgr.h" + /* * Winsys */ -struct intel_drm_winsys +struct i965_libdrm_winsys { - struct intel_winsys base; + struct brw_winsys_screen base; + drm_intel_bufmgr *gem; - boolean softpipe; boolean dump_cmd; int fd; /**< Drm file discriptor */ unsigned id; - - size_t max_batch_size; - - struct { - drm_intel_bufmgr *gem; - } pools; }; -static INLINE struct intel_drm_winsys * -intel_drm_winsys(struct intel_winsys *iws) +static INLINE struct i965_libdrm_winsys * +i965_libdrm_winsys(struct brw_winsys_screen *iws) { - return (struct intel_drm_winsys *)iws; + return (struct i965_libdrm_winsys *)iws; } -struct intel_drm_winsys * intel_drm_winsys_create(int fd, unsigned pci_id); -struct pipe_fence_handle * intel_drm_fence_create(drm_intel_bo *bo); +struct i965_libdrm_winsys *i965_libdrm_winsys_create(int fd, unsigned pci_id); -void intel_drm_winsys_init_batchbuffer_functions(struct intel_drm_winsys *idws); -void intel_drm_winsys_init_buffer_functions(struct intel_drm_winsys *idws); -void intel_drm_winsys_init_fence_functions(struct intel_drm_winsys *idws); +void i965_libdrm_winsys_init_buffer_functions(struct i965_libdrm_winsys *idws); -/* - * Buffer +/* Buffer. 
*/ - - -struct intel_drm_buffer { - unsigned magic; +struct i965_libdrm_buffer { + struct brw_winsys_buffer base; drm_intel_bo *bo; @@ -61,18 +50,15 @@ struct intel_drm_buffer { boolean flinked; unsigned flink; + + unsigned cheesy_refcount; }; -static INLINE struct intel_drm_buffer * -intel_drm_buffer(struct intel_buffer *buffer) +static INLINE struct i965_libdrm_buffer * +i965_libdrm_buffer(struct brw_winsys_buffer *buffer) { - return (struct intel_drm_buffer *)buffer; + return (struct i965_libdrm_buffer *)buffer; } -static INLINE drm_intel_bo * -intel_bo(struct intel_buffer *buffer) -{ - return intel_drm_buffer(buffer)->bo; -} #endif -- cgit v1.2.3 From 23c2ba828058255d8fdf3471bb924127e6c27f6c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 2 Nov 2009 08:48:26 +0000 Subject: i965g: rename brw_constant_buffer to brw_curbe_buffer Now that there are real constant buffers, try to reduce naming confusion. --- src/gallium/drivers/i965/brw_curbe.c | 10 +++++----- src/gallium/drivers/i965/brw_state.h | 2 +- src/gallium/drivers/i965/brw_state_upload.c | 8 +------- 3 files changed, 7 insertions(+), 13 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 3dd08f6eeb..ed5b250f82 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -160,7 +160,7 @@ static GLfloat fixed_plane[6][4] = { * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. 
*/ -static int prepare_constant_buffer(struct brw_context *brw) +static int prepare_curbe_buffer(struct brw_context *brw) { const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); @@ -312,7 +312,7 @@ static int prepare_constant_buffer(struct brw_context *brw) return 0; } -static int emit_constant_buffer(struct brw_context *brw) +static int emit_curbe_buffer(struct brw_context *brw) { GLuint sz = brw->curbe.total_size; @@ -330,7 +330,7 @@ static int emit_constant_buffer(struct brw_context *brw) return 0; } -const struct brw_tracked_state brw_constant_buffer = { +const struct brw_tracked_state brw_curbe_buffer = { .dirty = { .mesa = (PIPE_NEW_FRAGMENT_CONSTANTS | PIPE_NEW_VERTEX_CONSTANTS | @@ -343,7 +343,7 @@ const struct brw_tracked_state brw_constant_buffer = { BRW_NEW_BATCH), .cache = (CACHE_NEW_WM_PROG) }, - .prepare = prepare_constant_buffer, - .emit = emit_constant_buffer, + .prepare = prepare_curbe_buffer, + .emit = emit_curbe_buffer, }; diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index b47b04fd46..3b9151ab2f 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -55,7 +55,7 @@ const struct brw_tracked_state brw_cc_vp; const struct brw_tracked_state brw_check_fallback; const struct brw_tracked_state brw_clip_prog; const struct brw_tracked_state brw_clip_unit; -const struct brw_tracked_state brw_constant_buffer; +const struct brw_tracked_state brw_curbe_buffer; const struct brw_tracked_state brw_curbe_offsets; const struct brw_tracked_state brw_invarient_state; const struct brw_tracked_state brw_gs_prog; diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index eff3a40a46..4132c6ac69 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -36,12 +36,6 @@ #include "brw_batchbuffer.h" #include "brw_debug.h" -/* This is used to initialize 
brw->state.atoms[]. We could use this - * list directly except for a single atom, brw_constant_buffer, which - * has a .dirty value which changes according to the parameters of the - * current fragment and vertex programs, and so cannot be a static - * value. - */ const struct brw_tracked_state *atoms[] = { &brw_check_fallback, @@ -94,7 +88,7 @@ const struct brw_tracked_state *atoms[] = &brw_index_buffer, &brw_vertices, - &brw_constant_buffer + &brw_curbe_buffer }; -- cgit v1.2.3 From 4ea94c04c9ab7b11fa06c60f2487a911f1422844 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 2 Nov 2009 08:49:02 +0000 Subject: i965g: format RELOCs similarly --- src/gallium/drivers/i965/brw_cc.c | 3 +-- src/gallium/drivers/i965/brw_clip_state.c | 3 +-- src/gallium/drivers/i965/brw_misc_state.c | 32 +++++++++++++++++++++++-------- 3 files changed, 26 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index cf3791e11e..c6267e1c60 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -151,8 +151,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) /* Emit CC viewport relocation */ brw->sws->bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, - 0, + I915_GEM_DOMAIN_INSTRUCTION, 0, 0, offsetof(struct brw_cc_unit_state, cc4), brw->cc.vp_bo); diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 31e2e0bc17..8be53e4bfb 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -150,8 +150,7 @@ clip_unit_create_from_key(struct brw_context *brw, /* Emit clip program relocation */ assert(brw->clip.prog_bo); brw->sws->bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, - 0, + I915_GEM_DOMAIN_INSTRUCTION, 0, clip.thread0.grf_reg_count << 1, offsetof(struct brw_clip_unit_state, thread0), brw->clip.prog_bo); diff --git 
a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index db8a2a5008..06b9a2d2df 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -110,13 +110,17 @@ static int upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); if (brw->vs.bind_bo != NULL) - OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ + OUT_RELOC(brw->vs.bind_bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0); /* vs */ else OUT_BATCH(0); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ + OUT_RELOC(brw->wm.bind_bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0); /* wm/ps */ ADVANCE_BATCH(); return 0; } @@ -142,15 +146,27 @@ static int upload_pipelined_state_pointers(struct brw_context *brw ) { BEGIN_BATCH(7, IGNORE_CLIPRECTS); OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); - OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->vs.state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); if (brw->gs.prog_active) - OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->gs.state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); else OUT_BATCH(0); - OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); - OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); + OUT_RELOC(brw->clip.state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 1); + OUT_RELOC(brw->sf.state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_RELOC(brw->wm.state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); + OUT_RELOC(brw->cc.state_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0); ADVANCE_BATCH(); brw->state.dirty.brw |= BRW_NEW_PSP; -- cgit v1.2.3 From a277bb20debc413f6ccf46f529497bf8bafa64dd 
Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 3 Nov 2009 23:16:02 +0000 Subject: i965g: convert read/write domain pairs into single usage value Easier to understand what's going on in the driver sources, convert stereotype usage values back to GEM read/write domain flags in the winsys. --- src/gallium/drivers/i965/brw_batchbuffer.c | 9 +++-- src/gallium/drivers/i965/brw_batchbuffer.h | 7 ++-- src/gallium/drivers/i965/brw_cc.c | 2 +- src/gallium/drivers/i965/brw_clip_state.c | 2 +- src/gallium/drivers/i965/brw_curbe.c | 2 +- src/gallium/drivers/i965/brw_draw_upload.c | 8 ++--- src/gallium/drivers/i965/brw_gs_state.c | 2 +- src/gallium/drivers/i965/brw_misc_state.c | 18 +++++----- src/gallium/drivers/i965/brw_pipe_query.c | 4 +-- src/gallium/drivers/i965/brw_sf_state.c | 4 +-- src/gallium/drivers/i965/brw_vs_state.c | 2 +- src/gallium/drivers/i965/brw_vs_surface_state.c | 2 +- src/gallium/drivers/i965/brw_winsys.h | 40 ++++++++++----------- src/gallium/drivers/i965/brw_wm_constant_buffer.c | 2 +- src/gallium/drivers/i965/brw_wm_sampler_state.c | 2 +- src/gallium/drivers/i965/brw_wm_state.c | 26 +++++++------- src/gallium/drivers/i965/brw_wm_surface_state.c | 6 ++-- src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c | 42 ++++++++++++++++++++--- 18 files changed, 104 insertions(+), 76 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index 72650cdb5d..fd6b34cb8a 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -168,9 +168,9 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, */ enum pipe_error brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, - struct brw_winsys_buffer *buffer, - uint32_t read_domains, uint32_t write_domain, - uint32_t delta) + struct brw_winsys_buffer *buffer, + uint32_t usage, + uint32_t delta) { int ret; @@ -182,8 +182,7 @@ brw_batchbuffer_emit_reloc(struct 
brw_batchbuffer *batch, } ret = batch->sws->bo_emit_reloc(batch->buf, - read_domains, - write_domain, + usage, delta, batch->ptr - batch->map, buffer); diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index d687b79f93..b7186b3757 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -77,8 +77,7 @@ int brw_batchbuffer_data(struct brw_batchbuffer *batch, int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, struct brw_winsys_buffer *buffer, - uint32_t read_domains, - uint32_t write_domain, + enum brw_buffer_usage usage, uint32_t offset); /* Inline functions - might actually be better off with these @@ -125,10 +124,10 @@ brw_batchbuffer_require_space(struct brw_batchbuffer *batch, #define OUT_BATCH(d) brw_batchbuffer_emit_dword(brw->batch, d) -#define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ +#define OUT_RELOC(buf, usage, delta) do { \ assert((unsigned) (delta) < buf->size); \ brw_batchbuffer_emit_reloc(brw->batch, buf, \ - read_domains, write_domain, delta); \ + usage, delta); \ } while (0) #ifdef DEBUG diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index c6267e1c60..20967f0191 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -151,7 +151,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) /* Emit CC viewport relocation */ brw->sws->bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, 0, offsetof(struct brw_cc_unit_state, cc4), brw->cc.vp_bo); diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 8be53e4bfb..6f8309fea9 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -150,7 +150,7 @@ clip_unit_create_from_key(struct brw_context *brw, /* Emit clip program relocation */ assert(brw->clip.prog_bo); 
brw->sws->bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, clip.thread0.grf_reg_count << 1, offsetof(struct brw_clip_unit_state, thread0), brw->clip.prog_bo); diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index ed5b250f82..3910174bda 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -323,7 +323,7 @@ static int emit_curbe_buffer(struct brw_context *brw) } else { OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); OUT_RELOC(brw->curbe.curbe_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, (sz - 1) + brw->curbe.curbe_offset); } ADVANCE_BATCH(); diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index 040d8ca93a..f0b7c741c0 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -300,11 +300,11 @@ static int brw_emit_vertex_buffers( struct brw_context *brw ) BRW_VB0_ACCESS_VERTEXDATA | (brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT)); OUT_RELOC(brw->vb.vb[i].bo, - I915_GEM_DOMAIN_VERTEX, 0, + BRW_USAGE_VERTEX, brw->vb.vb[i].offset); if (BRW_IS_IGDNG(brw)) { OUT_RELOC(brw->vb.vb[i].bo, - I915_GEM_DOMAIN_VERTEX, 0, + BRW_USAGE_VERTEX, brw->vb.vb[i].bo->size - 1); } else OUT_BATCH(brw->vb.vb[i].stride ? 
brw->vb.vb[i].vertex_count : 0); @@ -527,10 +527,10 @@ static int brw_emit_index_buffer(struct brw_context *brw) BEGIN_BATCH(4, IGNORE_CLIPRECTS); OUT_BATCH( ib.header.dword ); OUT_RELOC(brw->ib.bo, - I915_GEM_DOMAIN_VERTEX, 0, + BRW_USAGE_VERTEX, brw->ib.offset); OUT_RELOC(brw->ib.bo, - I915_GEM_DOMAIN_VERTEX, 0, + BRW_USAGE_VERTEX, brw->ib.offset + brw->ib.size - 1); OUT_BATCH( 0 ); ADVANCE_BATCH(); diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index 9046969394..f27f886a65 100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -113,7 +113,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) if (key->prog_active) { /* Emit GS program relocation */ brw->sws->bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, gs.thread0.grf_reg_count << 1, offsetof(struct brw_gs_unit_state, thread0), brw->gs.prog_bo); diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index 06b9a2d2df..e786ea1100 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -111,7 +111,7 @@ static int upload_binding_table_pointers(struct brw_context *brw) OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); if (brw->vs.bind_bo != NULL) OUT_RELOC(brw->vs.bind_bo, - I915_GEM_DOMAIN_SAMPLER, 0, + BRW_USAGE_SAMPLER, 0); /* vs */ else OUT_BATCH(0); @@ -119,7 +119,7 @@ static int upload_binding_table_pointers(struct brw_context *brw) OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ OUT_RELOC(brw->wm.bind_bo, - I915_GEM_DOMAIN_SAMPLER, 0, + BRW_USAGE_SAMPLER, 0); /* wm/ps */ ADVANCE_BATCH(); return 0; @@ -147,25 +147,25 @@ static int upload_pipelined_state_pointers(struct brw_context *brw ) BEGIN_BATCH(7, IGNORE_CLIPRECTS); OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); OUT_RELOC(brw->vs.state_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, 0); if 
(brw->gs.prog_active) OUT_RELOC(brw->gs.state_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, 1); else OUT_BATCH(0); OUT_RELOC(brw->clip.state_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, 1); OUT_RELOC(brw->sf.state_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, 0); OUT_RELOC(brw->wm.state_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, 0); OUT_RELOC(brw->cc.state_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, 0); ADVANCE_BATCH(); @@ -288,7 +288,7 @@ static int emit_depthbuffer(struct brw_context *brw) ((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) | (BRW_SURFACE_2D << 29)); OUT_RELOC(bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + BRW_USAGE_DEPTH_BUFFER, surface->offset); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | ((pitch - 1) << 6) | diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c index 18a9b71af0..1fe2f4da4f 100644 --- a/src/gallium/drivers/i965/brw_pipe_query.c +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -193,7 +193,7 @@ brw_emit_query_begin(struct brw_context *brw) * to pick up the results. 
*/ OUT_RELOC(brw->query.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + BRW_USAGE_QUERY_RESULT, PIPE_CONTROL_GLOBAL_GTT_WRITE | ((brw->query.index * 2) * sizeof(uint64_t))); OUT_BATCH(0); @@ -234,7 +234,7 @@ brw_emit_query_end(struct brw_context *brw) PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT); OUT_RELOC(brw->query.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + BRW_USAGE_QUERY_RESULT, PIPE_CONTROL_GLOBAL_GTT_WRITE | ((brw->query.index * 2 + 1) * sizeof(uint64_t))); OUT_BATCH(0); diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index 4ab5709d53..31343ff245 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -284,14 +284,14 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, */ /* Emit SF program relocation */ brw->sws->bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, sf.thread0.grf_reg_count << 1, offsetof(struct brw_sf_unit_state, thread0), brw->sf.prog_bo); /* Emit SF viewport relocation */ brw->sws->bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), offsetof(struct brw_sf_unit_state, sf5), brw->sf.vp_bo); diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index 6a2395dd96..26d5d005fa 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -149,7 +149,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) /* Emit VS program relocation */ brw->sws->bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, vs.thread0.grf_reg_count << 1, offsetof(struct brw_vs_unit_state, thread0), brw->vs.prog_bo); diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c index 9a9d47a8a3..32fb9b2a8b 100644 --- 
a/src/gallium/drivers/i965/brw_vs_surface_state.c +++ b/src/gallium/drivers/i965/brw_vs_surface_state.c @@ -170,7 +170,7 @@ brw_vs_get_binding_table(struct brw_context *brw) */ drm_intel_bo_emit_reloc(bind_bo, i * 4, brw->vs.surf_bo[i], 0, - I915_GEM_DOMAIN_INSTRUCTION, 0); + BRW_USAGE_STATE); } } diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index d19cd5d248..d0bd97d994 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -43,25 +43,22 @@ struct brw_winsys_buffer { unsigned size; }; +/* Describe the usage of a particular buffer in a relocation. The DRM + * winsys will translate these back to GEM read/write domain flags. + */ enum brw_buffer_usage { - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_SAMPLER, - I915_GEM_DOMAIN_VERTEX, - I915_GEM_DOMAIN_INSTRUCTION, - - - /* XXX: migrate from domains to explicit usage cases, eg below: - */ - - /* use on textures */ - BRW_USAGE_RENDER = 0x01, - BRW_USAGE_SAMPLER = 0x02, - BRW_USAGE_2D_TARGET = 0x04, - BRW_USAGE_2D_SOURCE = 0x08, - /* use on vertex */ - BRW_USAGE_VERTEX = 0x10, + BRW_USAGE_STATE, /* INSTRUCTION, 0 */ + BRW_USAGE_QUERY_RESULT, /* INSTRUCTION, INSTRUCTION */ + BRW_USAGE_RENDER_TARGET, /* RENDER, 0 */ + BRW_USAGE_DEPTH_BUFFER, /* RENDER, RENDER */ + BRW_USAGE_SAMPLER, /* SAMPLER, 0 */ + BRW_USAGE_VERTEX, /* VERTEX, 0 */ + BRW_USAGE_SCRATCH, /* 0, 0 */ }; +/* Should be possible to validate usages above against buffer creation + * types, below: + */ enum brw_buffer_type { BRW_BUFFER_TYPE_TEXTURE, @@ -70,10 +67,9 @@ enum brw_buffer_type BRW_BUFFER_TYPE_CURBE, BRW_BUFFER_TYPE_QUERY, BRW_BUFFER_TYPE_SHADER_CONSTANTS, - BRW_BUFFER_TYPE_WM_SCRATCH, + BRW_BUFFER_TYPE_SHADER_SCRATCH, BRW_BUFFER_TYPE_BATCH, BRW_BUFFER_TYPE_STATE_CACHE, - BRW_BUFFER_TYPE_MAX /* Count of possible values */ }; @@ -98,12 +94,12 @@ struct brw_winsys_screen { void (*bo_reference)( struct brw_winsys_buffer *buffer ); void (*bo_unreference)( struct 
brw_winsys_buffer *buffer ); - /* XXX: parameter names!! + /* delta -- added to b2->offset, and written into buffer + * offset -- location above value is written to within buffer */ int (*bo_emit_reloc)( struct brw_winsys_buffer *buffer, - unsigned domain, - unsigned a, - unsigned b, + enum brw_buffer_usage usage, + unsigned delta, unsigned offset, struct brw_winsys_buffer *b2); diff --git a/src/gallium/drivers/i965/brw_wm_constant_buffer.c b/src/gallium/drivers/i965/brw_wm_constant_buffer.c index 7d2533b104..50ecef29a4 100644 --- a/src/gallium/drivers/i965/brw_wm_constant_buffer.c +++ b/src/gallium/drivers/i965/brw_wm_constant_buffer.c @@ -37,7 +37,7 @@ brw_create_constant_surface( struct brw_context *brw, if (key->bo) { /* Emit relocation to surface contents */ brw->sws->bo_emit_reloc(bo, - I915_GEM_DOMAIN_SAMPLER, 0, + BRW_USAGE_SAMPLER, 0, offsetof(struct brw_surface_state, ss1), key->bo); diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c index d43968c85a..2909dd3876 100644 --- a/src/gallium/drivers/i965/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -182,7 +182,7 @@ static int upload_wm_samplers( struct brw_context *brw ) /* Emit SDC relocations */ for (i = 0; i < key.sampler_count; i++) { brw->sws->bo_emit_reloc(brw->wm.sampler_bo, - I915_GEM_DOMAIN_SAMPLER, 0, + BRW_USAGE_SAMPLER, 0, i * sizeof(struct brw_sampler_state) + offsetof(struct brw_sampler_state, ss2), diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index 5cfa8fe2d1..ccbb647bcd 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -230,27 +230,27 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, /* Emit WM program relocation */ brw->sws->bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - wm.thread0.grf_reg_count << 1, - offsetof(struct brw_wm_unit_state, thread0), - 
brw->wm.prog_bo); + BRW_USAGE_STATE, + wm.thread0.grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); /* Emit scratch space relocation */ if (key->total_scratch != 0) { brw->sws->bo_emit_reloc(bo, - 0, 0, - wm.thread2.per_thread_scratch_space, - offsetof(struct brw_wm_unit_state, thread2), - brw->wm.scratch_bo); + BRW_USAGE_SCRATCH, + wm.thread2.per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_bo); } /* Emit sampler state relocation */ if (key->sampler_count != 0) { brw->sws->bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), - offsetof(struct brw_wm_unit_state, wm4), - brw->wm.sampler_bo); + BRW_USAGE_STATE, + wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); } return bo; @@ -277,7 +277,7 @@ static int upload_wm_unit( struct brw_context *brw ) } if (brw->wm.scratch_bo == NULL) { brw->wm.scratch_bo = brw->sws->bo_alloc(brw->sws, - BRW_BUFFER_TYPE_WM_SCRATCH, + BRW_BUFFER_TYPE_SHADER_SCRATCH, total, 4096); } diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index f55a6c4af2..e5a0ed7d61 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -60,7 +60,7 @@ brw_update_texture_surface( struct brw_context *brw, /* Emit relocation to surface contents */ brw->sws->bo_emit_reloc(brw->wm.surf_bo[surf], - I915_GEM_DOMAIN_SAMPLER, 0, + BRW_USAGE_SAMPLER, 0, offsetof(struct brw_surface_state, ss1), tex->bo); @@ -117,7 +117,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, /* XXX: we will only be rendering to this surface: */ brw->sws->bo_emit_reloc(brw->wm.surf_bo[unit], - I915_GEM_DOMAIN_RENDER, 0, + BRW_USAGE_RENDER_TARGET, ss.ss1.base_addr - surface->bo->offset[0], /* XXX */ offsetof(struct brw_surface_state, ss1), surface->bo); @@ -161,7 +161,7 
@@ brw_wm_get_binding_table(struct brw_context *brw) /* Emit binding table relocations to surface state */ for (i = 0; i < brw->wm.nr_surfaces; i++) { brw->sws->bo_emit_reloc(bind_bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, + BRW_USAGE_STATE, 0, i * sizeof(GLuint), brw->wm.surf_bo[i]); diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c index 5dbfd2e6b0..61717d2942 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c @@ -93,17 +93,51 @@ i965_libdrm_bo_unreference( struct brw_winsys_buffer *buffer ) */ static int i965_libdrm_bo_emit_reloc( struct brw_winsys_buffer *buffer, - unsigned domain, - unsigned a, - unsigned b, + enum brw_buffer_usage usage, + unsigned delta, unsigned offset, struct brw_winsys_buffer *buffer2) { struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); struct i965_libdrm_buffer *buf2 = i965_libdrm_buffer(buffer2); + int read, write; int ret; - ret = dri_bo_emit_reloc( buf->bo, domain, a, b, offset, buf2->bo ); + switch (usage) { + case BRW_USAGE_STATE: + read = I915_GEM_DOMAIN_INSTRUCTION; + write = 0; + break; + case BRW_USAGE_QUERY_RESULT: + read = I915_GEM_DOMAIN_INSTRUCTION; + write = I915_GEM_DOMAIN_INSTRUCTION; + break; + case BRW_USAGE_RENDER_TARGET: + read = I915_GEM_DOMAIN_RENDER; + write = 0; + break; + case BRW_USAGE_DEPTH_BUFFER: + read = I915_GEM_DOMAIN_RENDER; + write = I915_GEM_DOMAIN_RENDER; + break; + case BRW_USAGE_SAMPLER: + read = I915_GEM_DOMAIN_SAMPLER; + write = 0; + break; + case BRW_USAGE_VERTEX: + read = I915_GEM_DOMAIN_VERTEX; + write = 0; + break; + case BRW_USAGE_SCRATCH: + read = 0; + write = 0; + break; + default: + assert(0); + return -1; + } + + ret = dri_bo_emit_reloc( buf->bo, read, write, delta, offset, buf2->bo ); if (ret) return -1; -- cgit v1.2.3 From 211d7ab22b13430aaae00a0dfe95492450bcca20 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 13:03:35 +0000 
Subject: i965g: add standalone xlib debug winsys Create a dummy winsys that just debug-prints on calls into the winsys functions. Will use this to get to the point where we are generating sane-looking debug dumps and disassembly. Also fix various warnings generated with the new compiler flags set in this config. --- Makefile | 1 + configs/linux-i965 | 8 + src/gallium/drivers/i965/brw_context.h | 5 +- src/gallium/drivers/i965/brw_curbe.c | 2 +- src/gallium/drivers/i965/brw_disasm.c | 1 + src/gallium/drivers/i965/brw_state.h | 2 +- src/gallium/drivers/i965/brw_state_cache.c | 6 +- src/gallium/drivers/i965/brw_winsys.h | 1 + src/gallium/winsys/drm/i965/xlib/Makefile | 97 +++++++ src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 362 +++++++++++++++++++++++++++ src/gallium/winsys/xlib/Makefile | 1 + 11 files changed, 479 insertions(+), 7 deletions(-) create mode 100644 configs/linux-i965 create mode 100644 src/gallium/winsys/drm/i965/xlib/Makefile create mode 100644 src/gallium/winsys/drm/i965/xlib/xlib_i965.c (limited to 'src/gallium/drivers/i965') diff --git a/Makefile b/Makefile index 7f073fd516..e437bd27d4 100644 --- a/Makefile +++ b/Makefile @@ -105,6 +105,7 @@ irix6-n32-static \ irix6-o32 \ irix6-o32-static \ linux \ +linux-i965 \ linux-alpha \ linux-alpha-static \ linux-cell \ diff --git a/configs/linux-i965 b/configs/linux-i965 new file mode 100644 index 0000000000..e66abc347b --- /dev/null +++ b/configs/linux-i965 @@ -0,0 +1,8 @@ +# Configuration for standalone mode i965 debug + +include $(TOP)/configs/linux-debug + +CONFIG_NAME = linux-i965 + +GALLIUM_DRIVER_DIRS = i965 +GALLIUM_WINSYS_DIRS = drm/i965/xlib diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index b94c511499..97b2a8e27d 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -167,8 +167,8 @@ struct brw_fragment_shader { unsigned iz_lookup; //unsigned wm_lookup; - boolean uses_depth:1; - boolean has_flow_control:1; 
+ unsigned uses_depth:1; + unsigned has_flow_control:1; unsigned id; struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ @@ -573,6 +573,7 @@ struct brw_context } vb[PIPE_MAX_ATTRIBS]; struct { + int dummy; } ve[PIPE_MAX_ATTRIBS]; unsigned nr_vb; /* currently the same as curr.num_vertex_buffers */ diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 3910174bda..5763173bca 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -243,7 +243,7 @@ static int prepare_curbe_buffer(struct brw_context *brw) buf[i+0], buf[i+1], buf[i+2], buf[i+3]); debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", - brw->curbe.last_buf, buf, + (void *)brw->curbe.last_buf, (void *)buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); } diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index a84c581c03..29fe848005 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -143,6 +143,7 @@ char *chan_sel[4] = { }; char *dest_condmod[16] = { + [0] = NULL }; char *debug_ctrl[2] = { diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index 3b9151ab2f..94d2cb6f82 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -47,7 +47,7 @@ brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo) brw->sws->bo_reference(bo); brw->state.validated_bos[brw->state.validated_bo_count++] = bo; } -}; +} const struct brw_tracked_state brw_blend_constant_color; const struct brw_tracked_state brw_cc_unit; diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index 9cf44f7a5c..1cb1b5e721 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -236,8 +236,8 @@ brw_upload_cache( struct 
brw_cache *cache, tmp = MALLOC(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); - memcpy(tmp + key_size, aux, cache->aux_size[cache_id]); - memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); + memcpy((char *)tmp + key_size, aux, cache->aux_size[cache_id]); + memcpy((char *)tmp + key_size + aux_size, reloc_bufs, relocs_size); for (i = 0; i < nr_reloc_bufs; i++) { if (reloc_bufs[i] != NULL) cache->sws->bo_reference(reloc_bufs[i]); @@ -247,7 +247,7 @@ brw_upload_cache( struct brw_cache *cache, item->key = tmp; item->hash = hash; item->key_size = key_size; - item->reloc_bufs = tmp + key_size + aux_size; + item->reloc_bufs = (struct brw_winsys_buffer **)((char *)tmp + key_size + aux_size); item->nr_reloc_bufs = nr_reloc_bufs; item->bo = bo; diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index d0bd97d994..9338923da3 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -54,6 +54,7 @@ enum brw_buffer_usage { BRW_USAGE_SAMPLER, /* SAMPLER, 0 */ BRW_USAGE_VERTEX, /* VERTEX, 0 */ BRW_USAGE_SCRATCH, /* 0, 0 */ + BRW_USAGE_MAX }; /* Should be possible to validate usages above against buffer creation diff --git a/src/gallium/winsys/drm/i965/xlib/Makefile b/src/gallium/winsys/drm/i965/xlib/Makefile new file mode 100644 index 0000000000..0efa0ca6f9 --- /dev/null +++ b/src/gallium/winsys/drm/i965/xlib/Makefile @@ -0,0 +1,97 @@ +# src/gallium/winsys/xlib/Makefile + +# This makefile produces a "stand-alone" libGL.so which is based on +# Xlib (no DRI HW acceleration) + + +TOP = ../../../../../.. 
+include $(TOP)/configs/current + + +GL_MAJOR = 1 +GL_MINOR = 5 +GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) + + +INCLUDE_DIRS = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/drivers/i965 \ + -I$(TOP)/src/gallium/drivers/i965/include \ + -I$(TOP)/src/gallium/state_trackers/glx/xlib \ + -I$(TOP)/src/gallium/auxiliary \ + -I/usr/include/drm + +XLIB_WINSYS_SOURCES = \ + xlib_i965.c \ + + + +XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o) + + + +LIBS = \ + $(TOP)/src/gallium/drivers/i965/libi965.a \ + $(TOP)/src/gallium/drivers/trace/libtrace.a \ + $(TOP)/src/gallium/state_trackers/glx/xlib/libxlib.a \ + $(TOP)/src/mesa/libglapi.a \ + $(TOP)/src/mesa/libmesagallium.a \ + $(GALLIUM_AUXILIARIES) + +# $(TOP)/src/gallium/drivers/i965/lib/libi9xx.a \ + +.SUFFIXES : .cpp + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(DEFINES) $(CFLAGS) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDE_DIRS) $(DEFINES) $(CXXFLAGS) $< -o $@ + + + +default: $(TOP)/$(LIB_DIR)/gallium $(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME) + +$(TOP)/$(LIB_DIR)/gallium: + @ mkdir -p $(TOP)/$(LIB_DIR)/gallium + +# Make the libGL.so library +$(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME): $(XLIB_WINSYS_OBJECTS) $(LIBS) Makefile + $(TOP)/bin/mklib -o $(GL_LIB) \ + -linker "$(CC)" \ + -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ + -install $(TOP)/$(LIB_DIR)/gallium \ + $(MKLIB_OPTIONS) $(XLIB_WINSYS_OBJECTS) \ + -Wl,--start-group $(LIBS) -Wl,--end-group $(GL_LIB_DEPS) + + +depend: $(XLIB_WINSYS_SOURCES) + @ echo "running $(MKDEP)" + @ rm -f depend # workaround oops on gutsy?!? 
+ @ touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(XLIB_WINSYS_SOURCES) \ + > /dev/null 2>/dev/null + + +install: default + $(INSTALL) -d $(INSTALL_DIR)/include/GL + $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR) + $(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL + @if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \ + $(MINSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \ + fi + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h + +clean: + -rm -f *.o + + +include depend diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c new file mode 100644 index 0000000000..60ab8e1993 --- /dev/null +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -0,0 +1,362 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/* + * Authors: + * Keith Whitwell + * Brian Paul + */ + + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "pipe/p_context.h" + +#include "xm_winsys.h" + +#include "i965/brw_winsys.h" +#include "i965/brw_screen.h" +#include "i965/brw_reg.h" + +#define MAX_VRAM (128*1024*1024) + +struct xlib_brw_buffer +{ + struct brw_winsys_buffer base; + unsigned offset; + unsigned type; + char *virtual; + unsigned cheesy_refcount; + int map_count; +}; + + +/** + * Subclass of brw_winsys_screen for Xlib winsys + */ +struct xlib_brw_winsys +{ + struct brw_winsys_screen base; + unsigned offset; +}; + +static struct xlib_brw_winsys * +xlib_brw_winsys( struct brw_winsys_screen *screen ) +{ + return (struct xlib_brw_winsys *)screen; +} + + +static struct xlib_brw_buffer * +xlib_brw_buffer( struct brw_winsys_buffer *buffer ) +{ + return (struct xlib_brw_buffer *)buffer; +} + + + +const char *names[BRW_BUFFER_TYPE_MAX] = { + "texture", + "scanout", + "vertex", + "curbe", + "query", + "shader_constants", + "wm_scratch", + "batch", + "state_cache", +}; + +const char *usages[BRW_USAGE_MAX] = { + "state", + "query_result", + "render_target", + "depth_buffer", + "sampler", + "vertex", + "scratch" +}; + +static struct brw_winsys_buffer * +xlib_brw_bo_alloc( struct brw_winsys_screen *sws, + enum brw_buffer_type type, + unsigned size, + unsigned alignment ) +{ + struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws); + struct xlib_brw_buffer *buf; + + debug_printf("%s type %d sz %d align %d\n", + __FUNCTION__, type, size, alignment ); + + buf = CALLOC_STRUCT(xlib_brw_buffer); + if (!buf) + return NULL; + + buf->offset = align(xbw->offset, alignment); + buf->type = type; + buf->virtual = MALLOC(size); + buf->base.offset = 
&buf->offset; /* hmm, cheesy */ + buf->base.size = size; + + xbw->offset = align(xbw->offset, alignment) + size; + if (xbw->offset > MAX_VRAM) + goto err; + + return &buf->base; + +err: + assert(0); + FREE(buf); + return NULL; +} + +static void +xlib_brw_bo_reference( struct brw_winsys_buffer *buffer ) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + + buf->cheesy_refcount++; +} + +static void +xlib_brw_bo_unreference( struct brw_winsys_buffer *buffer ) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + + if (--buf->cheesy_refcount == 0) { + FREE(buffer); + } +} + +static int +xlib_brw_bo_emit_reloc( struct brw_winsys_buffer *buffer, + enum brw_buffer_usage usage, + unsigned delta, + unsigned offset, + struct brw_winsys_buffer *buffer2) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + struct xlib_brw_buffer *buf2 = xlib_brw_buffer(buffer2); + + debug_printf("%s buf %p offset %x val %x + %x buf2 %p/%s/%s\n", + __FUNCTION__, (void *)buffer, offset, + buf2->offset, delta, + (void *)buffer2, names[buf2->type], usages[usage]); + + *(uint32_t *)(buf->virtual + offset) = buf2->offset + delta; + + return 0; +} + +static int +xlib_brw_bo_exec( struct brw_winsys_buffer *buffer, + unsigned bytes_used ) +{ + debug_printf("execute buffer %p, bytes %d\n", (void *)buffer, bytes_used); + + return 0; +} + +static int +xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer, + size_t offset, + size_t size, + const void *data) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + + debug_printf("%s buf %p off %d sz %d data %p\n", + __FUNCTION__, + (void *)buffer, offset, size, data); + + memcpy(buf->virtual + offset, data, size); + return 0; +} + + +static boolean +xlib_brw_bo_is_busy(struct brw_winsys_buffer *buffer) +{ + debug_printf("%s %p\n", __FUNCTION__, (void *)buffer); + return TRUE; +} + +static boolean +xlib_brw_bo_references(struct brw_winsys_buffer *a, + struct brw_winsys_buffer *b) +{ + debug_printf("%s %p %p\n", __FUNCTION__, 
(void *)a, (void *)b); + return TRUE; +} + +static boolean +xlib_brw_check_aperture_space( struct brw_winsys_screen *iws, + struct brw_winsys_buffer **buffers, + unsigned count ) +{ + unsigned tot_size = 0; + unsigned i; + + for (i = 0; i < count; i++) + tot_size += buffers[i]->size; + + debug_printf("%s %d bufs, tot_size: %d kb\n", + __FUNCTION__, count, + (tot_size + 1023) / 1024); + + return TRUE; +} + +static void * +xlib_brw_bo_map(struct brw_winsys_buffer *buffer, + boolean write) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + + debug_printf("%s %p %s\n", __FUNCTION__, (void *)buffer, + write ? "read/write" : "read"); + + buf->map_count++; + return buf->virtual; +} + +static void +xlib_brw_bo_unmap(struct brw_winsys_buffer *buffer) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + + debug_printf("%s %p\n", __FUNCTION__, (void *)buffer); + + --buf->map_count; + assert(buf->map_count >= 0); +} + + +static void +xlib_brw_winsys_destroy( struct brw_winsys_screen *screen ) +{ + /* XXX: free all buffers */ + FREE(screen); +} + +static struct brw_winsys_screen * +xlib_create_brw_winsys_screen( void ) +{ + struct xlib_brw_winsys *ws; + + ws = CALLOC_STRUCT(xlib_brw_winsys); + if (!ws) + return NULL; + + ws->base.destroy = xlib_brw_winsys_destroy; + ws->base.bo_alloc = xlib_brw_bo_alloc; + ws->base.bo_reference = xlib_brw_bo_reference; + ws->base.bo_unreference = xlib_brw_bo_unreference; + ws->base.bo_emit_reloc = xlib_brw_bo_emit_reloc; + ws->base.bo_exec = xlib_brw_bo_exec; + ws->base.bo_subdata = xlib_brw_bo_subdata; + ws->base.bo_is_busy = xlib_brw_bo_is_busy; + ws->base.bo_references = xlib_brw_bo_references; + ws->base.check_aperture_space = xlib_brw_check_aperture_space; + ws->base.bo_map = xlib_brw_bo_map; + ws->base.bo_unmap = xlib_brw_bo_unmap; + + return &ws->base; +} + + +/*********************************************************************** + * Implementation of Xlib co-state-tracker's winsys interface + */ + +static struct 
pipe_screen * +xlib_create_i965_screen( void ) +{ + struct brw_winsys_screen *winsys; + struct pipe_screen *screen; + + winsys = xlib_create_brw_winsys_screen(); + if (winsys == NULL) + return NULL; + + screen = brw_create_screen(winsys, + PCI_CHIP_GM45_GM); + if (screen == NULL) + goto fail; + + return screen; + +fail: + if (winsys) + winsys->destroy( winsys ); + + return NULL; +} + + +static struct pipe_context * +xlib_create_i965_context( struct pipe_screen *screen, + void *context_private ) +{ + struct pipe_context *pipe; + + pipe = brw_create_context(screen); + if (pipe == NULL) + goto fail; + + pipe->priv = context_private; + return pipe; + +fail: + /* Free stuff here */ + return NULL; +} + + +static void +xlib_i965_display_surface(struct xmesa_buffer *xm_buffer, + struct pipe_surface *surf) +{ + /* struct brw_texture *texture = brw_texture(surf->texture); */ + + debug_printf("%s tex %p, sz %dx%d\n", __FUNCTION__, + (void *)surf->texture, + surf->texture->width[0], + surf->texture->height[0]); +} + + +struct xm_driver xlib_i965_driver = +{ + .create_pipe_screen = xlib_create_i965_screen, + .create_pipe_context = xlib_create_i965_context, + .display_surface = xlib_i965_display_surface +}; + + diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index 3dc38a78e4..a3c87ea272 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -30,6 +30,7 @@ DEFINES += \ XLIB_WINSYS_SOURCES = \ xlib.c \ + xlib_i965.c \ xlib_cell.c \ xlib_llvmpipe.c \ xlib_softpipe.c \ -- cgit v1.2.3 From b1d293321458ab00cc809aea4a19f46a256a7f98 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 13:54:44 +0000 Subject: i965g: hook up brw_screen.c --- src/gallium/drivers/i965/Makefile | 1 + src/gallium/drivers/i965/brw_reg.h | 10 ++--- src/gallium/drivers/i965/brw_screen.c | 76 +++++++++++++++-------------------- 3 files changed, 38 insertions(+), 49 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git 
a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 48950544c9..94b52bf0ec 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -57,6 +57,7 @@ C_SOURCES = \ brw_wm_sampler_state.c \ brw_wm_state.c \ brw_wm_surface_state.c \ + brw_screen.c \ brw_screen_tex_layout.c \ brw_screen_texture.c \ brw_screen_surface.c \ diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h index f428ec9269..a63403b6af 100644 --- a/src/gallium/drivers/i965/brw_reg.h +++ b/src/gallium/drivers/i965/brw_reg.h @@ -98,11 +98,11 @@ #define PCI_CHIP_ILM_G 0x0046 struct brw_chipset { - int pci_id:16; - int is_965:1; - int is_igdng:1; - int is_g4x:1; - int pad:13; + unsigned pci_id:16; + unsigned is_965:1; + unsigned is_igdng:1; + unsigned is_g4x:1; + unsigned pad:13; }; diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 671467989d..a02e6acc39 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -33,9 +33,8 @@ #include "brw_reg.h" #include "brw_context.h" #include "brw_screen.h" -#include "brw_buffer.h" -#include "brw_texture.h" #include "brw_winsys.h" +#include "brw_debug.h" #ifdef DEBUG static const struct debug_named_value debug_names[] = { @@ -49,18 +48,13 @@ static const struct debug_named_value debug_names[] = { { "bat", DEBUG_BATCH}, { "pix", DEBUG_PIXEL}, { "buf", DEBUG_BUFMGR}, - { "reg", DEBUG_REGION}, - { "fbo", DEBUG_FBO}, - { "lock", DEBUG_LOCK}, { "sync", DEBUG_SYNC}, { "prim", DEBUG_PRIMS }, { "vert", DEBUG_VERTS }, - { "dri", DEBUG_DRI }, { "dma", DEBUG_DMA }, { "san", DEBUG_SANITY }, { "sleep", DEBUG_SLEEP }, { "stats", DEBUG_STATS }, - { "tile", DEBUG_TILE }, { "sing", DEBUG_SINGLE_THREAD }, { "thre", DEBUG_SINGLE_THREAD }, { "wm", DEBUG_WM }, @@ -90,7 +84,7 @@ brw_get_name(struct pipe_screen *screen) static char buffer[128]; const char *chipset; - switch (brw_screen(screen)->pci_id) { + switch 
(brw_screen(screen)->chipset.pci_id) { case PCI_CHIP_I965_G: chipset = "I965_G"; break; @@ -250,9 +244,6 @@ brw_fence_reference(struct pipe_screen *screen, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) { - struct brw_screen *is = brw_screen(screen); - - is->iws->fence_reference(is->iws, ptr, fence); } static int @@ -260,19 +251,15 @@ brw_fence_signalled(struct pipe_screen *screen, struct pipe_fence_handle *fence, unsigned flags) { - struct brw_screen *is = brw_screen(screen); - - return is->iws->fence_signalled(is->iws, fence); + return 0; /* XXX shouldn't this be a boolean? */ } static int brw_fence_finish(struct pipe_screen *screen, - struct pipe_fence_handle *fence, - unsigned flags) + struct pipe_fence_handle *fence, + unsigned flags) { - struct brw_screen *is = brw_screen(screen); - - return is->iws->fence_finish(is->iws, fence); + return 0; } @@ -284,21 +271,21 @@ brw_fence_finish(struct pipe_screen *screen, static void brw_destroy_screen(struct pipe_screen *screen) { - struct brw_screen *is = brw_screen(screen); + struct brw_screen *bscreen = brw_screen(screen); - if (is->iws) - is->iws->destroy(is->iws); + if (bscreen->sws) + bscreen->sws->destroy(bscreen->sws); - FREE(is); + FREE(bscreen); } /** * Create a new brw_screen object */ struct pipe_screen * -brw_create_screen(struct intel_winsys *iws, uint pci_id) +brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) { - struct brw_screen *is; + struct brw_screen *bscreen; struct brw_chipset chipset; #ifdef DEBUG @@ -341,25 +328,26 @@ brw_create_screen(struct intel_winsys *iws, uint pci_id) } - is = CALLOC_STRUCT(brw_screen); - if (!is) + bscreen = CALLOC_STRUCT(brw_screen); + if (!bscreen) return NULL; - is->chipset = chipset; - is->iws = iws; - is->base.winsys = NULL; - is->base.destroy = brw_destroy_screen; - is->base.get_name = brw_get_name; - is->base.get_vendor = brw_get_vendor; - is->base.get_param = brw_get_param; - is->base.get_paramf = brw_get_paramf; - 
is->base.is_format_supported = brw_is_format_supported; - is->base.fence_reference = brw_fence_reference; - is->base.fence_signalled = brw_fence_signalled; - is->base.fence_finish = brw_fence_finish; - - brw_screen_init_texture_functions(is); - brw_screen_init_buffer_functions(is); - - return &is->base; + bscreen->chipset = chipset; + bscreen->sws = sws; + bscreen->base.winsys = NULL; + bscreen->base.destroy = brw_destroy_screen; + bscreen->base.get_name = brw_get_name; + bscreen->base.get_vendor = brw_get_vendor; + bscreen->base.get_param = brw_get_param; + bscreen->base.get_paramf = brw_get_paramf; + bscreen->base.is_format_supported = brw_is_format_supported; + bscreen->base.fence_reference = brw_fence_reference; + bscreen->base.fence_signalled = brw_fence_signalled; + bscreen->base.fence_finish = brw_fence_finish; + + brw_screen_tex_init(bscreen); + brw_screen_tex_surface_init(bscreen); + brw_screen_init_buffer_functions(bscreen); + + return &bscreen->base; } -- cgit v1.2.3 From a09b3d50975e68c13c0421d770f3865ad2a1257c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 15:10:34 +0000 Subject: i965g: add missing buffer functions --- src/gallium/drivers/i965/Makefile | 1 + src/gallium/drivers/i965/brw_screen.c | 2 +- src/gallium/drivers/i965/brw_screen.h | 12 ++- src/gallium/drivers/i965/brw_screen_buffers.c | 142 ++++++++++++++++++++++++++ src/gallium/drivers/i965/brw_winsys.h | 4 + src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 4 + 6 files changed, 162 insertions(+), 3 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_screen_buffers.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 94b52bf0ec..38b7a30944 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -58,6 +58,7 @@ C_SOURCES = \ brw_wm_state.c \ brw_wm_surface_state.c \ brw_screen.c \ + brw_screen_buffers.c \ brw_screen_tex_layout.c \ brw_screen_texture.c \ 
brw_screen_surface.c \ diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index a02e6acc39..7991f4ae52 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -347,7 +347,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) brw_screen_tex_init(bscreen); brw_screen_tex_surface_init(bscreen); - brw_screen_init_buffer_functions(bscreen); + brw_screen_buffer_init(bscreen); return &bscreen->base; } diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index 11b480b1ac..dda516ee68 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -60,9 +60,16 @@ struct brw_transfer struct brw_buffer { struct pipe_buffer base; + + /* One of either bo or user_buffer will be non-null, depending on + * whether this is a hardware or user buffer. + */ struct brw_winsys_buffer *bo; + void *user_buffer; + + /* Mapped pointer?? + */ void *ptr; - boolean is_user_buffer; }; #define BRW_TILING_NONE 0 @@ -151,7 +158,7 @@ brw_texture(struct pipe_texture *texture) static INLINE boolean brw_buffer_is_user_buffer( const struct pipe_buffer *buf ) { - return ((const struct brw_buffer *)buf)->is_user_buffer; + return ((const struct brw_buffer *)buf)->user_buffer != NULL; } struct brw_winsys_buffer * @@ -173,6 +180,7 @@ void brw_update_texture( struct brw_screen *brw_screen, void brw_screen_tex_init( struct brw_screen *brw_screen ); void brw_screen_tex_surface_init( struct brw_screen *brw_screen ); +void brw_screen_buffer_init(struct brw_screen *brw_screen); #endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c new file mode 100644 index 0000000000..0bf885ce8c --- /dev/null +++ b/src/gallium/drivers/i965/brw_screen_buffers.c @@ -0,0 +1,142 @@ + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "pipe/p_state.h" +#include 
"pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "brw_screen.h" +#include "brw_winsys.h" + + + +static void * +brw_buffer_map( struct pipe_screen *screen, + struct pipe_buffer *buffer, + unsigned usage ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->user_buffer) + return buf->user_buffer; + + return sws->bo_map( buf->bo, + (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE ); +} + +static void +brw_buffer_unmap( struct pipe_screen *screen, + struct pipe_buffer *buffer ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->bo) + sws->bo_unmap(buf->bo); +} + +static void +brw_buffer_destroy( struct pipe_buffer *buffer ) +{ + struct brw_screen *bscreen = brw_screen( buffer->screen ); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + assert(!p_atomic_read(&buffer->reference.count)); + + if (buf->bo) + sws->bo_unreference(buf->bo); + + FREE(buf); +} + + +static struct pipe_buffer * +brw_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf; + unsigned usage_type; + + buf = CALLOC_STRUCT(brw_buffer); + if (!buf) + return NULL; + + pipe_reference_init(&buf->base.reference, 1); + buf->base.screen = screen; + buf->base.alignment = alignment; + buf->base.usage = usage; + buf->base.size = size; + + switch (usage & (PIPE_BUFFER_USAGE_VERTEX | + PIPE_BUFFER_USAGE_INDEX | + PIPE_BUFFER_USAGE_PIXEL | + PIPE_BUFFER_USAGE_CONSTANT)) + { + case PIPE_BUFFER_USAGE_VERTEX: + case PIPE_BUFFER_USAGE_INDEX: + case (PIPE_BUFFER_USAGE_VERTEX|PIPE_BUFFER_USAGE_INDEX): + usage_type = BRW_BUFFER_TYPE_VERTEX; + break; + + case 
PIPE_BUFFER_USAGE_PIXEL: + usage_type = BRW_BUFFER_TYPE_PIXEL; + break; + + case PIPE_BUFFER_USAGE_CONSTANT: + usage_type = BRW_BUFFER_TYPE_SHADER_CONSTANTS; + break; + + default: + usage_type = BRW_BUFFER_TYPE_GENERIC; + break; + } + + buf->bo = sws->bo_alloc( sws, + usage_type, + size, + alignment ); + + return &buf->base; +} + + +static struct pipe_buffer * +brw_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + struct brw_buffer *buf; + + buf = CALLOC_STRUCT(brw_buffer); + if (!buf) + return NULL; + + buf->user_buffer = ptr; + + pipe_reference_init(&buf->base.reference, 1); + buf->base.screen = screen; + buf->base.alignment = 1; + buf->base.usage = 0; + buf->base.size = bytes; + + return &buf->base; +} + + +void brw_screen_buffer_init(struct brw_screen *brw_screen) +{ + brw_screen->base.buffer_create = brw_buffer_create; + brw_screen->base.user_buffer_create = brw_user_buffer_create; + brw_screen->base.buffer_map = brw_buffer_map; + brw_screen->base.buffer_unmap = brw_buffer_unmap; + brw_screen->base.buffer_destroy = brw_buffer_destroy; +} diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index 9338923da3..b2ba3e86f9 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -51,6 +51,8 @@ enum brw_buffer_usage { BRW_USAGE_QUERY_RESULT, /* INSTRUCTION, INSTRUCTION */ BRW_USAGE_RENDER_TARGET, /* RENDER, 0 */ BRW_USAGE_DEPTH_BUFFER, /* RENDER, RENDER */ + BRW_USAGE_BLIT_SOURCE, /* RENDER, 0 */ + BRW_USAGE_BLIT_DEST, /* RENDER, RENDER */ BRW_USAGE_SAMPLER, /* SAMPLER, 0 */ BRW_USAGE_VERTEX, /* VERTEX, 0 */ BRW_USAGE_SCRATCH, /* 0, 0 */ @@ -71,6 +73,8 @@ enum brw_buffer_type BRW_BUFFER_TYPE_SHADER_SCRATCH, BRW_BUFFER_TYPE_BATCH, BRW_BUFFER_TYPE_STATE_CACHE, + BRW_BUFFER_TYPE_PIXEL, /* image uploads, pbo's, etc */ + BRW_BUFFER_TYPE_GENERIC, /* unknown */ BRW_BUFFER_TYPE_MAX /* Count of possible values */ }; diff --git 
a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c index 4d4bc0cb30..d5c65fa214 100644 --- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -90,6 +90,8 @@ const char *names[BRW_BUFFER_TYPE_MAX] = { "wm_scratch", "batch", "state_cache", + "pixel", + "generic", }; const char *usages[BRW_USAGE_MAX] = { @@ -97,6 +99,8 @@ const char *usages[BRW_USAGE_MAX] = { "query_result", "render_target", "depth_buffer", + "blit_source", + "blit_dest", "sampler", "vertex", "scratch" -- cgit v1.2.3 From 9706a83bc959ba8445d0258e47639b44da2238fc Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 15:25:42 +0000 Subject: i965g: hook up more pipe_context functions --- src/gallium/drivers/i965/Makefile | 1 + src/gallium/drivers/i965/brw_batchbuffer.h | 2 -- src/gallium/drivers/i965/brw_context.c | 23 ++++++++++++- src/gallium/drivers/i965/brw_context.h | 3 ++ src/gallium/drivers/i965/brw_draw.c | 4 +-- src/gallium/drivers/i965/brw_pipe_flush.c | 51 ++++++++++++++++------------- src/gallium/drivers/i965/brw_pipe_misc.c | 21 ++++++++++-- src/gallium/drivers/i965/brw_pipe_query.c | 2 +- src/gallium/drivers/i965/brw_pipe_sampler.c | 6 +++- 9 files changed, 81 insertions(+), 32 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 38b7a30944..b42d9a92c4 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -31,6 +31,7 @@ C_SOURCES = \ brw_pipe_query.c \ brw_pipe_shader.c \ brw_pipe_flush.c \ + brw_pipe_misc.c \ brw_pipe_rast.c \ brw_sf.c \ brw_sf_emit.c \ diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index b7186b3757..04ca6265ed 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -60,8 +60,6 @@ void brw_batchbuffer_free(struct brw_batchbuffer *batch); 
void _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, int line); -#define brw_batchbuffer_flush(batch) \ - _brw_batchbuffer_flush(batch, __FILE__, __LINE__) void brw_batchbuffer_reset(struct brw_batchbuffer *batch); diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index e10b7d8bf5..30cc243255 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -50,6 +50,17 @@ static void brw_destroy_context( struct pipe_context *pipe ) brw_draw_cleanup( brw ); + brw_pipe_blend_cleanup( brw ); + brw_pipe_depth_stencil_cleanup( brw ); + brw_pipe_framebuffer_cleanup( brw ); + brw_pipe_flush_cleanup( brw ); + brw_pipe_misc_cleanup( brw ); + brw_pipe_query_cleanup( brw ); + brw_pipe_rast_cleanup( brw ); + brw_pipe_sampler_cleanup( brw ); + brw_pipe_shader_cleanup( brw ); + brw_pipe_vertex_cleanup( brw ); + FREE(brw->wm.compile_data); for (i = 0; i < brw->curr.fb.nr_cbufs; i++) @@ -98,7 +109,17 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen) brw->base.destroy = brw_destroy_context; - brw_init_query( brw ); + brw_pipe_blend_init( brw ); + brw_pipe_depth_stencil_init( brw ); + brw_pipe_framebuffer_init( brw ); + brw_pipe_flush_init( brw ); + brw_pipe_misc_init( brw ); + brw_pipe_query_init( brw ); + brw_pipe_rast_init( brw ); + brw_pipe_sampler_init( brw ); + brw_pipe_shader_init( brw ); + brw_pipe_vertex_init( brw ); + brw_init_state( brw ); brw_draw_init( brw ); diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 97b2a8e27d..a4c48e6fd2 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -777,6 +777,9 @@ void brw_pipe_shader_cleanup( struct brw_context *brw ); void brw_pipe_vertex_cleanup( struct brw_context *brw ); +void brw_context_flush( struct brw_context *brw ); + + /* brw_urb.c */ int brw_upload_urb_fence(struct brw_context *brw); diff --git 
a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index b5fe7c9601..a2bed6256b 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -166,7 +166,7 @@ try_draw_range_elements(struct brw_context *brw, return ret; if (brw->flags.always_flush_batch) - brw_batchbuffer_flush(brw->batch); + brw_context_flush( brw ); return 0; } @@ -217,7 +217,7 @@ brw_draw_range_elements(struct pipe_context *pipe, /* Otherwise, flush and retry: */ if (ret != 0) { - brw_batchbuffer_flush(brw->batch); + brw_context_flush( brw ); ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count ); assert(ret == 0); } diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index 1b43428760..9b52b56eae 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -2,50 +2,55 @@ #include "util/u_upload_mgr.h" #include "brw_context.h" +#include "brw_batchbuffer.h" -/** - * called from brw_batchbuffer_flush and children before sending a - * batchbuffer off. + +/* All batchbuffer flushes must go through this function. */ -static void brw_finish_batch(struct brw_context *brw) +void brw_context_flush( struct brw_context *brw ) { + /* + * + */ brw_emit_query_end(brw); -} + /* Move to the end of the current upload buffer so that we'll force choosing + * a new buffer next time. + */ + u_upload_flush( brw->vb.upload_vertex ); + u_upload_flush( brw->vb.upload_index ); -/** - * called from intelFlushBatchLocked - */ -static void brw_new_batch( struct brw_context *brw ) -{ - brw->curbe.need_new_bo = GL_TRUE; + _brw_batchbuffer_flush( brw->batch, __FILE__, __LINE__ ); /* Mark all context state as needing to be re-emitted. * This is probably not as severe as on 915, since almost all of our state * is just in referenced buffers. 
*/ brw->state.dirty.brw |= BRW_NEW_CONTEXT; - brw->state.dirty.mesa |= ~0; brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; - /* Move to the end of the current upload buffer so that we'll force choosing - * a new buffer next time. - */ - u_upload_flush( brw->vb.upload_vertex ); - u_upload_flush( brw->vb.upload_index ); + brw->curbe.need_new_bo = GL_TRUE; +} +static void +brw_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + brw_context_flush( brw_context( pipe ) ); + *fence = NULL; } -/* called from intelWaitForIdle() and intelFlush() - * - * For now, just flush everything. Could be smarter later. - */ -static GLuint brw_flush_cmd( void ) + +void brw_pipe_flush_init( struct brw_context *brw ) { - return ((MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); + brw->base.flush = brw_flush; } +void brw_pipe_flush_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c index fb8d7ecc59..a7ccde5917 100644 --- a/src/gallium/drivers/i965/brw_pipe_misc.c +++ b/src/gallium/drivers/i965/brw_pipe_misc.c @@ -1,7 +1,12 @@ +#include "brw_context.h" +#include "brw_structs.h" +#include "brw_defines.h" + static void brw_set_polygon_stipple( struct pipe_context *pipe, - const unsigned *stipple ) + const struct pipe_poly_stipple *stip ) { + struct brw_context *brw = brw_context(pipe); struct brw_polygon_stipple *bps = &brw->curr.bps; GLuint i; @@ -10,5 +15,17 @@ static void brw_set_polygon_stipple( struct pipe_context *pipe, bps->header.length = sizeof *bps/4-2; for (i = 0; i < 32; i++) - bps->stipple[i] = brw->curr.poly_stipple[i]; /* don't invert */ + bps->stipple[i] = stip->stipple[i]; /* don't invert */ +} + + + +void brw_pipe_misc_init( struct brw_context *brw ) +{ + brw->base.set_polygon_stipple = brw_set_polygon_stipple; +} + + +void brw_pipe_misc_cleanup( struct brw_context *brw ) +{ } diff --git a/src/gallium/drivers/i965/brw_pipe_query.c 
b/src/gallium/drivers/i965/brw_pipe_query.c index 1fe2f4da4f..d3e173f5ec 100644 --- a/src/gallium/drivers/i965/brw_pipe_query.c +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -137,7 +137,7 @@ brw_query_end(struct pipe_context *pipe, struct pipe_query *q) */ if (query->bo) { brw_emit_query_end(brw); - brw_batchbuffer_flush(brw->batch); + brw_context_flush( brw ); brw->sws->bo_unreference(brw->query.bo); brw->query.bo = NULL; diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index 08a5d22009..56cf95c4cd 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -156,10 +156,14 @@ static void brw_set_sampler_textures(struct pipe_context *pipe, } -void brw_sampler_init( struct brw_context *brw ) +void brw_pipe_sampler_init( struct brw_context *brw ) { brw->base.set_sampler_textures = brw_set_sampler_textures; brw->base.create_sampler_state = brw_create_sampler_state; brw->base.bind_sampler_state = brw_bind_sampler_state; brw->base.destroy_sampler_state = brw_destroy_sampler_state; } + +void brw_pipe_sampler_cleanup( struct brw_context *brw ) +{ +} -- cgit v1.2.3 From 7373bc0e0294d68bc3e64f4a6de1bb4ec3132f02 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 15:59:56 +0000 Subject: i965g: hook up pipe sampler callbacks --- src/gallium/drivers/i965/Makefile | 1 + src/gallium/drivers/i965/brw_context.h | 2 +- src/gallium/drivers/i965/brw_pipe_sampler.c | 177 +++++++++++++++--------- src/gallium/drivers/i965/brw_wm.c | 2 +- src/gallium/drivers/i965/brw_wm_sampler_state.c | 4 +- src/gallium/drivers/i965/brw_wm_surface_state.c | 2 +- 6 files changed, 120 insertions(+), 68 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index b42d9a92c4..8603907dc2 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -32,6 +32,7 @@ C_SOURCES = \ 
brw_pipe_shader.c \ brw_pipe_flush.c \ brw_pipe_misc.c \ + brw_pipe_sampler.c \ brw_pipe_rast.c \ brw_sf.c \ brw_sf_emit.c \ diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index a4c48e6fd2..b6f77d1253 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -505,7 +505,7 @@ struct brw_context unsigned num_vertex_elements; unsigned num_samplers; - struct brw_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; unsigned num_textures; unsigned num_vertex_buffers; diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index 56cf95c4cd..f0a765ecf5 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -1,5 +1,7 @@ #include "util/u_memory.h" +#include "util/u_math.h" + #include "pipe/p_context.h" #include "pipe/p_state.h" @@ -39,119 +41,166 @@ static GLuint translate_wrap_mode( unsigned wrap ) } } +static GLuint translate_img_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return BRW_MAPFILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return BRW_MAPFILTER_LINEAR; + case PIPE_TEX_FILTER_ANISO: + return BRW_MAPFILTER_ANISOTROPIC; + default: + assert(0); + return BRW_MAPFILTER_NEAREST; + } +} - -static void *brw_create_sampler_state( struct pipe_context *pipe, - const struct pipe_sampler_state *templ ) +static GLuint translate_mip_filter( unsigned filter ) { - struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state); + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: + return BRW_MIPFILTER_NONE; + case PIPE_TEX_MIPFILTER_NEAREST: + return BRW_MIPFILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: + return BRW_MIPFILTER_LINEAR; + default: + assert(0); + return BRW_MIPFILTER_NONE; + } +} - switch (key->minfilter) { - case GL_NEAREST: - 
sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; - sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; - break; - case GL_LINEAR: - sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; - sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; - break; - case GL_NEAREST_MIPMAP_NEAREST: - sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; - sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; - break; - case GL_LINEAR_MIPMAP_NEAREST: - sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; - sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; - break; - case GL_NEAREST_MIPMAP_LINEAR: - sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; - sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; - break; - case GL_LINEAR_MIPMAP_LINEAR: - sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; - sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; - break; +/* XXX: not sure why there are special translations for the shadow tex + * compare functions. In particular ALWAYS is translated to NEVER. + * Is this a hardware issue? Does i965 really suffer from this? 
+ */ +static GLuint translate_shadow_compare_func( unsigned func ) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return BRW_COMPAREFUNCTION_ALWAYS; + case PIPE_FUNC_LESS: + return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_LEQUAL: + return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_GREATER: + return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_GEQUAL: + return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_NOTEQUAL: + return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_EQUAL: + return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_ALWAYS: + return BRW_COMPAREFUNCTION_NEVER; default: - break; + assert(0); + return BRW_COMPAREFUNCTION_NEVER; } +} + + + + +static void * +brw_create_sampler_state( struct pipe_context *pipe, + const struct pipe_sampler_state *template ) +{ + struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state); + + sampler->ss0.min_filter = translate_img_filter( template->min_img_filter ); + sampler->ss0.mag_filter = translate_img_filter( template->mag_img_filter ); + sampler->ss0.mip_filter = translate_mip_filter( template->min_mip_filter ); - /* Set Anisotropy: + + /* XXX: anisotropy logic slightly changed: */ - if (key->max_aniso > 1.0) { + if (template->max_anisotropy > 1.0) { sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC; - if (key->max_aniso > 2.0) { - sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2, + if (template->max_anisotropy > 2.0) { + sampler->ss3.max_aniso = MIN2((template->max_anisotropy - 2) / 2, BRW_ANISORATIO_16); } } - else { - switch (key->magfilter) { - case GL_NEAREST: - sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; - break; - case GL_LINEAR: - sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - break; - default: - break; - } - } - sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r); - sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s); - sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t); + sampler->ss1.r_wrap_mode = 
translate_wrap_mode(template->wrap_r); + sampler->ss1.s_wrap_mode = translate_wrap_mode(template->wrap_s); + sampler->ss1.t_wrap_mode = translate_wrap_mode(template->wrap_t); /* Set LOD bias: */ - sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6); + sampler->ss0.lod_bias = + util_signed_fixed(CLAMP(template->lod_bias, -16, 15), 6); + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ /* Set shadow function: */ - if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) { + if (template->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + /* Shadowing is "enabled" by emitting a particular sampler * message (sample_c). So need to recompile WM program when * shadow comparison is enabled on each/any texture unit. */ sampler->ss0.shadow_function = - intel_translate_shadow_compare_func(key->comparefunc); + translate_shadow_compare_func(template->compare_func); } /* Set BaseMipLevel, MaxLOD, MinLOD: */ - sampler->ss0.base_level = U_FIXED(0, 1); + sampler->ss0.base_level = + util_unsigned_fixed(0, 1); + + sampler->ss1.max_lod = + util_unsigned_fixed(CLAMP(template->max_lod, 0, 13), 6); - sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6); - sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6); + sampler->ss1.min_lod = + util_unsigned_fixed(CLAMP(template->min_lod, 0, 13), 6); return (void *)sampler; } static void brw_bind_sampler_state(struct pipe_context *pipe, - void *cso) + unsigned num, void **sampler) { struct brw_context *brw = brw_context(pipe); - brw->curr.sampler = (const struct brw_sampler_state *)cso; - brw->state.dirty.mesa |= PIPE_NEW_SAMPLER; + int i; + + for (i = 0; i < num; i++) + brw->curr.sampler[i] = sampler[i]; + + for (i = num; i < brw->curr.num_samplers; i++) + brw->curr.sampler[i] = NULL; + + brw->curr.num_samplers = num; + brw->state.dirty.mesa |= PIPE_NEW_SAMPLERS; } static void brw_delete_sampler_state(struct pipe_context *pipe, void *cso) { - struct 
brw_context *brw = brw_context(pipe); FREE(cso); } static void brw_set_sampler_textures(struct pipe_context *pipe, - unsigned num_textures, - struct pipe_texture **tex) + unsigned num, + struct pipe_texture **texture) { struct brw_context *brw = brw_context(pipe); + int i; + for (i = 0; i < num; i++) + pipe_texture_reference(&brw->curr.texture[i], texture[i]); + + for (i = num; i < brw->curr.num_textures; i++) + pipe_texture_reference(&brw->curr.texture[i], NULL); + + brw->curr.num_textures = num; brw->state.dirty.mesa |= PIPE_NEW_BOUND_TEXTURES; } @@ -160,8 +209,10 @@ void brw_pipe_sampler_init( struct brw_context *brw ) { brw->base.set_sampler_textures = brw_set_sampler_textures; brw->base.create_sampler_state = brw_create_sampler_state; - brw->base.bind_sampler_state = brw_bind_sampler_state; - brw->base.destroy_sampler_state = brw_destroy_sampler_state; + brw->base.bind_sampler_states = brw_bind_sampler_state; + brw->base.delete_sampler_state = brw_delete_sampler_state; + + brw->base.set_sampler_textures = brw_set_sampler_textures; } void brw_pipe_sampler_cleanup( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 4fbf9de9bb..90780272da 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -248,7 +248,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* PIPE_NEW_BOUND_TEXTURES */ for (i = 0; i < brw->curr.num_textures; i++) { - const struct brw_texture *tex = brw->curr.texture[i]; + const struct brw_texture *tex = brw_texture(brw->curr.texture[i]); if (tex->base.format == PIPE_FORMAT_YCBCR) key->yuvtex_mask |= 1 << i; diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c index 2909dd3876..2fddb4ad89 100644 --- a/src/gallium/drivers/i965/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -75,7 +75,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, 
brw->curr.num_samplers); for (i = 0; i < key->sampler_count; i++) { - const struct brw_texture *tex = brw->curr.texture[i]; + const struct brw_texture *tex = brw_texture(brw->curr.texture[i]); const struct brw_sampler *sampler = brw->curr.sampler[i]; struct brw_sampler_state *entry = &key->sampler[i]; @@ -119,7 +119,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw) int i; for (i = 0; i < nr; i++) { - const struct brw_texture *tex = brw->curr.texture[i]; + const struct brw_texture *tex = brw_texture(brw->curr.texture[i]); const struct brw_sampler *sampler = brw->curr.sampler[i]; brw->sws->bo_unreference(brw->wm.sdc_bo[i]); diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index e5a0ed7d61..6c29db045f 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -201,7 +201,7 @@ static int prepare_wm_surfaces(struct brw_context *brw ) */ for (i = 0; i < brw->curr.num_textures; i++) { brw_update_texture_surface(brw, - brw->curr.texture[i], + brw_texture(brw->curr.texture[i]), nr_surfaces++); } -- cgit v1.2.3 From 5f8dde99ed62beaf1c2590515c33ed8b5076ed8d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 16:03:52 +0000 Subject: i965g: stubs for brw_pipe_vertex.c --- src/gallium/drivers/i965/Makefile | 1 + src/gallium/drivers/i965/brw_pipe_vertex.c | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 8603907dc2..d7262cf07c 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -33,6 +33,7 @@ C_SOURCES = \ brw_pipe_flush.c \ brw_pipe_misc.c \ brw_pipe_sampler.c \ + brw_pipe_vertex.c \ brw_pipe_rast.c \ brw_sf.c \ brw_sf_emit.c \ diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index 
d1d0d7cd43..0b69718fd8 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -1,11 +1,25 @@ +#include "brw_context.h" +void +brw_pipe_vertex_init( struct brw_context *brw ) +{ +} + void brw_pipe_vertex_cleanup( struct brw_context *brw ) { - for (i = 0; i < VERT_ATTRIB_MAX; i++) { + + /* Release bound pipe vertex_buffers + */ + + /* Release some other stuff + */ +#if 0 + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { brw->sws->bo_unreference(brw->vb.inputs[i].bo); brw->vb.inputs[i].bo = NULL; } +#endif } -- cgit v1.2.3 From 99394a737a46999a2fc08915e9f1408246109c4a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 16:42:44 +0000 Subject: i965g: add some missing texture creation code --- src/gallium/drivers/i965/brw_context.c | 1 + src/gallium/drivers/i965/brw_screen_texture.c | 32 ++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index 30cc243255..0692412b32 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -107,6 +107,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen) //ctx->Shader.EmitCondCodes = GL_TRUE; //ctx->Shader.EmitNVTempInitialization = GL_TRUE; + brw->base.screen = screen; brw->base.destroy = brw_destroy_context; brw_pipe_blend_init( brw ); diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 48b3451bfc..fe3e57da90 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -30,6 +30,7 @@ */ #include "util/u_memory.h" +#include "util/u_simple_list.h" #include "brw_screen.h" #include "brw_defines.h" @@ -190,8 +191,18 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, if (tex == NULL) return NULL; + memcpy(&tex->base, templ, 
sizeof *templ); + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + /* XXX: compressed textures need special treatment here + */ + tex->cpp = pf_get_size(tex->base.format); tex->compressed = pf_is_compressed(tex->base.format); + make_empty_list(&tex->views[0]); + make_empty_list(&tex->views[1]); + /* XXX: No tiling with compressed textures?? */ if (tex->compressed == 0 @@ -209,11 +220,30 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, } - memcpy(&tex->base, templ, sizeof *templ); + if (!brw_texture_layout( bscreen, tex )) goto fail; + + if (templ->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + } + else if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_PRIMARY)) { + } + else if (templ->tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { + } + else if (templ->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) { + } + + if (templ->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) { + } + + tex->bo = bscreen->sws->bo_alloc( bscreen->sws, + BRW_USAGE_SAMPLER, + tex->pitch * tex->total_height * tex->cpp, + 64 ); + tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); tex->ss.ss0.surface_format = translate_tex_format(tex->base.format); -- cgit v1.2.3 From 58e3360c11d6041de2927b604416146acb0c3817 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 18:26:34 +0000 Subject: i965g: add more missing pipe callbacks --- src/gallium/drivers/i965/brw_pipe_fb.c | 9 +++++++++ src/gallium/drivers/i965/brw_pipe_misc.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index c65f9bc374..d9b70f4eef 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -41,6 +41,7 @@ static void brw_set_framebuffer_state( struct pipe_context *pipe, 
brw->curr.fb.nr_cbufs = fb->nr_cbufs; } + static void brw_set_viewport_state( struct pipe_context *pipe, const struct pipe_viewport_state *viewport ) { @@ -58,4 +59,12 @@ void brw_pipe_framebuffer_init( struct brw_context *brw ) void brw_pipe_framebuffer_cleanup( struct brw_context *brw ) { + struct pipe_framebuffer_state *fb = &brw->curr.fb; + int i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&fb->cbufs[i], NULL); + } + + pipe_surface_reference(&fb->zsbuf, NULL); } diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c index a7ccde5917..0d0d92df82 100644 --- a/src/gallium/drivers/i965/brw_pipe_misc.c +++ b/src/gallium/drivers/i965/brw_pipe_misc.c @@ -16,13 +16,45 @@ static void brw_set_polygon_stipple( struct pipe_context *pipe, for (i = 0; i < 32; i++) bps->stipple[i] = stip->stipple[i]; /* don't invert */ + + brw->state.dirty.mesa |= PIPE_NEW_POLYGON_STIPPLE; +} + + +static void brw_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->curr.scissor = *scissor; + brw->state.dirty.mesa |= PIPE_NEW_SCISSOR; +} + +static void brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->curr.viewport = *viewport; + brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT; } +static void brw_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->curr.ucp = *clip; + brw->state.dirty.mesa |= PIPE_NEW_CLIP; +} void brw_pipe_misc_init( struct brw_context *brw ) { brw->base.set_polygon_stipple = brw_set_polygon_stipple; + brw->base.set_scissor_state = brw_set_scissor_state; + brw->base.set_clip_state = brw_set_clip_state; + brw->base.set_viewport_state = brw_set_viewport_state; } -- cgit v1.2.3 From 0cf432c7a180a6b847fa49c97ea1c48d90a7d5f8 Mon 
Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 19:03:06 +0000 Subject: i965g: initialize surface refcount --- src/gallium/drivers/i965/brw_screen_surface.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c index b4ad91278b..04a6fc7b66 100644 --- a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -130,6 +130,8 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, if (surface == NULL) return NULL; + pipe_reference_init(&surface->base.reference, 1); + /* XXX: ignoring render-to-slice-of-3d-texture */ assert(id.bits.zslice == 0); -- cgit v1.2.3 From 4e335a213acd535af81dd0c4b448003eb81db0cf Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 19:32:44 +0000 Subject: i965g: add missing is_*_referenced callbacks --- src/gallium/drivers/i965/brw_pipe_flush.c | 24 +++++++++++++++++ src/gallium/drivers/i965/brw_screen.h | 12 +++++++++ src/gallium/drivers/i965/brw_screen_buffers.c | 12 +++++++++ src/gallium/drivers/i965/brw_screen_texture.c | 39 +++++++++++++++++++++++++++ 4 files changed, 87 insertions(+) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index 9b52b56eae..6ae3c57765 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -2,6 +2,7 @@ #include "util/u_upload_mgr.h" #include "brw_context.h" +#include "brw_screen.h" #include "brw_batchbuffer.h" @@ -44,10 +45,33 @@ brw_flush( struct pipe_context *pipe, *fence = NULL; } +static unsigned brw_is_buffer_referenced(struct pipe_context *pipe, + struct pipe_buffer *buffer) +{ + struct brw_context *brw = brw_context(pipe); + + return brw_is_buffer_referenced_by_bo( brw->brw_screen, + buffer, + brw->batch->buf ); +} + +static unsigned 
brw_is_texture_referenced(struct pipe_context *pipe, + struct pipe_texture *texture, + unsigned face, + unsigned level) +{ + struct brw_context *brw = brw_context(pipe); + + return brw_is_texture_referenced_by_bo( brw->brw_screen, + texture, face, level, + brw->batch->buf ); +} void brw_pipe_flush_init( struct brw_context *brw ) { brw->base.flush = brw_flush; + brw->base.is_buffer_referenced = brw_is_buffer_referenced; + brw->base.is_texture_referenced = brw_is_texture_referenced; } diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index dda516ee68..820c6a6679 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -183,4 +183,16 @@ void brw_screen_tex_surface_init( struct brw_screen *brw_screen ); void brw_screen_buffer_init(struct brw_screen *brw_screen); +boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_texture *texture, + unsigned face, + unsigned level, + struct brw_winsys_buffer *bo ); + +boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_buffer *buffer, + struct brw_winsys_buffer *bo ); + + + #endif /* BRW_SCREEN_H */ diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c index 0bf885ce8c..c0f19d64aa 100644 --- a/src/gallium/drivers/i965/brw_screen_buffers.c +++ b/src/gallium/drivers/i965/brw_screen_buffers.c @@ -131,6 +131,18 @@ brw_user_buffer_create(struct pipe_screen *screen, return &buf->base; } + +boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_buffer *buffer, + struct brw_winsys_buffer *bo ) +{ + struct brw_buffer *buf = brw_buffer(buffer); + if (buf->bo == NULL) + return FALSE; + + return brw_screen->sws->bo_references( bo, buf->bo ); +} + void brw_screen_buffer_init(struct brw_screen *brw_screen) { diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 
fe3e57da90..c318b07f97 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -325,6 +325,45 @@ static boolean brw_is_format_supported( struct pipe_screen *screen, } +boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen, + struct pipe_texture *texture, + unsigned face, + unsigned level, + struct brw_winsys_buffer *bo ) +{ + struct brw_texture *tex = brw_texture(texture); + struct brw_surface *surf; + int i; + + /* XXX: this is subject to false positives if the underlying + * texture BO is referenced, we can't tell whether the sub-region + * we care about participates in that. + */ + if (brw_screen->sws->bo_references( bo, tex->bo )) + return TRUE; + + /* Find any view on this texture for this face/level and see if it + * is referenced: + */ + for (i = 0; i < 2; i++) { + foreach (surf, &tex->views[i]) { + if (surf->bo == tex->bo) + continue; + + if (surf->id.bits.face != face || + surf->id.bits.level != level) + continue; + + if (brw_screen->sws->bo_references( bo, surf->bo)) + return TRUE; + } + } + + return FALSE; +} + + + void brw_screen_tex_init( struct brw_screen *brw_screen ) { brw_screen->base.is_format_supported = brw_is_format_supported; -- cgit v1.2.3 From 19119517ce00f7710c6cd627c75e7eef765021c2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 19:41:02 +0000 Subject: i965g: add constant buffer setter --- src/gallium/drivers/i965/brw_pipe_shader.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 2422f77f34..8e10edb459 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -159,11 +159,33 @@ static void brw_delete_vs_state( struct pipe_context *pipe, void *prog ) } +static void brw_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint 
index, + const struct pipe_constant_buffer *buf) +{ + struct brw_context *brw = brw_context(pipe); + assert(index == 0); + + if (shader == PIPE_SHADER_FRAGMENT) { + pipe_buffer_reference( &brw->curr.fragment_constants, + buf->buffer ); + + brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS; + } + else { + pipe_buffer_reference( &brw->curr.vertex_constants, + buf->buffer ); + + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS; + } +} void brw_pipe_shader_init( struct brw_context *brw ) { + brw->base.set_constant_buffer = brw_set_constant_buffer; + brw->base.create_vs_state = brw_create_vs_state; brw->base.bind_vs_state = brw_bind_vs_state; brw->base.delete_vs_state = brw_delete_vs_state; @@ -175,4 +197,6 @@ void brw_pipe_shader_init( struct brw_context *brw ) void brw_pipe_shader_cleanup( struct brw_context *brw ) { + pipe_buffer_reference( &brw->curr.fragment_constants, NULL ); + pipe_buffer_reference( &brw->curr.vertex_constants, NULL ); } -- cgit v1.2.3 From e18f223da710a6e1f6a08d346951ea66c6a1de99 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 20:26:41 +0000 Subject: i965g: hook up pipe_clear functions --- src/gallium/drivers/i965/Makefile | 1 + src/gallium/drivers/i965/brw_batchbuffer.h | 5 +- src/gallium/drivers/i965/brw_context.c | 2 + src/gallium/drivers/i965/brw_context.h | 2 + src/gallium/drivers/i965/brw_pipe_clear.c | 222 +++++++++++++++++++++++++++++ src/gallium/drivers/i965/brw_screen.h | 6 + 6 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 src/gallium/drivers/i965/brw_pipe_clear.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index d7262cf07c..870d67b13d 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -34,6 +34,7 @@ C_SOURCES = \ brw_pipe_misc.c \ brw_pipe_sampler.c \ brw_pipe_vertex.c \ + brw_pipe_clear.c \ brw_pipe_rast.c \ brw_sf.c \ brw_sf_emit.c \ diff --git 
a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 04ca6265ed..61374ffb00 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -1,10 +1,13 @@ #ifndef BRW_BATCHBUFFER_H #define BRW_BATCHBUFFER_H +#include "util/u_debug.h" + +#include "pipe/p_error.h" + #include "brw_types.h" #include "brw_winsys.h" #include "brw_reg.h" -#include "util/u_debug.h" #define BATCH_SZ 16384 #define BATCH_RESERVED 16 diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index 0692412b32..5accc858a9 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -60,6 +60,7 @@ static void brw_destroy_context( struct pipe_context *pipe ) brw_pipe_sampler_cleanup( brw ); brw_pipe_shader_cleanup( brw ); brw_pipe_vertex_cleanup( brw ); + brw_pipe_clear_cleanup( brw ); FREE(brw->wm.compile_data); @@ -120,6 +121,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen) brw_pipe_sampler_init( brw ); brw_pipe_shader_init( brw ); brw_pipe_vertex_init( brw ); + brw_pipe_clear_init( brw ); brw_init_state( brw ); brw_draw_init( brw ); diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index b6f77d1253..e32452f49a 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -764,6 +764,7 @@ void brw_pipe_rast_init( struct brw_context *brw ); void brw_pipe_sampler_init( struct brw_context *brw ); void brw_pipe_shader_init( struct brw_context *brw ); void brw_pipe_vertex_init( struct brw_context *brw ); +void brw_pipe_clear_init( struct brw_context *brw ); void brw_pipe_blend_cleanup( struct brw_context *brw ); void brw_pipe_depth_stencil_cleanup( struct brw_context *brw ); @@ -775,6 +776,7 @@ void brw_pipe_rast_cleanup( struct brw_context *brw ); void brw_pipe_sampler_cleanup( struct brw_context *brw ); void brw_pipe_shader_cleanup( 
struct brw_context *brw ); void brw_pipe_vertex_cleanup( struct brw_context *brw ); +void brw_pipe_clear_cleanup( struct brw_context *brw ); void brw_context_flush( struct brw_context *brw ); diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c new file mode 100644 index 0000000000..f48175c0f7 --- /dev/null +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -0,0 +1,222 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_pack_color.h" + +#include "pipe/p_error.h" +#include "pipe/p_state.h" + +#include "brw_batchbuffer.h" +#include "brw_screen.h" +#include "brw_context.h" + +#define MASK16 0xffff +#define MASK24 0xffffff + + +/** + * Use blitting to clear the renderbuffers named by 'flags'. + * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field + * since that might include software renderbuffers or renderbuffers + * which we're clearing with triangles. + * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear + */ +static enum pipe_error +try_clear( struct brw_context *brw, + struct brw_surface *surface, + unsigned value ) +{ + uint32_t BR13, CMD; + int x1 = 0; + int y1 = 0; + int x2 = surface->base.width; + int y2 = surface->base.height; + int pitch = surface->pitch; + int cpp = surface->cpp; + + if (x2 == 0 || y2 == 0) + return 0; + + debug_printf("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + (void *)surface->bo, pitch * cpp, + surface->draw_offset, + x1, y1, x2 - x1, y2 - y1); + + BR13 = 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_RGB | XY_BLT_WRITE_ALPHA; + + /* Setup the blit command */ + if (cpp == 4) { + BR13 |= BR13_8888; + CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + } + else { + assert(cpp == 2); + BR13 |= BR13_565; + } + + assert(surface->tiling != BRW_TILING_Y); + + if (surface->tiling == BRW_TILING_X) { + CMD |= XY_DST_TILED; + pitch /= 4; + } + + BR13 |= (pitch * cpp); + + BEGIN_BATCH(6, 0); + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y1 << 16) | x1); + OUT_BATCH((y2 << 16) | x2); + OUT_RELOC(surface->bo, + BRW_USAGE_BLIT_DEST, + surface->draw_offset); + OUT_BATCH(value); + ADVANCE_BATCH(); + + return 0; +} + + + + +static void color_clear(struct brw_context *brw, + struct brw_surface *bsurface, + const float *rgba ) +{ + enum pipe_error ret; + unsigned value; + + util_pack_color( rgba, 
bsurface->base.format, &value ); + + if (bsurface->cpp == 2) + value |= value << 16; + + ret = try_clear( brw, bsurface, value ); + + if (ret != 0) { + brw_context_flush( brw ); + ret = try_clear( brw, bsurface, value ); + assert( ret == 0 ); + } +} + +static void zstencil_clear(struct brw_context *brw, + struct brw_surface *bsurface, + double depth, + unsigned stencil ) +{ + enum pipe_error ret; + unsigned value; + + switch (bsurface->base.format) { + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + value = ((unsigned)(depth * MASK24) & MASK24); + break; + case PIPE_FORMAT_Z16_UNORM: + value = ((unsigned)(depth * MASK16) & MASK16); + break; + default: + assert(0); + return; + } + + switch (bsurface->base.format) { + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + value = (value << 8) | stencil; + break; + + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + value = value | (stencil << 24); + break; + + case PIPE_FORMAT_Z16_UNORM: + value = value | (value << 16); + break; + + default: + break; + } + + ret = try_clear( brw, bsurface, value ); + + if (ret != 0) { + brw_context_flush( brw ); + ret = try_clear( brw, bsurface, value ); + assert( ret == 0 ); + } +} + + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). 
+ */ +static void brw_clear(struct pipe_context *pipe, + unsigned buffers, + const float *rgba, + double depth, + unsigned stencil) +{ + struct brw_context *brw = brw_context( pipe ); + int i; + + if (buffers & PIPE_CLEAR_COLOR) { + for (i = 0; i < brw->curr.fb.nr_cbufs; i++) { + color_clear( brw, + brw_surface(brw->curr.fb.cbufs[i]), + rgba ); + } + } + + if (buffers & PIPE_CLEAR_DEPTHSTENCIL) { + if (brw->curr.fb.zsbuf) { + zstencil_clear( brw, + brw_surface(brw->curr.fb.zsbuf), + depth, stencil ); + } + } +} + + +void brw_pipe_clear_init( struct brw_context *brw ) +{ + brw->base.clear = brw_clear; +} + + +void brw_pipe_clear_cleanup( struct brw_context *brw ) +{ +} diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index 820c6a6679..f7267cc78a 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -89,7 +89,13 @@ union brw_surface_id { struct brw_surface { struct pipe_surface base; + union brw_surface_id id; + unsigned cpp; + unsigned pitch; + unsigned draw_offset; + unsigned tiling; + struct brw_surface_state ss; struct brw_winsys_buffer *bo; struct brw_surface *next, *prev; -- cgit v1.2.3 From c5ed7b6e76a71d34e4a42ebfca092bd99cb39438 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 20:36:48 +0000 Subject: i965g: plumb in some surface state --- src/gallium/drivers/i965/brw_context.c | 18 +++++++++++++----- src/gallium/drivers/i965/brw_pipe_clear.c | 4 ++-- src/gallium/drivers/i965/brw_screen_surface.c | 15 ++++++++++++++- 3 files changed, 29 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index 5accc858a9..cd8963bebc 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -39,6 +39,7 @@ #include "brw_state.h" #include "brw_batchbuffer.h" #include "brw_winsys.h" +#include "brw_screen.h" static void 
brw_destroy_context( struct pipe_context *pipe ) @@ -46,6 +47,8 @@ static void brw_destroy_context( struct pipe_context *pipe ) struct brw_context *brw = brw_context(pipe); int i; + brw_context_flush( brw ); + brw_batchbuffer_free( brw->batch ); brw_destroy_state(brw); brw_draw_cleanup( brw ); @@ -101,15 +104,12 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen) if (!brw) { debug_printf("%s: failed to alloc context\n", __FUNCTION__); - return GL_FALSE; + return NULL; } - /* We want the GLSL compiler to emit code that uses condition codes */ - //ctx->Shader.EmitCondCodes = GL_TRUE; - //ctx->Shader.EmitNVTempInitialization = GL_TRUE; - brw->base.screen = screen; brw->base.destroy = brw_destroy_context; + brw->sws = brw_screen(screen)->sws; brw_pipe_blend_init( brw ); brw_pipe_depth_stencil_init( brw ); @@ -133,7 +133,15 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen) make_empty_list(&brw->query.active_head); + brw->batch = brw_batchbuffer_alloc( brw->sws ); + if (brw->batch == NULL) + goto fail; return &brw->base; + +fail: + if (brw->batch) + brw_batchbuffer_free( brw->batch ); + return NULL; } diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c index f48175c0f7..69bc95e51a 100644 --- a/src/gallium/drivers/i965/brw_pipe_clear.c +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -64,7 +64,7 @@ try_clear( struct brw_context *brw, debug_printf("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, (void *)surface->bo, pitch * cpp, - surface->draw_offset, + surface->base.offset, x1, y1, x2 - x1, y2 - y1); BR13 = 0xf0 << 16; @@ -96,7 +96,7 @@ try_clear( struct brw_context *brw, OUT_BATCH((y2 << 16) | x2); OUT_RELOC(surface->bo, BRW_USAGE_BLIT_DEST, - surface->draw_offset); + surface->base.offset); OUT_BATCH(value); ADVANCE_BATCH(); diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c index 04a6fc7b66..1c408e9f2e 100644 --- 
a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -35,6 +35,7 @@ #include "pipe/p_screen.h" #include "brw_screen.h" #include "brw_defines.h" +#include "brw_winsys.h" enum { BRW_VIEW_LINEAR, @@ -145,6 +146,12 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, surface->base.face = id.bits.face; surface->base.level = id.bits.level; surface->id = id; + surface->cpp = tex->cpp; + surface->pitch = tex->pitch; + surface->tiling = tex->tiling; + + surface->bo = tex->bo; + brw_screen->sws->bo_reference(surface->bo); pipe_texture_reference( &surface->base.texture, &tex->base ); @@ -234,10 +241,16 @@ static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen, } -static void brw_tex_surface_destroy( struct pipe_surface *surface ) +static void brw_tex_surface_destroy( struct pipe_surface *surf ) { + struct brw_surface *surface = brw_surface(surf); + struct brw_screen *screen = brw_screen(surf->texture->screen); + /* Unreference texture, shared buffer: */ + screen->sws->bo_unreference(surface->bo); + pipe_texture_reference( &surface->base.texture, NULL ); + FREE(surface); } -- cgit v1.2.3 From b8bb48f4528227e36400cd1599a82bb73415ef60 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 21:05:34 +0000 Subject: i965g: fix up batchbuffer confusion --- src/gallium/drivers/i965/brw_batchbuffer.c | 35 +++++++++++++++++++----------- src/gallium/drivers/i965/brw_batchbuffer.h | 3 ++- src/gallium/drivers/i965/brw_pipe_flush.c | 3 ++- 3 files changed, 26 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index fd6b34cb8a..bfb7175f75 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -36,25 +36,26 @@ #include "brw_debug.h" #include "brw_structs.h" -#define USE_LOCAL_BUFFER 1 +#define USE_MALLOC_BUFFER 
1 #define ALWAYS_EMIT_MI_FLUSH 1 void brw_batchbuffer_reset(struct brw_batchbuffer *batch) { - if (batch->buf != NULL) { + if (batch->buf) { batch->sws->bo_unreference(batch->buf); batch->buf = NULL; } - if (USE_LOCAL_BUFFER && !batch->buffer) - batch->buffer = MALLOC(BRW_BATCH_SIZE); + if (batch->use_malloc_buffer && !batch->malloc_buffer) + batch->malloc_buffer = MALLOC(BRW_BATCH_SIZE); batch->buf = batch->sws->bo_alloc(batch->sws, BRW_BUFFER_TYPE_BATCH, BRW_BATCH_SIZE, 4096); - if (batch->buffer) - batch->map = batch->buffer; + + if (batch->malloc_buffer) + batch->map = batch->malloc_buffer; else batch->map = batch->sws->bo_map(batch->buf, GL_TRUE); @@ -67,6 +68,7 @@ brw_batchbuffer_alloc(struct brw_winsys_screen *sws) { struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer); + batch->use_malloc_buffer = USE_MALLOC_BUFFER; batch->sws = sws; brw_batchbuffer_reset(batch); @@ -76,16 +78,16 @@ brw_batchbuffer_alloc(struct brw_winsys_screen *sws) void brw_batchbuffer_free(struct brw_batchbuffer *batch) { - if (batch->map) { + if (batch->malloc_buffer) { + FREE(batch->malloc_buffer); + batch->map = NULL; + } + else if (batch->map) { batch->sws->bo_unmap(batch->buf); batch->map = NULL; } - batch->sws->bo_unreference(batch->buf); - batch->buf = NULL; - - FREE(batch->buffer); FREE(batch); } @@ -127,8 +129,15 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, batch->ptr += 4; used = batch->ptr - batch->map; - batch->sws->bo_unmap(batch->buf); - batch->map = NULL; + if (batch->use_malloc_buffer) { + batch->sws->bo_subdata(batch->buf, 0, used, batch->map ); + batch->map = NULL; + } + else { + batch->sws->bo_unmap(batch->buf); + batch->map = NULL; + } + batch->ptr = NULL; batch->sws->bo_exec(batch->buf, used ); diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 61374ffb00..1828324cc0 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -35,7 +35,8 @@ 
struct brw_batchbuffer { * XXX: is this still necessary? * XXX: if so, can this be hidden inside the GEM-specific winsys code? */ - uint8_t *buffer; + boolean use_malloc_buffer; + uint8_t *malloc_buffer; /** * Values exported to speed up the writing the batchbuffer, diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index 6ae3c57765..9dff2beeb1 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -42,7 +42,8 @@ brw_flush( struct pipe_context *pipe, struct pipe_fence_handle **fence ) { brw_context_flush( brw_context( pipe ) ); - *fence = NULL; + if (fence) + *fence = NULL; } static unsigned brw_is_buffer_referenced(struct pipe_context *pipe, -- cgit v1.2.3 From 951fdac566c3f2124f82aa94da08f55a10608f25 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 21:35:29 +0000 Subject: i965g: pull in a copy of intel_decode.c for now With the stubbed out, non-hardware xlib winsys, trivial/clear runs and prints a plausible command stream --- src/gallium/drivers/i965/Makefile | 3 +- src/gallium/drivers/i965/brw_batchbuffer.c | 18 +- src/gallium/drivers/i965/brw_batchbuffer.h | 4 +- src/gallium/drivers/i965/brw_context.c | 2 +- src/gallium/drivers/i965/intel_decode.c | 1790 ++++++++++++++++++++++++++++ src/gallium/drivers/i965/intel_decode.h | 29 + 6 files changed, 1835 insertions(+), 11 deletions(-) create mode 100644 src/gallium/drivers/i965/intel_decode.c create mode 100644 src/gallium/drivers/i965/intel_decode.h (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 870d67b13d..2188a1d4bc 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -66,6 +66,7 @@ C_SOURCES = \ brw_screen_tex_layout.c \ brw_screen_texture.c \ brw_screen_surface.c \ - brw_batchbuffer.c + brw_batchbuffer.c \ + intel_decode.c include ../../Makefile.template diff --git 
a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index bfb7175f75..64d6754df5 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -35,6 +35,7 @@ #include "brw_winsys.h" #include "brw_debug.h" #include "brw_structs.h" +#include "intel_decode.h" #define USE_MALLOC_BUFFER 1 #define ALWAYS_EMIT_MI_FLUSH 1 @@ -47,9 +48,6 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch) batch->buf = NULL; } - if (batch->use_malloc_buffer && !batch->malloc_buffer) - batch->malloc_buffer = MALLOC(BRW_BATCH_SIZE); - batch->buf = batch->sws->bo_alloc(batch->sws, BRW_BUFFER_TYPE_BATCH, BRW_BATCH_SIZE, 4096); @@ -64,12 +62,18 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch) } struct brw_batchbuffer * -brw_batchbuffer_alloc(struct brw_winsys_screen *sws) +brw_batchbuffer_alloc(struct brw_winsys_screen *sws, + struct brw_chipset chipset) { struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer); batch->use_malloc_buffer = USE_MALLOC_BUFFER; + if (batch->use_malloc_buffer) { + batch->malloc_buffer = MALLOC(BRW_BATCH_SIZE); + } + batch->sws = sws; + batch->chipset = chipset; brw_batchbuffer_reset(batch); return batch; @@ -142,18 +146,16 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, batch->sws->bo_exec(batch->buf, used ); -#if 0 - if (BRW_DEBUG & DEBUG_BATCH) { + if (1 /*BRW_DEBUG & DEBUG_BATCH*/) { void *ptr = batch->sws->bo_map(batch->buf, GL_FALSE); intel_decode(ptr, used / 4, - batch->buf->offset, + batch->buf->offset[0], batch->chipset.pci_id); batch->sws->bo_unmap(batch->buf); } -#endif if (BRW_DEBUG & DEBUG_SYNC) { /* Abuse map/unmap to achieve wait-for-fence. 
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 1828324cc0..b051638296 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -28,6 +28,7 @@ struct brw_batchbuffer { struct brw_winsys_screen *sws; struct brw_winsys_buffer *buf; + struct brw_chipset chipset; /* Main-memory copy of the batch-buffer, built up incrementally & * then copied as one to the true buffer. @@ -57,7 +58,8 @@ struct brw_batchbuffer { /*@}*/ }; -struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws ); +struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws, + struct brw_chipset chipset ); void brw_batchbuffer_free(struct brw_batchbuffer *batch); diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index cd8963bebc..aaf7d1834e 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -133,7 +133,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen) make_empty_list(&brw->query.active_head); - brw->batch = brw_batchbuffer_alloc( brw->sws ); + brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset ); if (brw->batch == NULL) goto fail; diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c new file mode 100644 index 0000000000..1fb1b66cc8 --- /dev/null +++ b/src/gallium/drivers/i965/intel_decode.c @@ -0,0 +1,1790 @@ +/* -*- c-basic-offset: 4 -*- */ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do 
so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * + */ + +/** @file intel_decode.c + * This file contains code to print out batchbuffer contents in a + * human-readable format. + * + * The current version only supports i915 packets, and only pretty-prints a + * subset of them. The intention is for it to make just a best attempt to + * decode, but never crash in the process. + */ + +#include +#include +#include +#include + +#include "intel_decode.h" + +/*#include "intel_chipset.h"*/ +#define IS_965(x) 1 /* XXX */ +#define IS_9XX(x) 1 /* XXX */ + +#define BUFFER_FAIL(_count, _len, _name) do { \ + fprintf(out, "Buffer size too small in %s (%d < %d)\n", \ + (_name), (_count), (_len)); \ + (*failures)++; \ + return count; \ +} while (0) + +static FILE *out; +static uint32_t saved_s2 = 0, saved_s4 = 0; +static char saved_s2_set = 0, saved_s4_set = 0; + +static float +int_as_float(uint32_t intval) +{ + union intfloat { + uint32_t i; + float f; + } uval; + + uval.i = intval; + return uval.f; +} + +static void +instr_out(uint32_t *data, uint32_t hw_offset, unsigned int index, + char *fmt, ...) +{ + va_list va; + + fprintf(out, "0x%08x: 0x%08x:%s ", hw_offset + index * 4, data[index], + index == 0 ? 
"" : " "); + va_start(va, fmt); + vfprintf(out, fmt, va); + va_end(va); +} + + +static int +decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode; + + struct { + uint32_t opcode; + int len_mask; + int min_len; + int max_len; + char *name; + } opcodes_mi[] = { + { 0x08, 0, 1, 1, "MI_ARB_ON_OFF" }, + { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" }, + { 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" }, + { 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" }, + { 0x04, 0, 1, 1, "MI_FLUSH" }, + { 0x22, 0, 3, 3, "MI_LOAD_REGISTER_IMM" }, + { 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" }, + { 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" }, + { 0x00, 0, 1, 1, "MI_NOOP" }, + { 0x11, 0x3f, 2, 2, "MI_OVERLAY_FLIP" }, + { 0x07, 0, 1, 1, "MI_REPORT_HEAD" }, + { 0x18, 0x3f, 2, 2, "MI_SET_CONTEXT" }, + { 0x20, 0x3f, 3, 4, "MI_STORE_DATA_IMM" }, + { 0x21, 0x3f, 3, 4, "MI_STORE_DATA_INDEX" }, + { 0x24, 0x3f, 3, 3, "MI_STORE_REGISTER_MEM" }, + { 0x02, 0, 1, 1, "MI_USER_INTERRUPT" }, + { 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" }, + }; + + + for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]); + opcode++) { + if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) { + unsigned int len = 1, i; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_mi[opcode].name); + if (opcodes_mi[opcode].max_len > 1) { + len = (data[0] & opcodes_mi[opcode].len_mask) + 2; + if (len < opcodes_mi[opcode].min_len || + len > opcodes_mi[opcode].max_len) + { + fprintf(out, "Bad length (%d) in %s, [%d, %d]\n", + len, opcodes_mi[opcode].name, + opcodes_mi[opcode].min_len, + opcodes_mi[opcode].max_len); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_mi[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(data, hw_offset, 0, "MI UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int 
opcode, len; + char *format = NULL; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_2d[] = { + { 0x40, 5, 5, "COLOR_BLT" }, + { 0x43, 6, 6, "SRC_COPY_BLT" }, + { 0x01, 8, 8, "XY_SETUP_BLT" }, + { 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" }, + { 0x03, 3, 3, "XY_SETUP_CLIP_BLT" }, + { 0x24, 2, 2, "XY_PIXEL_BLT" }, + { 0x25, 3, 3, "XY_SCANLINES_BLT" }, + { 0x26, 4, 4, "Y_TEXT_BLT" }, + { 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" }, + { 0x50, 6, 6, "XY_COLOR_BLT" }, + { 0x51, 6, 6, "XY_PAT_BLT" }, + { 0x76, 8, 8, "XY_PAT_CHROMA_BLT" }, + { 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" }, + { 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" }, + { 0x52, 9, 9, "XY_MONO_PAT_BLT" }, + { 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" }, + { 0x53, 8, 8, "XY_SRC_COPY_BLT" }, + { 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" }, + { 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" }, + { 0x55, 9, 9, "XY_FULL_BLT" }, + { 0x55, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" }, + { 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" }, + { 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" }, + { 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" }, + { 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" }, + }; + + switch ((data[0] & 0x1fc00000) >> 22) { + case 0x50: + instr_out(data, hw_offset, 0, + "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n", + (data[0] & (1 << 20)) ? "en" : "dis", + (data[0] & (1 << 21)) ? "en" : "dis", + (data[0] >> 11) & 1); + + len = (data[0] & 0x000000ff) + 2; + if (len != 6) + fprintf(out, "Bad count in XY_COLOR_BLT\n"); + if (count < 6) + BUFFER_FAIL(count, len, "XY_COLOR_BLT"); + + switch ((data[1] >> 24) & 0x3) { + case 0: + format="8"; + break; + case 1: + format="565"; + break; + case 2: + format="1555"; + break; + case 3: + format="8888"; + break; + } + + instr_out(data, hw_offset, 1, "format %s, pitch %d, " + "clipping %sabled\n", format, + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? 
"en" : "dis"); + instr_out(data, hw_offset, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(data, hw_offset, 3, "(%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + instr_out(data, hw_offset, 4, "offset 0x%08x\n", data[4]); + instr_out(data, hw_offset, 5, "color\n"); + return len; + case 0x53: + instr_out(data, hw_offset, 0, + "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, " + "src tile %d, dst tile %d)\n", + (data[0] & (1 << 20)) ? "en" : "dis", + (data[0] & (1 << 21)) ? "en" : "dis", + (data[0] >> 15) & 1, + (data[0] >> 11) & 1); + + len = (data[0] & 0x000000ff) + 2; + if (len != 8) + fprintf(out, "Bad count in XY_SRC_COPY_BLT\n"); + if (count < 8) + BUFFER_FAIL(count, len, "XY_SRC_COPY_BLT"); + + switch ((data[1] >> 24) & 0x3) { + case 0: + format="8"; + break; + case 1: + format="565"; + break; + case 2: + format="1555"; + break; + case 3: + format="8888"; + break; + } + + instr_out(data, hw_offset, 1, "format %s, dst pitch %d, " + "clipping %sabled\n", format, + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? 
"en" : "dis"); + instr_out(data, hw_offset, 2, "dst (%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(data, hw_offset, 3, "dst (%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]); + instr_out(data, hw_offset, 5, "src (%d,%d)\n", + data[5] & 0xffff, data[5] >> 16); + instr_out(data, hw_offset, 6, "src pitch %d\n", + (short)(data[6] & 0xffff)); + instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]); + return len; + } + + for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]); + opcode++) { + if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) { + unsigned int i; + + len = 1; + instr_out(data, hw_offset, 0, "%s\n", opcodes_2d[opcode].name); + if (opcodes_2d[opcode].max_len > 1) { + len = (data[0] & 0x000000ff) + 2; + if (len < opcodes_2d[opcode].min_len || + len > opcodes_2d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", opcodes_2d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_2d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(data, hw_offset, 0, "2D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + switch ((data[0] & 0x00f80000) >> 19) { + case 0x11: + instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISALBE\n"); + return 1; + case 0x10: + instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n"); + return 1; + case 0x01: + instr_out(data, hw_offset, 0, "3DSTATE_MAP_COORD_SET_I830\n"); + return 1; + case 0x0a: + instr_out(data, hw_offset, 0, "3DSTATE_MAP_CUBE_I830\n"); + return 1; + case 0x05: + instr_out(data, hw_offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n"); + return 1; + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +/** Sets the string dstname to describe the destination of the PS instruction */ 
+static void +i915_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask) +{ + uint32_t a0 = data[i]; + int dst_nr = (a0 >> 14) & 0xf; + char dstmask[8]; + char *sat; + + if (do_mask) { + if (((a0 >> 10) & 0xf) == 0xf) { + dstmask[0] = 0; + } else { + int dstmask_index = 0; + + dstmask[dstmask_index++] = '.'; + if (a0 & (1 << 10)) + dstmask[dstmask_index++] = 'x'; + if (a0 & (1 << 11)) + dstmask[dstmask_index++] = 'y'; + if (a0 & (1 << 12)) + dstmask[dstmask_index++] = 'z'; + if (a0 & (1 << 13)) + dstmask[dstmask_index++] = 'w'; + dstmask[dstmask_index++] = 0; + } + + if (a0 & (1 << 22)) + sat = ".sat"; + else + sat = ""; + } else { + dstmask[0] = 0; + sat = ""; + } + + switch ((a0 >> 19) & 0x7) { + case 0: + if (dst_nr > 15) + fprintf(out, "bad destination reg R%d\n", dst_nr); + sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat); + break; + case 4: + if (dst_nr > 0) + fprintf(out, "bad destination reg oC%d\n", dst_nr); + sprintf(dstname, "oC%s%s", dstmask, sat); + break; + case 5: + if (dst_nr > 0) + fprintf(out, "bad destination reg oD%d\n", dst_nr); + sprintf(dstname, "oD%s%s", dstmask, sat); + break; + case 6: + if (dst_nr > 2) + fprintf(out, "bad destination reg U%d\n", dst_nr); + sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat); + break; + default: + sprintf(dstname, "RESERVED"); + break; + } +} + +static char * +i915_get_channel_swizzle(uint32_t select) +{ + switch (select & 0x7) { + case 0: + return (select & 8) ? "-x" : "x"; + case 1: + return (select & 8) ? "-y" : "y"; + case 2: + return (select & 8) ? "-z" : "z"; + case 3: + return (select & 8) ? "-w" : "w"; + case 4: + return (select & 8) ? "-0" : "0"; + case 5: + return (select & 8) ? "-1" : "1"; + default: + return (select & 8) ? 
"-bad" : "bad"; + } +} + +static void +i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name) +{ + switch (src_type) { + case 0: + sprintf(name, "R%d", src_nr); + if (src_nr > 15) + fprintf(out, "bad src reg %s\n", name); + break; + case 1: + if (src_nr < 8) + sprintf(name, "T%d", src_nr); + else if (src_nr == 8) + sprintf(name, "DIFFUSE"); + else if (src_nr == 9) + sprintf(name, "SPECULAR"); + else if (src_nr == 10) + sprintf(name, "FOG"); + else { + fprintf(out, "bad src reg T%d\n", src_nr); + sprintf(name, "RESERVED"); + } + break; + case 2: + sprintf(name, "C%d", src_nr); + if (src_nr > 31) + fprintf(out, "bad src reg %s\n", name); + break; + case 4: + sprintf(name, "oC"); + if (src_nr > 0) + fprintf(out, "bad src reg oC%d\n", src_nr); + break; + case 5: + sprintf(name, "oD"); + if (src_nr > 0) + fprintf(out, "bad src reg oD%d\n", src_nr); + break; + case 6: + sprintf(name, "U%d", src_nr); + if (src_nr > 2) + fprintf(out, "bad src reg %s\n", name); + break; + default: + fprintf(out, "bad src reg type %d\n", src_type); + sprintf(name, "RESERVED"); + break; + } +} + +static void +i915_get_instruction_src0(uint32_t *data, int i, char *srcname) +{ + uint32_t a0 = data[i]; + uint32_t a1 = data[i + 1]; + int src_nr = (a0 >> 2) & 0x1f; + char *swizzle_x = i915_get_channel_swizzle((a1 >> 28) & 0xf); + char *swizzle_y = i915_get_channel_swizzle((a1 >> 24) & 0xf); + char *swizzle_z = i915_get_channel_swizzle((a1 >> 20) & 0xf); + char *swizzle_w = i915_get_channel_swizzle((a1 >> 16) & 0xf); + char swizzle[100]; + + i915_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +i915_get_instruction_src1(uint32_t *data, int i, char *srcname) +{ + uint32_t a1 = data[i + 1]; + uint32_t a2 = data[i + 2]; + int src_nr = (a1 >> 8) & 0x1f; + char *swizzle_x = i915_get_channel_swizzle((a1 >> 4) & 
0xf); + char *swizzle_y = i915_get_channel_swizzle((a1 >> 0) & 0xf); + char *swizzle_z = i915_get_channel_swizzle((a2 >> 28) & 0xf); + char *swizzle_w = i915_get_channel_swizzle((a2 >> 24) & 0xf); + char swizzle[100]; + + i915_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +i915_get_instruction_src2(uint32_t *data, int i, char *srcname) +{ + uint32_t a2 = data[i + 2]; + int src_nr = (a2 >> 16) & 0x1f; + char *swizzle_x = i915_get_channel_swizzle((a2 >> 12) & 0xf); + char *swizzle_y = i915_get_channel_swizzle((a2 >> 8) & 0xf); + char *swizzle_z = i915_get_channel_swizzle((a2 >> 4) & 0xf); + char *swizzle_w = i915_get_channel_swizzle((a2 >> 0) & 0xf); + char swizzle[100]; + + i915_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +i915_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name) +{ + switch (src_type) { + case 0: + sprintf(name, "R%d", src_nr); + if (src_nr > 15) + fprintf(out, "bad src reg %s\n", name); + break; + case 1: + if (src_nr < 8) + sprintf(name, "T%d", src_nr); + else if (src_nr == 8) + sprintf(name, "DIFFUSE"); + else if (src_nr == 9) + sprintf(name, "SPECULAR"); + else if (src_nr == 10) + sprintf(name, "FOG"); + else { + fprintf(out, "bad src reg T%d\n", src_nr); + sprintf(name, "RESERVED"); + } + break; + case 4: + sprintf(name, "oC"); + if (src_nr > 0) + fprintf(out, "bad src reg oC%d\n", src_nr); + break; + case 5: + sprintf(name, "oD"); + if (src_nr > 0) + fprintf(out, "bad src reg oD%d\n", src_nr); + break; + default: + fprintf(out, "bad src reg type %d\n", src_type); + sprintf(name, "RESERVED"); + break; + } +} + +static void +i915_decode_alu1(uint32_t *data, uint32_t hw_offset, + int 
i, char *instr_prefix, char *op_name) +{ + char dst[100], src0[100]; + + i915_get_instruction_dst(data, i, dst, 1); + i915_get_instruction_src0(data, i, src0); + + instr_out(data, hw_offset, i++, "%s: %s %s, %s\n", instr_prefix, + op_name, dst, src0); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); +} +/* Prints a two-source ALU instruction occupying the three dwords at data[i]. */ +static void +i915_decode_alu2(uint32_t *data, uint32_t hw_offset, + int i, char *instr_prefix, char *op_name) +{ + char dst[100], src0[100], src1[100]; + + i915_get_instruction_dst(data, i, dst, 1); + i915_get_instruction_src0(data, i, src0); + i915_get_instruction_src1(data, i, src1); + + instr_out(data, hw_offset, i++, "%s: %s %s, %s, %s\n", instr_prefix, + op_name, dst, src0, src1); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); +} +/* Prints a three-source ALU instruction occupying the three dwords at data[i]. */ +static void +i915_decode_alu3(uint32_t *data, uint32_t hw_offset, + int i, char *instr_prefix, char *op_name) +{ + char dst[100], src0[100], src1[100], src2[100]; + + i915_get_instruction_dst(data, i, dst, 1); + i915_get_instruction_src0(data, i, src0); + i915_get_instruction_src1(data, i, src1); + i915_get_instruction_src2(data, i, src2); + + instr_out(data, hw_offset, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix, + op_name, dst, src0, src1, src2); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); +} +/* Prints a texture instruction (dst, sampler number from t0, address register from t1) occupying three dwords at data[i]. */ +static void +i915_decode_tex(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix, + char *tex_name) +{ + uint32_t t0 = data[i]; + uint32_t t1 = data[i + 1]; + char dst_name[100]; + char addr_name[100]; + int sampler_nr; + + i915_get_instruction_dst(data, i, dst_name, 0); + i915_get_instruction_addr((t1 >> 24) & 0x7, + (t1 >> 17) & 0xf, + addr_name); + sampler_nr = t0 & 0xf; + + instr_out(data, hw_offset, i++, "%s: %s %s, S%d, %s\n", instr_prefix, + tex_name, dst_name, sampler_nr, addr_name); + instr_out(data, hw_offset, i++, 
"%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); +} +/* Prints a three-dword DCL instruction at data[i]: register type (bits 19-20) 1 prints T/DIFFUSE/SPECULAR/FOG declarations with a write mask, 3 prints sampler declarations. */ +static void +i915_decode_dcl(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix) +{ + uint32_t d0 = data[i]; + char *sampletype; + int dcl_nr = (d0 >> 14) & 0xf; + char *dcl_x = d0 & (1 << 10) ? "x" : ""; + char *dcl_y = d0 & (1 << 11) ? "y" : ""; + char *dcl_z = d0 & (1 << 12) ? "z" : ""; + char *dcl_w = d0 & (1 << 13) ? "w" : ""; + char dcl_mask[10]; + + switch ((d0 >> 19) & 0x3) { + case 1: + sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w); + if (strcmp(dcl_mask, ".") == 0) + fprintf(out, "bad (empty) dcl mask\n"); + + if (dcl_nr > 10) + fprintf(out, "bad T%d dcl register number\n", dcl_nr); + if (dcl_nr < 8) { + if (strcmp(dcl_mask, ".x") != 0 && + strcmp(dcl_mask, ".xy") != 0 && + strcmp(dcl_mask, ".xz") != 0 && + strcmp(dcl_mask, ".w") != 0 && + strcmp(dcl_mask, ".xyzw") != 0) { + fprintf(out, "bad T%d%s dcl mask\n", dcl_nr, dcl_mask); /* dcl_mask already starts with '.' */ + } + instr_out(data, hw_offset, i++, "%s: DCL T%d%s\n", instr_prefix, + dcl_nr, dcl_mask); + } else { + if (strcmp(dcl_mask, ".xz") == 0) + fprintf(out, "errataed bad dcl mask %s\n", dcl_mask); + else if (strcmp(dcl_mask, ".xw") == 0) + fprintf(out, "errataed bad dcl mask %s\n", dcl_mask); + else if (strcmp(dcl_mask, ".xzw") == 0) + fprintf(out, "errataed bad dcl mask %s\n", dcl_mask); + + if (dcl_nr == 8) { + instr_out(data, hw_offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix, + dcl_mask); + } else if (dcl_nr == 9) { + instr_out(data, hw_offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix, + dcl_mask); + } else if (dcl_nr == 10) { + instr_out(data, hw_offset, i++, "%s: DCL FOG%s\n", instr_prefix, + dcl_mask); + } + } + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + break; + case 3: + switch ((d0 >> 22) & 0x3) { + case 0: + sampletype = "2D"; + break; + case 1: + sampletype = "CUBE"; + break; + case 2: + sampletype = "3D"; + break; + default: + sampletype 
= "RESERVED"; + break; + } + if (dcl_nr > 15) + fprintf(out, "bad S%d dcl register number\n", dcl_nr); + instr_out(data, hw_offset, i++, "%s: DCL S%d %s\n", instr_prefix, + dcl_nr, sampletype); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + break; + default: + instr_out(data, hw_offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + } +} + +static void +i915_decode_instruction(uint32_t *data, uint32_t hw_offset, + int i, char *instr_prefix) +{ + switch ((data[i] >> 24) & 0x1f) { + case 0x0: + instr_out(data, hw_offset, i++, "%s: NOP\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + break; + case 0x01: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "ADD"); + break; + case 0x02: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOV"); + break; + case 0x03: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "MUL"); + break; + case 0x04: + i915_decode_alu3(data, hw_offset, i, instr_prefix, "MAD"); + break; + case 0x05: + i915_decode_alu3(data, hw_offset, i, instr_prefix, "DP2ADD"); + break; + case 0x06: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP3"); + break; + case 0x07: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP4"); + break; + case 0x08: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "FRC"); + break; + case 0x09: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "RCP"); + break; + case 0x0a: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "RSQ"); + break; + case 0x0b: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "EXP"); + break; + case 0x0c: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "LOG"); + break; + case 0x0d: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "CMP"); + break; + case 0x0e: + i915_decode_alu2(data, 
hw_offset, i, instr_prefix, "MIN"); + break; + case 0x0f: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "MAX"); + break; + case 0x10: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "FLR"); + break; + case 0x11: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOD"); + break; + case 0x12: + i915_decode_alu1(data, hw_offset, i, instr_prefix, "TRC"); + break; + case 0x13: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "SGE"); + break; + case 0x14: + i915_decode_alu2(data, hw_offset, i, instr_prefix, "SLT"); + break; + case 0x15: + i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLD"); + break; + case 0x16: + i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDP"); + break; + case 0x17: + i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDB"); + break; + case 0x19: + i915_decode_dcl(data, hw_offset, i, instr_prefix); + break; + default: + instr_out(data, hw_offset, i++, "%s: unknown\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + instr_out(data, hw_offset, i++, "%s\n", instr_prefix); + break; + } +} +/* Decodes the packet at data[0] selected by opcode bits 16-23; returns the packet length in dwords and bumps *failures on a bad count. i830 enables the i830-only table entries. */ +static int +decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830) +{ + unsigned int len, i, c, opcode, word, map, sampler, instr; + char *format; + + struct { + uint32_t opcode; + int i830_only; + int min_len; + int max_len; + char *name; + } opcodes_3d_1d[] = { + { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" }, + { 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" }, + { 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" }, + { 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" }, + { 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" }, + { 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" }, + { 0x98, 0, 2, 2, "3DSTATE_DEFAULT_Z" }, + { 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" }, + { 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" }, + { 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" }, + { 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" }, + { 0x9e, 0, 4, 4, 
"3DSTATE_MONO_FILTER" }, + { 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" }, + { 0x8f, 0, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" }, + { 0x81, 0, 3, 3, "3DSTATE_SCISSOR_RECTANGLE" }, + { 0x83, 0, 2, 2, "3DSTATE_SPAN_STIPPLE" }, + { 0x8c, 1, 2, 2, "3DSTATE_MAP_COORD_TRANSFORM_I830" }, + { 0x8b, 1, 2, 2, "3DSTATE_MAP_VERTEX_TRANSFORM_I830" }, + { 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" }, + { 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" }, + { 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" }, + }; + + switch ((data[0] & 0x00ff0000) >> 16) { + case 0x07: + /* This instruction is unusual. A 0 length means just 1 DWORD instead of + * 2. The 0 length is specified in one place to be unsupported, but + * stated to be required in another, and 0 length LOAD_INDIRECTs appear + * to cause no harm at least. + */ + instr_out(data, hw_offset, 0, "3DSTATE_LOAD_INDIRECT\n"); + len = (data[0] & 0x000000ff) + 1; + i = 1; + if (data[0] & (0x01 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "SIS.0\n"); + instr_out(data, hw_offset, i++, "SIS.1\n"); + } + if (data[0] & (0x02 << 8)) { + if (i + 1 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "DIS.0\n"); + } + if (data[0] & (0x04 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "SSB.0\n"); + instr_out(data, hw_offset, i++, "SSB.1\n"); + } + if (data[0] & (0x08 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "MSB.0\n"); + instr_out(data, hw_offset, i++, "MSB.1\n"); + } + if (data[0] & (0x10 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, "PSP.0\n"); + instr_out(data, hw_offset, i++, "PSP.1\n"); + } + if (data[0] & (0x20 << 8)) { + if (i + 2 >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT"); + instr_out(data, hw_offset, i++, 
"PSC.0\n"); + instr_out(data, hw_offset, i++, "PSC.1\n"); + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n"); + (*failures)++; + return len; + } + return len; + case 0x04: + instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 0; word <= 7; word++) { + if (data[0] & (1 << (4 + word))) { + if (i >= count) + BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1"); + + /* save vertex state for decode */ + if (word == 2) { + saved_s2_set = 1; + saved_s2 = data[i]; + } + if (word == 4) { + saved_s4_set = 1; + saved_s4 = data[i]; + } + + instr_out(data, hw_offset, i++, "S%d\n", word); + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); /* name the packet actually being decoded */ + (*failures)++; + } + return len; + case 0x00: + instr_out(data, hw_offset, 0, "3DSTATE_MAP_STATE\n"); + len = (data[0] & 0x0000003f) + 2; + instr_out(data, hw_offset, 1, "mask\n"); + + i = 2; + for (map = 0; map <= 15; map++) { + if (data[1] & (1 << map)) { + if (i + 3 >= count) + BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE"); + instr_out(data, hw_offset, i++, "map %d MS2\n", map); + instr_out(data, hw_offset, i++, "map %d MS3\n", map); + instr_out(data, hw_offset, i++, "map %d MS4\n", map); + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_MAP_STATE\n"); + (*failures)++; + return len; + } + return len; + case 0x06: + instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n"); + len = (data[0] & 0x000000ff) + 2; + + i = 2; + for (c = 0; c <= 31; c++) { + if (data[1] & (1 << c)) { + if (i + 4 >= count) + BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_CONSTANTS"); + instr_out(data, hw_offset, i, "C%d.X = %f\n", + c, int_as_float(data[i])); + i++; + instr_out(data, hw_offset, i, "C%d.Y = %f\n", + c, int_as_float(data[i])); + i++; + instr_out(data, hw_offset, i, "C%d.Z = %f\n", + c, int_as_float(data[i])); + i++; + instr_out(data, hw_offset, i, "C%d.W = %f\n", + c, int_as_float(data[i])); + i++; 
+ } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_CONSTANTS\n"); + (*failures)++; + } + return len; + case 0x05: + instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n"); + len = (data[0] & 0x000000ff) + 2; + if ((len - 1) % 3 != 0 || len > 370) { + fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_PROGRAM\n"); + (*failures)++; + } + i = 1; + for (instr = 0; instr < (len - 1) / 3; instr++) { + char instr_prefix[10]; + + if (i + 3 >= count) + BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_PROGRAM"); + sprintf(instr_prefix, "PS%03d", instr); + i915_decode_instruction(data, hw_offset, i, instr_prefix); + i += 3; + } + return len; + case 0x01: + if (i830) + break; + instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n"); + instr_out(data, hw_offset, 1, "mask\n"); + len = (data[0] & 0x0000003f) + 2; + i = 2; + for (sampler = 0; sampler <= 15; sampler++) { + if (data[1] & (1 << sampler)) { + if (i + 3 >= count) + BUFFER_FAIL(count, len, "3DSTATE_SAMPLER_STATE"); + instr_out(data, hw_offset, i++, "sampler %d SS2\n", + sampler); + instr_out(data, hw_offset, i++, "sampler %d SS3\n", + sampler); + instr_out(data, hw_offset, i++, "sampler %d SS4\n", + sampler); + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_SAMPLER_STATE\n"); + (*failures)++; + } + return len; + case 0x85: + len = (data[0] & 0x0000000f) + 2; + + if (len != 2) + fprintf(out, "Bad count in 3DSTATE_DEST_BUFFER_VARIABLES\n"); + if (count < 2) + BUFFER_FAIL(count, len, "3DSTATE_DEST_BUFFER_VARIABLES"); + + instr_out(data, hw_offset, 0, + "3DSTATE_DEST_BUFFER_VARIABLES\n"); + + switch ((data[1] >> 8) & 0xf) { + case 0x0: format = "g8"; break; + case 0x1: format = "x1r5g5b5"; break; + case 0x2: format = "r5g6b5"; break; + case 0x3: format = "a8r8g8b8"; break; + case 0x4: format = "ycrcb_swapy"; break; + case 0x5: format = "ycrcb_normal"; break; + case 0x6: format = "ycrcb_swapuv"; break; + case 0x7: format = "ycrcb_swapuvy"; break; + case 0x8: format = "a4r4g4b4"; 
break; + case 0x9: format = "a1r5g5b5"; break; + case 0xa: format = "a2r10g10b10"; break; + default: format = "BAD"; break; + } + instr_out(data, hw_offset, 1, "%s format, early Z %sabled\n", + format, + (data[1] & (1 << 31)) ? "en" : "dis"); + return len; + } + + for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]); + opcode++) + { + if (opcodes_3d_1d[opcode].i830_only && !i830) + continue; + + if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) { + len = 1; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name); + if (opcodes_3d_1d[opcode].max_len > 1) { + len = (data[0] & 0x0000ffff) + 2; + if (len < opcodes_3d_1d[opcode].min_len || + len > opcodes_3d_1d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", + opcodes_3d_1d[opcode].name); + (*failures)++; + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_3d_1d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset, + int *failures) +{ + char immediate = (data[0] & (1 << 23)) == 0; + unsigned int len, i; + char *primtype; + + switch ((data[0] >> 18) & 0xf) { + case 0x0: primtype = "TRILIST"; break; + case 0x1: primtype = "TRISTRIP"; break; + case 0x2: primtype = "TRISTRIP_REVERSE"; break; + case 0x3: primtype = "TRIFAN"; break; + case 0x4: primtype = "POLYGON"; break; + case 0x5: primtype = "LINELIST"; break; + case 0x6: primtype = "LINESTRIP"; break; + case 0x7: primtype = "RECTLIST"; break; + case 0x8: primtype = "POINTLIST"; break; + case 0x9: primtype = "DIB"; break; + case 0xa: primtype = "CLEAR_RECT"; break; + default: primtype = "unknown"; break; + } + + /* XXX: 3DPRIM_DIB not supported */ + if (immediate) { + len = (data[0] & 0x0003ffff) + 2; + instr_out(data, hw_offset, 0, "3DPRIMITIVE inline %s\n", primtype); 
+ if (count < len) + BUFFER_FAIL(count, len, "3DPRIMITIVE inline"); + if (!saved_s2_set || !saved_s4_set) { + fprintf(out, "unknown vertex format\n"); + for (i = 1; i < len; i++) { + instr_out(data, hw_offset, i, + " vertex data (%f float)\n", + int_as_float(data[i])); + } + } else { + unsigned int vertex = 0; + for (i = 1; i < len;) { + unsigned int tc; + /* VERTEX_OUT prints dword i as one field of the current vertex (or a "missing data" note past len), then advances i */ +#define VERTEX_OUT(fmt, ...) do { \ + if (i < len) \ + instr_out(data, hw_offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \ + else \ + fprintf(out, " missing data in V%d\n", vertex); \ + i++; \ +} while (0) + + VERTEX_OUT("X = %f", int_as_float(data[i])); + VERTEX_OUT("Y = %f", int_as_float(data[i])); + switch (saved_s4 >> 6 & 0x7) { + case 0x1: + VERTEX_OUT("Z = %f", int_as_float(data[i])); + break; + case 0x2: + VERTEX_OUT("Z = %f", int_as_float(data[i])); + VERTEX_OUT("W = %f", int_as_float(data[i])); + break; + case 0x3: + break; + case 0x4: + VERTEX_OUT("W = %f", int_as_float(data[i])); + break; + default: + fprintf(out, "bad S4 position mask\n"); + } + + if (saved_s4 & (1 << 10)) { + VERTEX_OUT("color = (A=0x%02x, R=0x%02x, G=0x%02x, " + "B=0x%02x)", + data[i] >> 24, + (data[i] >> 16) & 0xff, + (data[i] >> 8) & 0xff, + data[i] & 0xff); + } + if (saved_s4 & (1 << 11)) { + VERTEX_OUT("spec = (A=0x%02x, R=0x%02x, G=0x%02x, " + "B=0x%02x)", + data[i] >> 24, + (data[i] >> 16) & 0xff, + (data[i] >> 8) & 0xff, + data[i] & 0xff); + } + if (saved_s4 & (1 << 12)) + VERTEX_OUT("width = 0x%08x", data[i]); + + for (tc = 0; tc <= 7; tc++) { + switch ((saved_s2 >> (tc * 4)) & 0xf) { + case 0x0: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + break; + case 0x1: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i])); + break; + case 0x2: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i])); + 
VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i])); + VERTEX_OUT("T%d.W = %f", tc, int_as_float(data[i])); + break; + case 0x3: + VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i])); + break; + case 0x4: + VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]); + break; + case 0x5: + VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]); + VERTEX_OUT("T%d.ZW = 0x%08x half-float", tc, data[i]); + break; + case 0xf: + break; + default: + fprintf(out, "bad S2.T%d format\n", tc); + } + } + vertex++; + } + } + } else { + /* indirect vertices */ + len = data[0] & 0x0000ffff; /* index count */ + if (data[0] & (1 << 17)) { + /* random vertex access */ + if (count < (len + 1) / 2 + 1) { + BUFFER_FAIL(count, (len + 1) / 2 + 1, + "3DPRIMITIVE random indirect"); + } + instr_out(data, hw_offset, 0, + "3DPRIMITIVE random indirect %s (%d)\n", primtype, len); + if (len == 0) { + /* vertex indices continue until 0xffff is found */ + for (i = 1; i < count; i++) { + if ((data[i] & 0xffff) == 0xffff) { + instr_out(data, hw_offset, i, + " indices: (terminator)\n"); + return i; + } else if ((data[i] >> 16) == 0xffff) { + instr_out(data, hw_offset, i, + " indices: 0x%04x, " + "(terminator)\n", + data[i] & 0xffff); + return i; + } else { + instr_out(data, hw_offset, i, + " indices: 0x%04x, 0x%04x\n", + data[i] & 0xffff, data[i] >> 16); + } + } + fprintf(out, + "3DPRIMITIVE: no terminator found in index buffer\n"); + (*failures)++; + return count; + } else { + /* fixed size vertex index buffer: i counts indices, two per dword, first pair in data[1] */ + for (i = 0; i < len; i += 2) { + if (i == len - 1) { /* odd count: last dword holds a single index */ + instr_out(data, hw_offset, 1 + i / 2, + " indices: 0x%04x\n", + data[1 + i / 2] & 0xffff); + } else { + instr_out(data, hw_offset, 1 + i / 2, + " indices: 0x%04x, 0x%04x\n", + data[1 + i / 2] & 0xffff, data[1 + i / 2] >> 16); + } + } + } + return (len + 1) / 2 + 1; + } else { + /* sequential vertex access */ + if (count < 2) + BUFFER_FAIL(count, 2, "3DPRIMITIVE seq indirect"); + instr_out(data, hw_offset, 0, + "3DPRIMITIVE sequential indirect %s, %d starting from " + "%d\n", 
primtype, len, data[1] & 0xffff); + instr_out(data, hw_offset, 1, " start\n"); + return 2; + } + } + + return len; +} + +static int +decode_3d(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_3d[] = { + { 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" }, + { 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" }, + { 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" }, + { 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" }, + { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, + { 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" }, + { 0x0d, 1, 1, "3DSTATE_MODES_4" }, + { 0x0c, 1, 1, "3DSTATE_MODES_5" }, + { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, + }; + + switch ((data[0] & 0x1f000000) >> 24) { + case 0x1f: + return decode_3d_primitive(data, count, hw_offset, failures); + case 0x1d: + return decode_3d_1d(data, count, hw_offset, failures, 0); + case 0x1c: + return decode_3d_1c(data, count, hw_offset, failures); + } + + for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); + opcode++) { + if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { + unsigned int len = 1, i; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); + if (opcodes_3d[opcode].max_len > 1) { + len = (data[0] & 0xff) + 2; + if (len < opcodes_3d[opcode].min_len || + len > opcodes_3d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + return len; + } + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: 
return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +static int +decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode, len; + int i; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_3d[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_STATE" }, + { 0x6002, 2, 
2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + }; + + len = (data[0] & 0x0000ffff) + 2; + + switch ((data[0] & 0xffff0000) >> 16) { + case 0x6101: + if (len != 6) + fprintf(out, "Bad count in STATE_BASE_ADDRESS\n"); + if (count < 6) + BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS"); + + instr_out(data, hw_offset, 0, + "STATE_BASE_ADDRESS\n"); + + if (data[1] & 1) { + instr_out(data, hw_offset, 1, "General state at 0x%08x\n", + data[1] & ~1); + } else + instr_out(data, hw_offset, 1, "General state not updated\n"); + + if (data[2] & 1) { + instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n", + data[2] & ~1); + } else + instr_out(data, hw_offset, 2, "Surface state not updated\n"); + + if (data[3] & 1) { + instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n", + data[3] & ~1); + } else + instr_out(data, hw_offset, 3, "Indirect state not updated\n"); + + if (data[4] & 1) { + instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n", + data[4] & ~1); + } else + instr_out(data, hw_offset, 4, "General state not updated\n"); + + if (data[5] & 1) { + 
instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n", + data[5] & ~1); + } else + instr_out(data, hw_offset, 5, "Indirect state not updated\n"); + + return len; + case 0x7800: + if (len != 7) + fprintf(out, "Bad count in 3DSTATE_PIPELINED_POINTERS\n"); + if (count < 7) + BUFFER_FAIL(count, len, "3DSTATE_PIPELINED_POINTERS"); + + instr_out(data, hw_offset, 0, + "3DSTATE_PIPELINED_POINTERS\n"); + instr_out(data, hw_offset, 1, "VS state\n"); + instr_out(data, hw_offset, 2, "GS state\n"); + instr_out(data, hw_offset, 3, "Clip state\n"); + instr_out(data, hw_offset, 4, "SF state\n"); + instr_out(data, hw_offset, 5, "WM state\n"); + instr_out(data, hw_offset, 6, "CC state\n"); + return len; + case 0x7801: + if (len != 6) + fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n"); + if (count < 6) + BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS"); + + instr_out(data, hw_offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + instr_out(data, hw_offset, 1, "VS binding table\n"); + instr_out(data, hw_offset, 2, "GS binding table\n"); + instr_out(data, hw_offset, 3, "Clip binding table\n"); + instr_out(data, hw_offset, 4, "SF binding table\n"); + instr_out(data, hw_offset, 5, "WM binding table\n"); + + return len; + + case 0x7808: + len = (data[0] & 0xff) + 2; + if ((len - 1) % 4 != 0) + fprintf(out, "Bad count in 3DSTATE_VERTEX_BUFFERS\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_VERTEX_BUFFERS"); + instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + instr_out(data, hw_offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 27, + data[i] & (1 << 26) ? 
"random" : "sequential", + data[i] & 0x07ff); + i++; + instr_out(data, hw_offset, i++, "buffer address\n"); + instr_out(data, hw_offset, i++, "max index\n"); + instr_out(data, hw_offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + len = (data[0] & 0xff) + 2; + if ((len + 1) % 2 != 0) + fprintf(out, "Bad count in 3DSTATE_VERTEX_ELEMENTS\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_VERTEX_ELEMENTS"); + instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + for (i = 1; i < len;) { + instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? "" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + instr_out(data, hw_offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + return len; + + case 0x780a: + len = (data[0] & 0xff) + 2; + if (len != 3) + fprintf(out, "Bad count in 3DSTATE_INDEX_BUFFER\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_INDEX_BUFFER"); + instr_out(data, hw_offset, 0, "3DSTATE_INDEX_BUFFER\n"); + instr_out(data, hw_offset, 1, "beginning buffer address\n"); + instr_out(data, hw_offset, 2, "ending buffer address\n"); + return len; + + case 0x7900: + if (len != 4) + fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n"); + if (count < 4) + BUFFER_FAIL(count, len, "3DSTATE_DRAWING_RECTANGLE"); + + instr_out(data, hw_offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + instr_out(data, hw_offset, 1, "top left: %d,%d\n", + data[1] & 0xffff, + (data[1] >> 16) & 0xffff); + instr_out(data, hw_offset, 2, "bottom right: %d,%d\n", + data[2] & 0xffff, + (data[2] >> 16) & 0xffff); + instr_out(data, hw_offset, 3, "origin: %d,%d\n", + (int)data[3] & 0xffff, + ((int)data[3] >> 16) & 0xffff); + + return len; + + case 0x7905: + if (len != 
5 && len != 6) + fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER"); + + instr_out(data, hw_offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + instr_out(data, hw_offset, 1, "%s, %s, pitch = %d bytes, %stiled\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? "" : "not "); + instr_out(data, hw_offset, 2, "depth offset\n"); + instr_out(data, hw_offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + instr_out(data, hw_offset, 4, "volume depth\n"); + if (len == 6) + instr_out(data, hw_offset, 5, "\n"); + + return len; + + case 0x7b00: + len = (data[0] & 0xff) + 2; + if (len != 6) + fprintf(out, "Bad count in 3DPRIMITIVE\n"); + if (count < len) + BUFFER_FAIL(count, len, "3DPRIMITIVE"); + + instr_out(data, hw_offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? 
"random" : "sequential"); + instr_out(data, hw_offset, 1, "vertex count\n"); + instr_out(data, hw_offset, 2, "start vertex\n"); + instr_out(data, hw_offset, 3, "instance count\n"); + instr_out(data, hw_offset, 4, "start instance\n"); + instr_out(data, hw_offset, 5, "index bias\n"); + return len; + } + + for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); + opcode++) { + if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) { + unsigned int i; + len = 1; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); + if (opcodes_3d[opcode].max_len > 1) { + len = (data[0] & 0xff) + 2; + if (len < opcodes_3d[opcode].min_len || + len > opcodes_3d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + return len; + } + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +static int +decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, int *failures) +{ + unsigned int opcode; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes_3d[] = { + { 0x02, 1, 1, "3DSTATE_MODES_3" }, + { 0x03, 1, 1, "3DSTATE_ENABLES_1"}, + { 0x04, 1, 1, "3DSTATE_ENABLES_2"}, + { 0x05, 1, 1, "3DSTATE_VFT0"}, + { 0x06, 1, 1, "3DSTATE_AA"}, + { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, + { 0x08, 1, 1, "3DSTATE_MODES_1" }, + { 0x09, 1, 1, "3DSTATE_STENCIL_TEST" }, + { 0x0a, 1, 1, "3DSTATE_VFT1"}, + { 0x0b, 1, 1, "3DSTATE_INDPT_ALPHA_BLEND" }, + { 0x0c, 1, 1, "3DSTATE_MODES_5" }, + { 0x0d, 1, 1, "3DSTATE_MAP_BLEND_OP" }, + { 0x0e, 1, 1, "3DSTATE_MAP_BLEND_ARG" }, + { 0x0f, 1, 1, "3DSTATE_MODES_2" }, + { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, + { 0x16, 1, 1, "3DSTATE_MODES_4" }, + }; + + switch ((data[0] & 0x1f000000) >> 24) { + case 0x1f: + return decode_3d_primitive(data, count, hw_offset, failures); + 
case 0x1d: + return decode_3d_1d(data, count, hw_offset, failures, 1); + case 0x1c: + return decode_3d_1c(data, count, hw_offset, failures); + } + + for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]); + opcode++) { + if ((data[0] & 0x1f000000) >> 24 == opcodes_3d[opcode].opcode) { + unsigned int len = 1, i; + + instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name); + if (opcodes_3d[opcode].max_len > 1) { + len = (data[0] & 0xff) + 2; + if (len < opcodes_3d[opcode].min_len || + len > opcodes_3d[opcode].max_len) + { + fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + if (i >= count) + BUFFER_FAIL(count, len, opcodes_3d[opcode].name); + instr_out(data, hw_offset, i, "dword %d\n", i); + } + return len; + } + } + + instr_out(data, hw_offset, 0, "3D UNKNOWN\n"); + (*failures)++; + return 1; +} + +/** + * Decodes an i830-i965 batch buffer, writing the output to stderr. + * + * \param data batch buffer contents + * \param count number of DWORDs to decode in the batch buffer + * \param hw_offset hardware address for the buffer + */ +int +intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid) +{ + int index = 0; + int failures = 0; + + out = stderr; + + while (index < count) { + switch ((data[index] & 0xe0000000) >> 29) { + case 0x0: + index += decode_mi(data + index, count - index, + hw_offset + index * 4, &failures); + break; + case 0x2: + index += decode_2d(data + index, count - index, + hw_offset + index * 4, &failures); + break; + case 0x3: + if (IS_965(devid)) { + index += decode_3d_965(data + index, count - index, + hw_offset + index * 4, &failures); + } else if (IS_9XX(devid)) { + index += decode_3d(data + index, count - index, + hw_offset + index * 4, &failures); + } else { + index += decode_3d_i830(data + index, count - index, + hw_offset + index * 4, &failures); + } + break; + default: + instr_out(data, hw_offset, index, "UNKNOWN\n"); + failures++; + index++; +
break; + } + fflush(out); + } + + return failures; +} + +void intel_decode_context_reset(void) +{ + saved_s2_set = 0; + saved_s4_set = 1; +} + diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h new file mode 100644 index 0000000000..c50644a46b --- /dev/null +++ b/src/gallium/drivers/i965/intel_decode.h @@ -0,0 +1,29 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt + * + */ + +int intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid); +void intel_decode_context_reset(void); -- cgit v1.2.3 From cc8105d7402511c7d0ea8a07faaa8d149d9249f2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 23:09:23 +0000 Subject: i965g: hook up some vertex state funcs --- src/gallium/drivers/i965/brw_context.h | 4 ++-- src/gallium/drivers/i965/brw_pipe_vertex.c | 38 ++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index e32452f49a..d033cb0f91 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -501,12 +501,12 @@ struct brw_context const struct brw_depth_stencil_state *zstencil; const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS]; - const struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - unsigned num_vertex_elements; unsigned num_samplers; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned num_vertex_elements; unsigned num_textures; unsigned num_vertex_buffers; diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index 0b69718fd8..97e9a23688 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -1,9 +1,47 @@ #include "brw_context.h" +static void brw_set_vertex_elements( struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements ) +{ + struct brw_context *brw = brw_context(pipe); + + memcpy(brw->curr.vertex_element, elements, count * sizeof(elements[0])); + brw->curr.num_vertex_elements = count; + + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT; +} + + +static void brw_set_vertex_buffers(struct 
pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct brw_context *brw = brw_context(pipe); + + /* XXX: don't we need to take some references here? It's a bit + * awkward to do so, though. + */ + memcpy(brw->curr.vertex_buffer, buffers, count * sizeof(buffers[0])); + brw->curr.num_vertex_buffers = count; + + brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER; +} + +static void brw_set_edgeflags( struct pipe_context *pipe, + const unsigned *bitfield ) +{ + /* XXX */ +} + + void brw_pipe_vertex_init( struct brw_context *brw ) { + brw->base.set_vertex_buffers = brw_set_vertex_buffers; + brw->base.set_vertex_elements = brw_set_vertex_elements; + brw->base.set_edgeflags = brw_set_edgeflags; } -- cgit v1.2.3 From 1b611f99b430333e840908b42471a721689b2529 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 23:18:07 +0000 Subject: i965g: hook up some missing vertex shader code --- src/gallium/drivers/i965/brw_pipe_shader.c | 11 +++++++++-- src/gallium/drivers/i965/brw_vs_emit.c | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 8e10edb459..2833f2bce0 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -128,12 +128,19 @@ static void *brw_create_vs_state( struct pipe_context *pipe, vs->id = brw->program_id++; //vs->has_flow_control = brw_wm_has_flow_control(vs); - /* Tell the draw module about this shader: - */ + vs->tokens = tgsi_dup_tokens(shader->tokens); + if (vs->tokens == NULL) + goto fail; + + tgsi_scan_shader(vs->tokens, &vs->info); /* Done: */ return (void *)vs; + +fail: + FREE(vs); + return NULL; } diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index bcc5c5f713..95e2b8e2cb 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ 
b/src/gallium/drivers/i965/brw_vs_emit.c @@ -90,7 +90,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* XXX: immediates can go elsewhere if necessary: */ assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + - c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF); + c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 <= BRW_MAX_GRF); c->vp->use_const_buffer = GL_FALSE; } -- cgit v1.2.3 From bf4a518cf27910fe2bb828fd43de5472e5e51760 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 23:22:48 +0000 Subject: i965g: clean up wm init_registers func --- src/gallium/drivers/i965/brw_wm_pass2.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c index d3d678a5e6..a5574bd1a3 100644 --- a/src/gallium/drivers/i965/brw_wm_pass2.c +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -69,30 +69,32 @@ static void prealloc_reg(struct brw_wm_compile *c, */ static void init_registers( struct brw_wm_compile *c ) { - GLuint nr_interp_regs = 0; - GLuint i = 0; + GLuint reg = 0; GLuint j; for (j = 0; j < c->grf_limit; j++) c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN; + /* Pre-allocate incoming payload regs: + */ for (j = 0; j < c->key.nr_depth_regs; j++) - prealloc_reg(c, &c->payload.depth[j], i++); + prealloc_reg(c, &c->payload.depth[j], reg++); for (j = 0; j < c->nr_creg; j++) - prealloc_reg(c, &c->creg[j], i++); + prealloc_reg(c, &c->creg[j], reg++); - for (j = 0; j < c->key.vp_nr_outputs; j++) { - prealloc_reg(c, &c->payload.input_interp[j], i++); - } + for (j = 0; j < c->key.vp_nr_outputs; j++) + prealloc_reg(c, &c->payload.input_interp[j], reg++); - assert(nr_interp_regs >= 1); + assert(c->key.vp_nr_outputs >= 1); c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = c->key.vp_nr_outputs * 2; c->prog_data.curb_read_length = c->nr_creg * 2; - c->max_wm_grf = i * 2; + 
/* Note this allocation: + */ + c->max_wm_grf = reg * 2; } -- cgit v1.2.3 From 518171a887437e4d3fc2c8cea871862afb63c11c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 23:27:30 +0000 Subject: i965g: init pointer to null, avoid segfault --- src/gallium/drivers/i965/brw_draw_upload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index f0b7c741c0..6e12e8f4b3 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -202,7 +202,7 @@ static int brw_prepare_vertices(struct brw_context *brw) for (i = 0; i < brw->curr.num_vertex_buffers; i++) { struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i]; struct brw_winsys_buffer *bo; - struct pipe_buffer *upload_buf; + struct pipe_buffer *upload_buf = NULL; unsigned offset; if (BRW_DEBUG & DEBUG_VERTS) -- cgit v1.2.3 From e3e084c66089704a36f28dfb2bc4b17e5c5ce046 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 23:27:50 +0000 Subject: i965g: initialize winsys pointer in surface cache --- src/gallium/drivers/i965/brw_state_cache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index 1cb1b5e721..071a942e5c 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -462,6 +462,7 @@ brw_init_surface_cache(struct brw_context *brw) struct brw_cache *cache = &brw->surface_cache; cache->brw = brw; + cache->sws = brw->sws; cache->size = 7; cache->n_items = 0; -- cgit v1.2.3 From 7a49bd6d15d7778db637340d695095dafb43a7fe Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 23:30:52 +0000 Subject: i965g: remove redundant screen pointer in brw context struct --- src/gallium/drivers/i965/brw_context.h | 1 - 
src/gallium/drivers/i965/brw_curbe.c | 2 +- src/gallium/drivers/i965/brw_draw.c | 4 ++-- src/gallium/drivers/i965/brw_pipe_flush.c | 6 ++++-- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index d033cb0f91..0c1dcf8a14 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -483,7 +483,6 @@ struct brw_context struct pipe_context base; struct brw_chipset chipset; - struct brw_screen *brw_screen; struct brw_winsys_screen *sws; struct brw_batchbuffer *batch; diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 5763173bca..f62b0b0d5e 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -224,7 +224,7 @@ static int prepare_curbe_buffer(struct brw_context *brw) if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT]; - struct pipe_screen *screen = &brw->brw_screen->base; + struct pipe_screen *screen = brw->base.screen; const GLfloat *value = screen->buffer_map( screen, brw->curr.vertex_constants, diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index a2bed6256b..6d6b1c7c5c 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -258,14 +258,14 @@ boolean brw_draw_init( struct brw_context *brw ) /* Create helpers for uploading data in user buffers: */ - brw->vb.upload_vertex = u_upload_create( &brw->brw_screen->base, + brw->vb.upload_vertex = u_upload_create( brw->base.screen, 128 * 1024, 64, PIPE_BUFFER_USAGE_VERTEX ); if (brw->vb.upload_vertex == NULL) return FALSE; - brw->vb.upload_index = u_upload_create( &brw->brw_screen->base, + brw->vb.upload_index = u_upload_create( brw->base.screen, 128 * 1024, 64, PIPE_BUFFER_USAGE_INDEX ); diff --git 
a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c index 9dff2beeb1..fdc4814b22 100644 --- a/src/gallium/drivers/i965/brw_pipe_flush.c +++ b/src/gallium/drivers/i965/brw_pipe_flush.c @@ -50,8 +50,9 @@ static unsigned brw_is_buffer_referenced(struct pipe_context *pipe, struct pipe_buffer *buffer) { struct brw_context *brw = brw_context(pipe); + struct brw_screen *bscreen = brw_screen(brw->base.screen); - return brw_is_buffer_referenced_by_bo( brw->brw_screen, + return brw_is_buffer_referenced_by_bo( bscreen, buffer, brw->batch->buf ); } @@ -62,8 +63,9 @@ static unsigned brw_is_texture_referenced(struct pipe_context *pipe, unsigned level) { struct brw_context *brw = brw_context(pipe); + struct brw_screen *bscreen = brw_screen(brw->base.screen); - return brw_is_texture_referenced_by_bo( brw->brw_screen, + return brw_is_texture_referenced_by_bo( bscreen, texture, face, level, brw->batch->buf ); } -- cgit v1.2.3 From e84e86ecb2e83b756a0153d315f946d60d695a54 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 23:33:08 +0000 Subject: i965g: fix some asserts --- src/gallium/drivers/i965/brw_state_upload.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index 4132c6ac69..a71af4d2b9 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -193,8 +193,8 @@ enum pipe_error brw_validate_state( struct brw_context *brw ) { const struct brw_fragment_shader *fp = brw->curr.fragment_shader; if (fp) { - assert(fp->info.file_max[TGSI_FILE_SAMPLER] < brw->curr.num_samplers && - fp->info.texture_max < brw->curr.num_textures); + assert(fp->info.file_max[TGSI_FILE_SAMPLER] < (int)brw->curr.num_samplers); + assert(fp->info.texture_max <= brw->curr.num_textures); } } -- cgit v1.2.3 From 220566d8dc4ff023ef833fd6519ab7b187e598d2 Mon Sep 17 
00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 23:37:52 +0000 Subject: i965g: consolidate some includes --- src/gallium/drivers/i965/brw_batchbuffer.c | 2 -- src/gallium/drivers/i965/brw_batchbuffer.h | 2 -- src/gallium/drivers/i965/brw_draw_upload.c | 1 - src/gallium/drivers/i965/brw_pipe_clear.c | 1 - src/gallium/drivers/i965/brw_winsys.h | 7 ++++--- src/gallium/drivers/i965/brw_wm.c | 2 -- src/gallium/drivers/i965/brw_wm_fp.c | 1 - src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 5 +++-- 8 files changed, 7 insertions(+), 14 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index 64d6754df5..673bd1ed44 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -27,8 +27,6 @@ #include "util/u_memory.h" -#include "pipe/p_error.h" - #include "brw_batchbuffer.h" //#include "brw_decode.h" #include "brw_reg.h" diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index b051638296..781cd698e4 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -3,8 +3,6 @@ #include "util/u_debug.h" -#include "pipe/p_error.h" - #include "brw_types.h" #include "brw_winsys.h" #include "brw_reg.h" diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index 6e12e8f4b3..acebd44080 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -26,7 +26,6 @@ **************************************************************************/ #include "pipe/p_context.h" -#include "pipe/p_error.h" #include "util/u_upload_mgr.h" #include "util/u_math.h" diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c index 69bc95e51a..34cad62977 100644 --- a/src/gallium/drivers/i965/brw_pipe_clear.c +++ 
b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -27,7 +27,6 @@ #include "util/u_pack_color.h" -#include "pipe/p_error.h" #include "pipe/p_state.h" #include "brw_batchbuffer.h" diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index b2ba3e86f9..f5ce9d13d7 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -27,6 +27,7 @@ #define BRW_WINSYS_H #include "pipe/p_compiler.h" +#include "pipe/p_error.h" struct brw_winsys; struct pipe_fence_handle; @@ -123,9 +124,9 @@ struct brw_winsys_screen { /* XXX: couldn't this be handled by returning true/false on * bo_emit_reloc? */ - boolean (*check_aperture_space)( struct brw_winsys_screen *iws, - struct brw_winsys_buffer **buffers, - unsigned count ); + enum pipe_error (*check_aperture_space)( struct brw_winsys_screen *iws, + struct brw_winsys_buffer **buffers, + unsigned count ); /** * Map a buffer. diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 90780272da..815ae8c51a 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -28,8 +28,6 @@ * Authors: * Keith Whitwell */ -#include "pipe/p_error.h" - #include "tgsi/tgsi_info.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 58f1d35b7d..bba448815b 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -31,7 +31,6 @@ #include "pipe/p_shader_tokens.h" -#include "pipe/p_error.h" #include "util/u_math.h" #include "util/u_memory.h" diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c index 08fce4b20b..71d8f4bafc 100644 --- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -35,6 +35,7 @@ #include "util/u_memory.h" #include "util/u_math.h" +#include "pipe/p_error.h" #include "pipe/p_context.h" #include "xm_winsys.h" @@ 
-226,7 +227,7 @@ xlib_brw_bo_references(struct brw_winsys_buffer *a, return TRUE; } -static boolean +static enum pipe_error xlib_brw_check_aperture_space( struct brw_winsys_screen *iws, struct brw_winsys_buffer **buffers, unsigned count ) @@ -241,7 +242,7 @@ xlib_brw_check_aperture_space( struct brw_winsys_screen *iws, __FUNCTION__, count, (tot_size + 1023) / 1024); - return TRUE; + return PIPE_OK; } static void * -- cgit v1.2.3 From c2e51effe6228aa2fe6610c695b494e86490bc80 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 23:41:30 +0000 Subject: i965g: fix order of calculation of brw->wm.nr_surfaces --- src/gallium/drivers/i965/brw_wm_surface_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index 6c29db045f..b055dde20c 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -215,14 +215,14 @@ static int prepare_wm_surfaces(struct brw_context *brw ) } #endif - brw->sws->bo_unreference(brw->wm.bind_bo); - brw->wm.bind_bo = brw_wm_get_binding_table(brw); - if (brw->wm.nr_surfaces != nr_surfaces) { brw->wm.nr_surfaces = nr_surfaces; brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; } + brw->sws->bo_unreference(brw->wm.bind_bo); + brw->wm.bind_bo = brw_wm_get_binding_table(brw); + return 0; } -- cgit v1.2.3 From ffc24463913071bbb0fa9cab9a05ea7a089c56a0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Nov 2009 23:44:16 +0000 Subject: i965g: enable line stipple packet emit With this change, trivial/tri manages to build and emit a fairly unconvincing command buffer (to the debug winsys), and then crashes. 
--- src/gallium/drivers/i965/brw_misc_state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index e786ea1100..b562eb7152 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -37,6 +37,7 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_screen.h" +#include "brw_pipe_rast.h" @@ -342,7 +343,7 @@ const struct brw_tracked_state brw_polygon_stipple = { static int upload_line_stipple(struct brw_context *brw) { - struct brw_line_stipple *bls = NULL; //brw->curr.rast->bls; + struct brw_line_stipple *bls = &brw->curr.rast->bls; BRW_CACHED_BATCH_STRUCT(brw, bls); return 0; } -- cgit v1.2.3 From 643bb3419d7d342436cc54603e51467153d5d030 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Nov 2009 00:38:51 +0000 Subject: i965g: Fix debug check --- src/gallium/drivers/i965/brw_draw_upload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index acebd44080..4fa7d549eb 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -426,7 +426,7 @@ static int brw_prepare_indices(struct brw_context *brw) if (index_buffer == NULL) return 0; - if (DEBUG & DEBUG_VERTS) + if (BRW_DEBUG & DEBUG_VERTS) debug_printf("%s: index_size:%d index_buffer->size:%d\n", __FUNCTION__, brw->curr.index_size, -- cgit v1.2.3 From 47cbbb7253f89ff165c4953758efaaca19adf16f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 5 Nov 2009 00:42:30 +0000 Subject: i965g: Builds with scons But there are some missing symbols, "nm -u i965_dri.so" [SNIP] U brw_surface_bo U brw_surface_pitch U brw_texture_blanket_winsys_buffer U brw_texture_get_winsys_buffer U brw_update_dirty_counts [SNIP] --- 
SConstruct | 8 ++-- src/gallium/drivers/i965/SConscript | 77 +++++++++++++++++++++++++++++++++++++ src/gallium/winsys/drm/SConscript | 5 +++ 3 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 src/gallium/drivers/i965/SConscript (limited to 'src/gallium/drivers/i965') diff --git a/SConstruct b/SConstruct index d53f4401e5..d4db812db5 100644 --- a/SConstruct +++ b/SConstruct @@ -32,10 +32,10 @@ import common default_statetrackers = 'mesa' if common.default_platform in ('linux', 'freebsd', 'darwin'): - default_drivers = 'softpipe,failover,i915,trace,identity,llvmpipe' + default_drivers = 'softpipe,failover,i915,i965,trace,identity,llvmpipe' default_winsys = 'xlib' elif common.default_platform in ('winddk',): - default_drivers = 'softpipe,i915,trace,identity' + default_drivers = 'softpipe,i915,i965,trace,identity' default_winsys = 'all' else: default_drivers = 'all' @@ -46,9 +46,9 @@ common.AddOptions(opts) opts.Add(ListVariable('statetrackers', 'state trackers to build', default_statetrackers, ['mesa', 'python', 'xorg'])) opts.Add(ListVariable('drivers', 'pipe drivers to build', default_drivers, - ['softpipe', 'failover', 'i915', 'cell', 'trace', 'r300', 'identity', 'llvmpipe'])) + ['softpipe', 'failover', 'i915', 'i965', 'cell', 'trace', 'r300', 'identity', 'llvmpipe'])) opts.Add(ListVariable('winsys', 'winsys drivers to build', default_winsys, - ['xlib', 'intel', 'gdi', 'radeon'])) + ['xlib', 'intel', 'i965', 'gdi', 'radeon'])) opts.Add(EnumVariable('MSVS_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0'))) diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript new file mode 100644 index 0000000000..c517b08ec5 --- /dev/null +++ b/src/gallium/drivers/i965/SConscript @@ -0,0 +1,77 @@ +Import('*') + +env = env.Clone() + +i965 = env.ConvenienceLibrary( + target = 'i965', + source = [ + 'brw_batchbuffer.c', + 'brw_cc.c', + 'brw_clip.c', + 'brw_clip_line.c', + 'brw_clip_point.c', + 
'brw_clip_state.c', + 'brw_clip_tri.c', + 'brw_clip_unfilled.c', + 'brw_clip_util.c', + 'brw_context.c', + 'brw_curbe.c', + 'brw_disasm.c', + 'brw_draw.c', + 'brw_draw_upload.c', + 'brw_eu.c', + 'brw_eu_debug.c', + 'brw_eu_emit.c', + 'brw_eu_util.c', + 'brw_gs.c', + 'brw_gs_emit.c', + 'brw_gs_state.c', + 'brw_misc_state.c', + 'brw_pipe_blend.c', + 'brw_pipe_clear.c', + 'brw_pipe_depth.c', + 'brw_pipe_fb.c', + 'brw_pipe_flush.c', + 'brw_pipe_misc.c', + 'brw_pipe_query.c', + 'brw_pipe_rast.c', + 'brw_pipe_sampler.c', + 'brw_pipe_shader.c', + 'brw_pipe_vertex.c', + 'brw_screen_buffers.c', + 'brw_screen.c', + 'brw_screen_surface.c', + 'brw_screen_tex_layout.c', + 'brw_screen_texture.c', + 'brw_sf.c', + 'brw_sf_emit.c', + 'brw_sf_state.c', + 'brw_state_batch.c', + 'brw_state_cache.c', +# 'brw_state_debug.c', + 'brw_state_dump.c', + 'brw_state_upload.c', + 'brw_swtnl.c', + 'brw_urb.c', + 'brw_util.c', + 'brw_vs.c', + 'brw_vs_emit.c', + 'brw_vs_state.c', + 'brw_vs_surface_state.c', + 'brw_wm.c', +# 'brw_wm_constant_buffer.c', + 'brw_wm_debug.c', + 'brw_wm_emit.c', + 'brw_wm_fp.c', +# 'brw_wm_glsl.c', + 'brw_wm_iz.c', + 'brw_wm_pass0.c', + 'brw_wm_pass1.c', + 'brw_wm_pass2.c', + 'brw_wm_sampler_state.c', + 'brw_wm_state.c', + 'brw_wm_surface_state.c', + 'intel_decode.c', + ]) + +Export('i965') diff --git a/src/gallium/winsys/drm/SConscript b/src/gallium/winsys/drm/SConscript index a9e9f2682a..ba389d8ed3 100644 --- a/src/gallium/winsys/drm/SConscript +++ b/src/gallium/winsys/drm/SConscript @@ -53,6 +53,11 @@ if env['dri']: 'intel/SConscript', ]) + if 'i965' in env['winsys']: + SConscript([ + 'i965/SConscript', + ]) + if 'radeon' in env['winsys']: SConscript([ 'radeon/SConscript', -- cgit v1.2.3 From 2475e5db679a70c4a3868dc07037d009865a6694 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 08:00:33 +0000 Subject: i965g: fix compiler warning --- src/gallium/drivers/i965/brw_misc_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 
'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index b562eb7152..8e35f9ad1d 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -343,7 +343,7 @@ const struct brw_tracked_state brw_polygon_stipple = { static int upload_line_stipple(struct brw_context *brw) { - struct brw_line_stipple *bls = &brw->curr.rast->bls; + const struct brw_line_stipple *bls = &brw->curr.rast->bls; BRW_CACHED_BATCH_STRUCT(brw, bls); return 0; } -- cgit v1.2.3 From b2bf5f98d923b8d52473e069576fc6514c0ffd0a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 08:01:48 +0000 Subject: i965g: use pipe_error return value for brw_batchbuffer_require_space trivial/tri runs without crashing (on debug winsys) but still produces obviously incorrect command buffers. --- src/gallium/drivers/i965/brw_batchbuffer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 781cd698e4..1f04826aea 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -103,19 +103,19 @@ brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword) batch->ptr += 4; } -static INLINE boolean +static INLINE enum pipe_error brw_batchbuffer_require_space(struct brw_batchbuffer *batch, GLuint sz) { assert(sz < batch->size - 8); if (brw_batchbuffer_space(batch) < sz) { assert(0); - return FALSE; + return PIPE_ERROR_OUT_OF_MEMORY; } #ifdef DEBUG batch->emit.end_ptr = batch->ptr + sz; #endif - return TRUE; + return PIPE_OK; } /* Here are the crusty old macros, to be removed: -- cgit v1.2.3 From 6ac38232ee1ebde5ed390e3ccc22cba59ad00854 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 10:59:02 +0000 Subject: i965g: add data type tags to aid dumping/decoding ---
src/gallium/drivers/i965/brw_batchbuffer.c | 17 +++-- src/gallium/drivers/i965/brw_context.h | 40 ++++++----- src/gallium/drivers/i965/brw_curbe.c | 1 + src/gallium/drivers/i965/brw_pipe_query.c | 2 +- src/gallium/drivers/i965/brw_screen_buffers.c | 13 ++-- src/gallium/drivers/i965/brw_screen_texture.c | 18 ++--- src/gallium/drivers/i965/brw_state_cache.c | 23 +++---- src/gallium/drivers/i965/brw_state_dump.c | 14 ++-- src/gallium/drivers/i965/brw_winsys.h | 69 ++++++++++++------- src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 95 +++++++++++++++++---------- 10 files changed, 182 insertions(+), 110 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index 673bd1ed44..ca612e5ed0 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -53,7 +53,9 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch) if (batch->malloc_buffer) batch->map = batch->malloc_buffer; else - batch->map = batch->sws->bo_map(batch->buf, GL_TRUE); + batch->map = batch->sws->bo_map(batch->buf, + BRW_DATA_OTHER, + GL_TRUE); batch->size = BRW_BATCH_SIZE; batch->ptr = batch->map; @@ -132,7 +134,10 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, used = batch->ptr - batch->map; if (batch->use_malloc_buffer) { - batch->sws->bo_subdata(batch->buf, 0, used, batch->map ); + batch->sws->bo_subdata(batch->buf, + BRW_DATA_OTHER, + 0, used, + batch->map ); batch->map = NULL; } else { @@ -145,7 +150,9 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, batch->sws->bo_exec(batch->buf, used ); if (1 /*BRW_DEBUG & DEBUG_BATCH*/) { - void *ptr = batch->sws->bo_map(batch->buf, GL_FALSE); + void *ptr = batch->sws->bo_map(batch->buf, + BRW_DATA_OTHER, + GL_FALSE); intel_decode(ptr, used / 4, @@ -162,7 +169,9 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, * interface. 
*/ debug_printf("waiting for idle\n"); - batch->sws->bo_map(batch->buf, GL_TRUE); + batch->sws->bo_map(batch->buf, + BRW_DATA_OTHER, + GL_TRUE); batch->sws->bo_unmap(batch->buf); } diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 0c1dcf8a14..09d34615c7 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -347,25 +347,27 @@ struct brw_vs_ouput_sizes { #define SURF_INDEX_VERT_CONST_BUFFER 0 +/* Bit of a hack to align these with the winsys buffer_data_type enum. + */ enum brw_cache_id { - BRW_CC_VP, - BRW_CC_UNIT, - BRW_WM_PROG, - BRW_SAMPLER_DEFAULT_COLOR, - BRW_SAMPLER, - BRW_WM_UNIT, - BRW_SF_PROG, - BRW_SF_VP, - BRW_SF_UNIT, - BRW_VS_UNIT, - BRW_VS_PROG, - BRW_GS_UNIT, - BRW_GS_PROG, - BRW_CLIP_VP, - BRW_CLIP_UNIT, - BRW_CLIP_PROG, - BRW_SS_SURFACE, - BRW_SS_SURF_BIND, + BRW_CC_VP = BRW_DATA_GS_CC_VP, + BRW_CC_UNIT = BRW_DATA_GS_CC_UNIT, + BRW_WM_PROG = BRW_DATA_GS_WM_PROG, + BRW_SAMPLER_DEFAULT_COLOR = BRW_DATA_GS_SAMPLER_DEFAULT_COLOR, + BRW_SAMPLER = BRW_DATA_GS_SAMPLER, + BRW_WM_UNIT = BRW_DATA_GS_WM_UNIT, + BRW_SF_PROG = BRW_DATA_GS_SF_PROG, + BRW_SF_VP = BRW_DATA_GS_SF_VP, + BRW_SF_UNIT = BRW_DATA_GS_SF_UNIT, + BRW_VS_UNIT = BRW_DATA_GS_VS_UNIT, + BRW_VS_PROG = BRW_DATA_GS_VS_PROG, + BRW_GS_UNIT = BRW_DATA_GS_GS_UNIT, + BRW_GS_PROG = BRW_DATA_GS_GS_PROG, + BRW_CLIP_VP = BRW_DATA_GS_CLIP_VP, + BRW_CLIP_UNIT = BRW_DATA_GS_CLIP_UNIT, + BRW_CLIP_PROG = BRW_DATA_GS_CLIP_PROG, + BRW_SS_SURFACE = BRW_DATA_SS_SURFACE, + BRW_SS_SURF_BIND = BRW_DATA_SS_SURF_BIND, BRW_MAX_CACHE }; @@ -399,6 +401,8 @@ struct brw_cache { struct brw_cache_item **items; GLuint size, n_items; + enum brw_buffer_type buffer_type; + GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */ GLuint aux_size[BRW_MAX_CACHE]; char *name[BRW_MAX_CACHE]; diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index f62b0b0d5e..1e2e232204 100644 --- 
a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -289,6 +289,7 @@ static int prepare_curbe_buffer(struct brw_context *brw) */ brw->sws->bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, + BRW_DATA_OTHER, bufsz, buf); } diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c index d3e173f5ec..3370ebd262 100644 --- a/src/gallium/drivers/i965/brw_pipe_query.c +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -63,7 +63,7 @@ brw_query_get_result(struct pipe_context *pipe, if (brw->sws->bo_is_busy(query->bo) && !wait) return FALSE; - map = brw->sws->bo_map(query->bo, GL_FALSE); + map = brw->sws->bo_map(query->bo, BRW_DATA_OTHER, GL_FALSE); if (map == NULL) return FALSE; diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c index c0f19d64aa..ba54740225 100644 --- a/src/gallium/drivers/i965/brw_screen_buffers.c +++ b/src/gallium/drivers/i965/brw_screen_buffers.c @@ -24,6 +24,7 @@ brw_buffer_map( struct pipe_screen *screen, return buf->user_buffer; return sws->bo_map( buf->bo, + BRW_DATA_OTHER, (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? 
TRUE : FALSE ); } @@ -64,7 +65,7 @@ brw_buffer_create(struct pipe_screen *screen, struct brw_screen *bscreen = brw_screen(screen); struct brw_winsys_screen *sws = bscreen->sws; struct brw_buffer *buf; - unsigned usage_type; + unsigned buffer_type; buf = CALLOC_STRUCT(brw_buffer); if (!buf) @@ -84,24 +85,24 @@ brw_buffer_create(struct pipe_screen *screen, case PIPE_BUFFER_USAGE_VERTEX: case PIPE_BUFFER_USAGE_INDEX: case (PIPE_BUFFER_USAGE_VERTEX|PIPE_BUFFER_USAGE_INDEX): - usage_type = BRW_BUFFER_TYPE_VERTEX; + buffer_type = BRW_BUFFER_TYPE_VERTEX; break; case PIPE_BUFFER_USAGE_PIXEL: - usage_type = BRW_BUFFER_TYPE_PIXEL; + buffer_type = BRW_BUFFER_TYPE_PIXEL; break; case PIPE_BUFFER_USAGE_CONSTANT: - usage_type = BRW_BUFFER_TYPE_SHADER_CONSTANTS; + buffer_type = BRW_BUFFER_TYPE_SHADER_CONSTANTS; break; default: - usage_type = BRW_BUFFER_TYPE_GENERIC; + buffer_type = BRW_BUFFER_TYPE_GENERIC; break; } buf->bo = sws->bo_alloc( sws, - usage_type, + buffer_type, size, alignment ); diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index c318b07f97..ba6dc7dfde 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -186,6 +186,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, { struct brw_screen *bscreen = brw_screen(screen); struct brw_texture *tex; + enum brw_buffer_type buffer_type; tex = CALLOC_STRUCT(brw_texture); if (tex == NULL) @@ -226,21 +227,16 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, goto fail; - if (templ->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { - } - else if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY)) { - } - else if (templ->tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) { - } - else if (templ->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) { + if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_PRIMARY)) { 
+ buffer_type = BRW_BUFFER_TYPE_SCANOUT; } - - if (templ->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) { + else { + buffer_type = BRW_BUFFER_TYPE_TEXTURE; } tex->bo = bscreen->sws->bo_alloc( bscreen->sws, - BRW_USAGE_SAMPLER, + buffer_type, tex->pitch * tex->total_height * tex->cpp, 64 ); diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index 071a942e5c..cbd1f02d77 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -228,7 +228,7 @@ brw_upload_cache( struct brw_cache *cache, * these various entities. */ bo = cache->sws->bo_alloc(cache->sws, - BRW_BUFFER_TYPE_STATE_CACHE, + cache->buffer_type, data_size, 1 << 6); @@ -273,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache, data_size, cache_id); /* Copy data to the buffer */ - cache->sws->bo_subdata(bo, 0, data_size, data); + cache->sws->bo_subdata(bo, + cache_id, + 0, data_size, data); update_cache_last(cache, cache_id, bo); @@ -332,11 +334,6 @@ brw_cache_data(struct brw_cache *cache, reloc_bufs, nr_reloc_bufs); } -enum pool_type { - DW_SURFACE_STATE, - DW_GENERAL_STATE -}; - static void brw_init_cache_id(struct brw_cache *cache, @@ -352,13 +349,15 @@ brw_init_cache_id(struct brw_cache *cache, static void -brw_init_non_surface_cache(struct brw_context *brw) +brw_init_general_state_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->cache; cache->brw = brw; cache->sws = brw->sws; + cache->buffer_type = BRW_BUFFER_TYPE_GENERAL_STATE; + cache->size = 7; cache->n_items = 0; cache->items = (struct brw_cache_item **) @@ -457,13 +456,15 @@ brw_init_non_surface_cache(struct brw_context *brw) static void -brw_init_surface_cache(struct brw_context *brw) +brw_init_surface_state_cache(struct brw_context *brw) { struct brw_cache *cache = &brw->surface_cache; cache->brw = brw; cache->sws = brw->sws; + cache->buffer_type = BRW_BUFFER_TYPE_SURFACE_STATE; + cache->size = 7; cache->n_items = 0; cache->items = (struct 
brw_cache_item **) @@ -486,8 +487,8 @@ brw_init_surface_cache(struct brw_context *brw) void brw_init_caches(struct brw_context *brw) { - brw_init_non_surface_cache(brw); - brw_init_surface_cache(brw); + brw_init_general_state_cache(brw); + brw_init_surface_state_cache(brw); } diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c index 345e42a6b2..388331ee62 100644 --- a/src/gallium/drivers/i965/brw_state_dump.c +++ b/src/gallium/drivers/i965/brw_state_dump.c @@ -65,7 +65,7 @@ state_struct_out(struct brw_winsys_screen *sws, if (buffer == NULL) return; - data = sws->bo_map(buffer, GL_FALSE); + data = sws->bo_map(buffer, BRW_DATA_OTHER, GL_FALSE); for (i = 0; i < state_size / 4; i++) { state_out(name, data, buffer->offset[0], i, "dword %d\n", i); @@ -114,7 +114,9 @@ static void dump_wm_surface_state(struct brw_context *brw) debug_printf(" WM SS%d: NULL\n", i); continue; } - surf = (struct brw_surface_state *)brw->sws->bo_map(surf_bo, GL_FALSE); + surf = (struct brw_surface_state *)brw->sws->bo_map(surf_bo, + BRW_DATA_OTHER, + GL_FALSE); surfoff = surf_bo->offset[0]; sprintf(name, "WM SS%d", i); @@ -144,7 +146,9 @@ static void dump_sf_viewport_state(struct brw_context *brw) if (brw->sf.vp_bo == NULL) return; - vp = (struct brw_sf_viewport *)brw->sws->bo_map(brw->sf.vp_bo, GL_FALSE); + vp = (struct brw_sf_viewport *)brw->sws->bo_map(brw->sf.vp_bo, + BRW_DATA_OTHER, + GL_FALSE); vp_off = brw->sf.vp_bo->offset[0]; state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); @@ -172,7 +176,9 @@ static void brw_debug_prog(struct brw_winsys_screen *sws, if (prog == NULL) return; - data = (uint32_t *)sws->bo_map(prog, GL_FALSE); + data = (uint32_t *)sws->bo_map(prog, + BRW_DATA_OTHER, + GL_FALSE); for (i = 0; i < prog->size / 4 / 4; i++) { debug_printf("%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index f5ce9d13d7..d941fbcebe 100644 --- 
a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -44,21 +44,6 @@ struct brw_winsys_buffer { unsigned size; }; -/* Describe the usage of a particular buffer in a relocation. The DRM - * winsys will translate these back to GEM read/write domain flags. - */ -enum brw_buffer_usage { - BRW_USAGE_STATE, /* INSTRUCTION, 0 */ - BRW_USAGE_QUERY_RESULT, /* INSTRUCTION, INSTRUCTION */ - BRW_USAGE_RENDER_TARGET, /* RENDER, 0 */ - BRW_USAGE_DEPTH_BUFFER, /* RENDER, RENDER */ - BRW_USAGE_BLIT_SOURCE, /* RENDER, 0 */ - BRW_USAGE_BLIT_DEST, /* RENDER, RENDER */ - BRW_USAGE_SAMPLER, /* SAMPLER, 0 */ - BRW_USAGE_VERTEX, /* VERTEX, 0 */ - BRW_USAGE_SCRATCH, /* 0, 0 */ - BRW_USAGE_MAX -}; /* Should be possible to validate usages above against buffer creation * types, below: @@ -73,12 +58,53 @@ enum brw_buffer_type BRW_BUFFER_TYPE_SHADER_CONSTANTS, BRW_BUFFER_TYPE_SHADER_SCRATCH, BRW_BUFFER_TYPE_BATCH, - BRW_BUFFER_TYPE_STATE_CACHE, + BRW_BUFFER_TYPE_GENERAL_STATE, + BRW_BUFFER_TYPE_SURFACE_STATE, BRW_BUFFER_TYPE_PIXEL, /* image uploads, pbo's, etc */ BRW_BUFFER_TYPE_GENERIC, /* unknown */ BRW_BUFFER_TYPE_MAX /* Count of possible values */ }; + +/* Describe the usage of a particular buffer in a relocation. The DRM + * winsys will translate these back to GEM read/write domain flags. 
+ */ +enum brw_buffer_usage { + BRW_USAGE_STATE, /* INSTRUCTION, 0 */ + BRW_USAGE_QUERY_RESULT, /* INSTRUCTION, INSTRUCTION */ + BRW_USAGE_RENDER_TARGET, /* RENDER, 0 */ + BRW_USAGE_DEPTH_BUFFER, /* RENDER, RENDER */ + BRW_USAGE_BLIT_SOURCE, /* RENDER, 0 */ + BRW_USAGE_BLIT_DEST, /* RENDER, RENDER */ + BRW_USAGE_SAMPLER, /* SAMPLER, 0 */ + BRW_USAGE_VERTEX, /* VERTEX, 0 */ + BRW_USAGE_SCRATCH, /* 0, 0 */ + BRW_USAGE_MAX +}; + +enum brw_buffer_data_type { + BRW_DATA_GS_CC_VP, + BRW_DATA_GS_CC_UNIT, + BRW_DATA_GS_WM_PROG, + BRW_DATA_GS_SAMPLER_DEFAULT_COLOR, + BRW_DATA_GS_SAMPLER, + BRW_DATA_GS_WM_UNIT, + BRW_DATA_GS_SF_PROG, + BRW_DATA_GS_SF_VP, + BRW_DATA_GS_SF_UNIT, + BRW_DATA_GS_VS_UNIT, + BRW_DATA_GS_VS_PROG, + BRW_DATA_GS_GS_UNIT, + BRW_DATA_GS_GS_PROG, + BRW_DATA_GS_CLIP_VP, + BRW_DATA_GS_CLIP_UNIT, + BRW_DATA_GS_CLIP_PROG, + BRW_DATA_SS_SURFACE, + BRW_DATA_SS_SURF_BIND, + BRW_DATA_OTHER, + BRW_DATA_MAX +}; + struct brw_winsys_screen { @@ -113,9 +139,10 @@ struct brw_winsys_screen { unsigned bytes_used ); int (*bo_subdata)(struct brw_winsys_buffer *buffer, - size_t offset, - size_t size, - const void *data); + enum brw_buffer_data_type data_type, + size_t offset, + size_t size, + const void *data); boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer); boolean (*bo_references)(struct brw_winsys_buffer *a, @@ -132,6 +159,7 @@ struct brw_winsys_screen { * Map a buffer. */ void *(*bo_map)(struct brw_winsys_buffer *buffer, + enum brw_buffer_data_type data_type, boolean write); /** @@ -140,9 +168,6 @@ struct brw_winsys_screen { void (*bo_unmap)(struct brw_winsys_buffer *buffer); /*@}*/ - - - /** * Destroy the winsys. 
*/ diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c index 71d8f4bafc..4fe5db4033 100644 --- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -82,31 +82,57 @@ xlib_brw_buffer( struct brw_winsys_buffer *buffer ) const char *names[BRW_BUFFER_TYPE_MAX] = { - "texture", - "scanout", - "vertex", - "curbe", - "query", - "shader_constants", - "wm_scratch", - "batch", - "state_cache", - "pixel", - "generic", + "TEXTURE", + "SCANOUT", + "VERTEX", + "CURBE", + "QUERY", + "SHADER_CONSTANTS", + "WM_SCRATCH", + "BATCH", + "GENERAL_STATE", + "SURFACE_STATE", + "PIXEL", + "GENERIC", }; const char *usages[BRW_USAGE_MAX] = { - "state", - "query_result", - "render_target", - "depth_buffer", - "blit_source", - "blit_dest", - "sampler", - "vertex", - "scratch" + "STATE", + "QUERY_RESULT", + "RENDER_TARGET", + "DEPTH_BUFFER", + "BLIT_SOURCE", + "BLIT_DEST", + "SAMPLER", + "VERTEX", + "SCRATCH" }; + +const char *data_types[BRW_DATA_MAX] = +{ + "GS: CC_VP", + "GS: CC_UNIT", + "GS: WM_PROG", + "GS: SAMPLER_DEFAULT_COLOR", + "GS: SAMPLER", + "GS: WM_UNIT", + "GS: SF_PROG", + "GS: SF_VP", + "GS: SF_UNIT", + "GS: VS_UNIT", + "GS: VS_PROG", + "GS: GS_UNIT", + "GS: GS_PROG", + "GS: CLIP_VP", + "GS: CLIP_UNIT", + "GS: CLIP_PROG", + "SS: SURFACE", + "SS: SURF_BIND", + "(untyped)" +}; + + static struct brw_winsys_buffer * xlib_brw_bo_alloc( struct brw_winsys_screen *sws, enum brw_buffer_type type, @@ -116,8 +142,8 @@ xlib_brw_bo_alloc( struct brw_winsys_screen *sws, struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws); struct xlib_brw_buffer *buf; - debug_printf("%s type %d sz %d align %d\n", - __FUNCTION__, type, size, alignment ); + debug_printf("%s type %s sz %d align %d\n", + __FUNCTION__, names[type], size, alignment ); buf = CALLOC_STRUCT(xlib_brw_buffer); if (!buf) @@ -168,10 +194,10 @@ xlib_brw_bo_unreference( struct brw_winsys_buffer *buffer ) static int xlib_brw_bo_emit_reloc( struct 
brw_winsys_buffer *buffer, - enum brw_buffer_usage usage, - unsigned delta, - unsigned offset, - struct brw_winsys_buffer *buffer2) + enum brw_buffer_usage usage, + unsigned delta, + unsigned offset, + struct brw_winsys_buffer *buffer2) { struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); struct xlib_brw_buffer *buf2 = xlib_brw_buffer(buffer2); @@ -197,15 +223,16 @@ xlib_brw_bo_exec( struct brw_winsys_buffer *buffer, static int xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer, - size_t offset, - size_t size, - const void *data) + enum brw_buffer_data_type data_type, + size_t offset, + size_t size, + const void *data) { struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); - debug_printf("%s buf %p off %d sz %d data %p\n", + debug_printf("%s buf %p off %d sz %d data %p %s\n", __FUNCTION__, - (void *)buffer, offset, size, data); + (void *)buffer, offset, size, data, data_types[data_type]); memcpy(buf->virtual + offset, data, size); return 0; @@ -247,12 +274,14 @@ xlib_brw_check_aperture_space( struct brw_winsys_screen *iws, static void * xlib_brw_bo_map(struct brw_winsys_buffer *buffer, + enum brw_buffer_data_type data_type, boolean write) { struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); - debug_printf("%s %p %s\n", __FUNCTION__, (void *)buffer, - write ? "read/write" : "read"); + debug_printf("%s %p %s %s\n", __FUNCTION__, (void *)buffer, + write ? "read/write" : "read", + write ? data_types[data_type] : ""); buf->map_count++; return buf->virtual; -- cgit v1.2.3 From 9069c791d065e513e05611e60f19305a850fb2f5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 5 Nov 2009 12:22:01 +0000 Subject: i965g: Dumper for i965 structures. 
--- src/gallium/drivers/i965/Makefile | 1 + src/gallium/drivers/i965/SConscript | 1 + src/gallium/drivers/i965/brw_structs_dump.c | 1511 ++++++++++++++++++++++++++ src/gallium/drivers/i965/brw_structs_dump.h | 276 +++++ src/gallium/drivers/i965/brw_structs_dump.py | 284 +++++ 5 files changed, 2073 insertions(+) create mode 100644 src/gallium/drivers/i965/brw_structs_dump.c create mode 100644 src/gallium/drivers/i965/brw_structs_dump.h create mode 100755 src/gallium/drivers/i965/brw_structs_dump.py (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 2188a1d4bc..6c0d3541d7 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -43,6 +43,7 @@ C_SOURCES = \ brw_state_cache.c \ brw_state_dump.c \ brw_state_upload.c \ + brw_structs_dump.c \ brw_swtnl.c \ brw_urb.c \ brw_util.c \ diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript index c517b08ec5..d38ad6fe7e 100644 --- a/src/gallium/drivers/i965/SConscript +++ b/src/gallium/drivers/i965/SConscript @@ -43,6 +43,7 @@ i965 = env.ConvenienceLibrary( 'brw_screen_surface.c', 'brw_screen_tex_layout.c', 'brw_screen_texture.c', + 'brw_structs_dump.c', 'brw_sf.c', 'brw_sf_emit.c', 'brw_sf_state.c', diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c new file mode 100644 index 0000000000..a8b96c6418 --- /dev/null +++ b/src/gallium/drivers/i965/brw_structs_dump.c @@ -0,0 +1,1511 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/** + * @file + * Dump i965 data structures. + * + * Generated automatically from brw_structs.h by brw_structs_dump.py. 
+ */ + +#include "util/u_debug.h" + +#include "brw_types.h" +#include "brw_structs.h" +#include "brw_structs_dump.h" + +void +brw_dump_3d_control(const struct brw_3d_control *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.notify_enable = 0x%x\n", (*ptr).header.notify_enable); + debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad); + debug_printf("\t\t.header.wc_flush_enable = 0x%x\n", (*ptr).header.wc_flush_enable); + debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", (*ptr).header.depth_stall_enable); + debug_printf("\t\t.header.operation = 0x%x\n", (*ptr).header.operation); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.dest.pad = 0x%x\n", (*ptr).dest.pad); + debug_printf("\t\t.dest.dest_addr_type = 0x%x\n", (*ptr).dest.dest_addr_type); + debug_printf("\t\t.dest.dest_addr = 0x%x\n", (*ptr).dest.dest_addr); + debug_printf("\t\t.dword2 = 0x%x\n", (*ptr).dword2); + debug_printf("\t\t.dword3 = 0x%x\n", (*ptr).dword3); +} + +void +brw_dump_3d_primitive(const struct brw_3d_primitive *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad); + debug_printf("\t\t.header.topology = 0x%x\n", (*ptr).header.topology); + debug_printf("\t\t.header.indexed = 0x%x\n", (*ptr).header.indexed); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.verts_per_instance = 0x%x\n", (*ptr).verts_per_instance); + debug_printf("\t\t.start_vert_location = 0x%x\n", (*ptr).start_vert_location); + debug_printf("\t\t.instance_count = 0x%x\n", (*ptr).instance_count); + debug_printf("\t\t.start_instance_location = 0x%x\n", (*ptr).start_instance_location); + debug_printf("\t\t.base_vert_location = 0x%x\n", (*ptr).base_vert_location); +} + +void +brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", 
(*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_scope); + debug_printf("\t\t.bits0.pad0 = 0x%x\n", (*ptr).bits0.pad0); + debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", (*ptr).bits0.aa_coverage_bias); + debug_printf("\t\t.bits0.pad1 = 0x%x\n", (*ptr).bits0.pad1); + debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_slope); + debug_printf("\t\t.bits1.pad0 = 0x%x\n", (*ptr).bits1.pad0); + debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_bias); + debug_printf("\t\t.bits1.pad1 = 0x%x\n", (*ptr).bits1.pad1); +} + +void +brw_dump_binding_table_pointers(const struct brw_binding_table_pointers *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.vs = 0x%x\n", (*ptr).vs); + debug_printf("\t\t.gs = 0x%x\n", (*ptr).gs); + debug_printf("\t\t.clp = 0x%x\n", (*ptr).clp); + debug_printf("\t\t.sf = 0x%x\n", (*ptr).sf); + debug_printf("\t\t.wm = 0x%x\n", (*ptr).wm); +} + +void +brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.blend_constant_color[0] = %f\n", (*ptr).blend_constant_color[0]); + debug_printf("\t\t.blend_constant_color[1] = %f\n", (*ptr).blend_constant_color[1]); + debug_printf("\t\t.blend_constant_color[2] = %f\n", (*ptr).blend_constant_color[2]); + debug_printf("\t\t.blend_constant_color[3] = %f\n", (*ptr).blend_constant_color[3]); +} + +void +brw_dump_cc0(const struct brw_cc0 *ptr) +{ + debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); + debug_printf("\t\t.bf_stencil_pass_depth_pass_op = 0x%x\n", (*ptr).bf_stencil_pass_depth_pass_op); + 
debug_printf("\t\t.bf_stencil_pass_depth_fail_op = 0x%x\n", (*ptr).bf_stencil_pass_depth_fail_op); + debug_printf("\t\t.bf_stencil_fail_op = 0x%x\n", (*ptr).bf_stencil_fail_op); + debug_printf("\t\t.bf_stencil_func = 0x%x\n", (*ptr).bf_stencil_func); + debug_printf("\t\t.bf_stencil_enable = 0x%x\n", (*ptr).bf_stencil_enable); + debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1); + debug_printf("\t\t.stencil_write_enable = 0x%x\n", (*ptr).stencil_write_enable); + debug_printf("\t\t.stencil_pass_depth_pass_op = 0x%x\n", (*ptr).stencil_pass_depth_pass_op); + debug_printf("\t\t.stencil_pass_depth_fail_op = 0x%x\n", (*ptr).stencil_pass_depth_fail_op); + debug_printf("\t\t.stencil_fail_op = 0x%x\n", (*ptr).stencil_fail_op); + debug_printf("\t\t.stencil_func = 0x%x\n", (*ptr).stencil_func); + debug_printf("\t\t.stencil_enable = 0x%x\n", (*ptr).stencil_enable); +} + +void +brw_dump_cc1(const struct brw_cc1 *ptr) +{ + debug_printf("\t\t.bf_stencil_ref = 0x%x\n", (*ptr).bf_stencil_ref); + debug_printf("\t\t.stencil_write_mask = 0x%x\n", (*ptr).stencil_write_mask); + debug_printf("\t\t.stencil_test_mask = 0x%x\n", (*ptr).stencil_test_mask); + debug_printf("\t\t.stencil_ref = 0x%x\n", (*ptr).stencil_ref); +} + +void +brw_dump_cc2(const struct brw_cc2 *ptr) +{ + debug_printf("\t\t.logicop_enable = 0x%x\n", (*ptr).logicop_enable); + debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); + debug_printf("\t\t.depth_write_enable = 0x%x\n", (*ptr).depth_write_enable); + debug_printf("\t\t.depth_test_function = 0x%x\n", (*ptr).depth_test_function); + debug_printf("\t\t.depth_test = 0x%x\n", (*ptr).depth_test); + debug_printf("\t\t.bf_stencil_write_mask = 0x%x\n", (*ptr).bf_stencil_write_mask); + debug_printf("\t\t.bf_stencil_test_mask = 0x%x\n", (*ptr).bf_stencil_test_mask); +} + +void +brw_dump_cc3(const struct brw_cc3 *ptr) +{ + debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); + debug_printf("\t\t.alpha_test_func = 0x%x\n", (*ptr).alpha_test_func); + debug_printf("\t\t.alpha_test = 0x%x\n", 
(*ptr).alpha_test); + debug_printf("\t\t.blend_enable = 0x%x\n", (*ptr).blend_enable); + debug_printf("\t\t.ia_blend_enable = 0x%x\n", (*ptr).ia_blend_enable); + debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1); + debug_printf("\t\t.alpha_test_format = 0x%x\n", (*ptr).alpha_test_format); + debug_printf("\t\t.pad2 = 0x%x\n", (*ptr).pad2); +} + +void +brw_dump_cc4(const struct brw_cc4 *ptr) +{ + debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); + debug_printf("\t\t.cc_viewport_state_offset = 0x%x\n", (*ptr).cc_viewport_state_offset); +} + +void +brw_dump_cc5(const struct brw_cc5 *ptr) +{ + debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); + debug_printf("\t\t.ia_dest_blend_factor = 0x%x\n", (*ptr).ia_dest_blend_factor); + debug_printf("\t\t.ia_src_blend_factor = 0x%x\n", (*ptr).ia_src_blend_factor); + debug_printf("\t\t.ia_blend_function = 0x%x\n", (*ptr).ia_blend_function); + debug_printf("\t\t.statistics_enable = 0x%x\n", (*ptr).statistics_enable); + debug_printf("\t\t.logicop_func = 0x%x\n", (*ptr).logicop_func); + debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1); + debug_printf("\t\t.dither_enable = 0x%x\n", (*ptr).dither_enable); +} + +void +brw_dump_cc6(const struct brw_cc6 *ptr) +{ + debug_printf("\t\t.clamp_post_alpha_blend = 0x%x\n", (*ptr).clamp_post_alpha_blend); + debug_printf("\t\t.clamp_pre_alpha_blend = 0x%x\n", (*ptr).clamp_pre_alpha_blend); + debug_printf("\t\t.clamp_range = 0x%x\n", (*ptr).clamp_range); + debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); + debug_printf("\t\t.y_dither_offset = 0x%x\n", (*ptr).y_dither_offset); + debug_printf("\t\t.x_dither_offset = 0x%x\n", (*ptr).x_dither_offset); + debug_printf("\t\t.dest_blend_factor = 0x%x\n", (*ptr).dest_blend_factor); + debug_printf("\t\t.src_blend_factor = 0x%x\n", (*ptr).src_blend_factor); + debug_printf("\t\t.blend_function = 0x%x\n", (*ptr).blend_function); +} + +void +brw_dump_cc7(const struct brw_cc7 *ptr) +{ + debug_printf("\t\t.alpha_ref.f = %f\n", (*ptr).alpha_ref.f); + 
debug_printf("\t\t.alpha_ref.ub[0] = 0x%x\n", (*ptr).alpha_ref.ub[0]); + debug_printf("\t\t.alpha_ref.ub[1] = 0x%x\n", (*ptr).alpha_ref.ub[1]); + debug_printf("\t\t.alpha_ref.ub[2] = 0x%x\n", (*ptr).alpha_ref.ub[2]); + debug_printf("\t\t.alpha_ref.ub[3] = 0x%x\n", (*ptr).alpha_ref.ub[3]); +} + +void +brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr) +{ + debug_printf("\t\t.cc0.pad0 = 0x%x\n", (*ptr).cc0.pad0); + debug_printf("\t\t.cc0.bf_stencil_pass_depth_pass_op = 0x%x\n", (*ptr).cc0.bf_stencil_pass_depth_pass_op); + debug_printf("\t\t.cc0.bf_stencil_pass_depth_fail_op = 0x%x\n", (*ptr).cc0.bf_stencil_pass_depth_fail_op); + debug_printf("\t\t.cc0.bf_stencil_fail_op = 0x%x\n", (*ptr).cc0.bf_stencil_fail_op); + debug_printf("\t\t.cc0.bf_stencil_func = 0x%x\n", (*ptr).cc0.bf_stencil_func); + debug_printf("\t\t.cc0.bf_stencil_enable = 0x%x\n", (*ptr).cc0.bf_stencil_enable); + debug_printf("\t\t.cc0.pad1 = 0x%x\n", (*ptr).cc0.pad1); + debug_printf("\t\t.cc0.stencil_write_enable = 0x%x\n", (*ptr).cc0.stencil_write_enable); + debug_printf("\t\t.cc0.stencil_pass_depth_pass_op = 0x%x\n", (*ptr).cc0.stencil_pass_depth_pass_op); + debug_printf("\t\t.cc0.stencil_pass_depth_fail_op = 0x%x\n", (*ptr).cc0.stencil_pass_depth_fail_op); + debug_printf("\t\t.cc0.stencil_fail_op = 0x%x\n", (*ptr).cc0.stencil_fail_op); + debug_printf("\t\t.cc0.stencil_func = 0x%x\n", (*ptr).cc0.stencil_func); + debug_printf("\t\t.cc0.stencil_enable = 0x%x\n", (*ptr).cc0.stencil_enable); + debug_printf("\t\t.cc1.bf_stencil_ref = 0x%x\n", (*ptr).cc1.bf_stencil_ref); + debug_printf("\t\t.cc1.stencil_write_mask = 0x%x\n", (*ptr).cc1.stencil_write_mask); + debug_printf("\t\t.cc1.stencil_test_mask = 0x%x\n", (*ptr).cc1.stencil_test_mask); + debug_printf("\t\t.cc1.stencil_ref = 0x%x\n", (*ptr).cc1.stencil_ref); + debug_printf("\t\t.cc2.logicop_enable = 0x%x\n", (*ptr).cc2.logicop_enable); + debug_printf("\t\t.cc2.pad0 = 0x%x\n", (*ptr).cc2.pad0); + debug_printf("\t\t.cc2.depth_write_enable = 
0x%x\n", (*ptr).cc2.depth_write_enable); + debug_printf("\t\t.cc2.depth_test_function = 0x%x\n", (*ptr).cc2.depth_test_function); + debug_printf("\t\t.cc2.depth_test = 0x%x\n", (*ptr).cc2.depth_test); + debug_printf("\t\t.cc2.bf_stencil_write_mask = 0x%x\n", (*ptr).cc2.bf_stencil_write_mask); + debug_printf("\t\t.cc2.bf_stencil_test_mask = 0x%x\n", (*ptr).cc2.bf_stencil_test_mask); + debug_printf("\t\t.cc3.pad0 = 0x%x\n", (*ptr).cc3.pad0); + debug_printf("\t\t.cc3.alpha_test_func = 0x%x\n", (*ptr).cc3.alpha_test_func); + debug_printf("\t\t.cc3.alpha_test = 0x%x\n", (*ptr).cc3.alpha_test); + debug_printf("\t\t.cc3.blend_enable = 0x%x\n", (*ptr).cc3.blend_enable); + debug_printf("\t\t.cc3.ia_blend_enable = 0x%x\n", (*ptr).cc3.ia_blend_enable); + debug_printf("\t\t.cc3.pad1 = 0x%x\n", (*ptr).cc3.pad1); + debug_printf("\t\t.cc3.alpha_test_format = 0x%x\n", (*ptr).cc3.alpha_test_format); + debug_printf("\t\t.cc3.pad2 = 0x%x\n", (*ptr).cc3.pad2); + debug_printf("\t\t.cc4.pad0 = 0x%x\n", (*ptr).cc4.pad0); + debug_printf("\t\t.cc4.cc_viewport_state_offset = 0x%x\n", (*ptr).cc4.cc_viewport_state_offset); + debug_printf("\t\t.cc5.pad0 = 0x%x\n", (*ptr).cc5.pad0); + debug_printf("\t\t.cc5.ia_dest_blend_factor = 0x%x\n", (*ptr).cc5.ia_dest_blend_factor); + debug_printf("\t\t.cc5.ia_src_blend_factor = 0x%x\n", (*ptr).cc5.ia_src_blend_factor); + debug_printf("\t\t.cc5.ia_blend_function = 0x%x\n", (*ptr).cc5.ia_blend_function); + debug_printf("\t\t.cc5.statistics_enable = 0x%x\n", (*ptr).cc5.statistics_enable); + debug_printf("\t\t.cc5.logicop_func = 0x%x\n", (*ptr).cc5.logicop_func); + debug_printf("\t\t.cc5.pad1 = 0x%x\n", (*ptr).cc5.pad1); + debug_printf("\t\t.cc5.dither_enable = 0x%x\n", (*ptr).cc5.dither_enable); + debug_printf("\t\t.cc6.clamp_post_alpha_blend = 0x%x\n", (*ptr).cc6.clamp_post_alpha_blend); + debug_printf("\t\t.cc6.clamp_pre_alpha_blend = 0x%x\n", (*ptr).cc6.clamp_pre_alpha_blend); + debug_printf("\t\t.cc6.clamp_range = 0x%x\n", (*ptr).cc6.clamp_range); + 
debug_printf("\t\t.cc6.pad0 = 0x%x\n", (*ptr).cc6.pad0); + debug_printf("\t\t.cc6.y_dither_offset = 0x%x\n", (*ptr).cc6.y_dither_offset); + debug_printf("\t\t.cc6.x_dither_offset = 0x%x\n", (*ptr).cc6.x_dither_offset); + debug_printf("\t\t.cc6.dest_blend_factor = 0x%x\n", (*ptr).cc6.dest_blend_factor); + debug_printf("\t\t.cc6.src_blend_factor = 0x%x\n", (*ptr).cc6.src_blend_factor); + debug_printf("\t\t.cc6.blend_function = 0x%x\n", (*ptr).cc6.blend_function); + debug_printf("\t\t.cc7.alpha_ref.f = %f\n", (*ptr).cc7.alpha_ref.f); + debug_printf("\t\t.cc7.alpha_ref.ub[0] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[0]); + debug_printf("\t\t.cc7.alpha_ref.ub[1] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[1]); + debug_printf("\t\t.cc7.alpha_ref.ub[2] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[2]); + debug_printf("\t\t.cc7.alpha_ref.ub[3] = 0x%x\n", (*ptr).cc7.alpha_ref.ub[3]); +} + +void +brw_dump_cc_viewport(const struct brw_cc_viewport *ptr) +{ + debug_printf("\t\t.min_depth = %f\n", (*ptr).min_depth); + debug_printf("\t\t.max_depth = %f\n", (*ptr).max_depth); +} + +void +brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr) +{ + debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0); + debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); + debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1); + debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); + debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0); + debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); + debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1); + debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); + debug_printf("\t\t.thread1.pad2 = 0x%x\n", (*ptr).thread1.pad2); + debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); + 
debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3); + debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); + debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); + debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); + debug_printf("\t\t.thread1.pad4 = 0x%x\n", (*ptr).thread1.pad4); + debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); + debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); + debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0); + debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); + debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); + debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); + debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0); + debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); + debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1); + debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); + debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2); + debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); + debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3); + debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0); + debug_printf("\t\t.thread4.gs_output_stats = 0x%x\n", (*ptr).thread4.gs_output_stats); + debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); + debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); + debug_printf("\t\t.thread4.pad1 = 0x%x\n", 
(*ptr).thread4.pad1); + debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); + debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2); + debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); + debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3); + debug_printf("\t\t.clip5.pad0 = 0x%x\n", (*ptr).clip5.pad0); + debug_printf("\t\t.clip5.clip_mode = 0x%x\n", (*ptr).clip5.clip_mode); + debug_printf("\t\t.clip5.userclip_enable_flags = 0x%x\n", (*ptr).clip5.userclip_enable_flags); + debug_printf("\t\t.clip5.userclip_must_clip = 0x%x\n", (*ptr).clip5.userclip_must_clip); + debug_printf("\t\t.clip5.negative_w_clip_test = 0x%x\n", (*ptr).clip5.negative_w_clip_test); + debug_printf("\t\t.clip5.guard_band_enable = 0x%x\n", (*ptr).clip5.guard_band_enable); + debug_printf("\t\t.clip5.viewport_z_clip_enable = 0x%x\n", (*ptr).clip5.viewport_z_clip_enable); + debug_printf("\t\t.clip5.viewport_xy_clip_enable = 0x%x\n", (*ptr).clip5.viewport_xy_clip_enable); + debug_printf("\t\t.clip5.vertex_position_space = 0x%x\n", (*ptr).clip5.vertex_position_space); + debug_printf("\t\t.clip5.api_mode = 0x%x\n", (*ptr).clip5.api_mode); + debug_printf("\t\t.clip5.pad2 = 0x%x\n", (*ptr).clip5.pad2); + debug_printf("\t\t.clip6.pad0 = 0x%x\n", (*ptr).clip6.pad0); + debug_printf("\t\t.clip6.clipper_viewport_state_ptr = 0x%x\n", (*ptr).clip6.clipper_viewport_state_ptr); + debug_printf("\t\t.viewport_xmin = %f\n", (*ptr).viewport_xmin); + debug_printf("\t\t.viewport_xmax = %f\n", (*ptr).viewport_xmax); + debug_printf("\t\t.viewport_ymin = %f\n", (*ptr).viewport_ymin); + debug_printf("\t\t.viewport_ymax = %f\n", (*ptr).viewport_ymax); +} + +void +brw_dump_clipper_viewport(const struct brw_clipper_viewport *ptr) +{ + debug_printf("\t\t.xmin = %f\n", (*ptr).xmin); + debug_printf("\t\t.xmax = %f\n", (*ptr).xmax); + debug_printf("\t\t.ymin = %f\n", (*ptr).ymin); + debug_printf("\t\t.ymax = %f\n", (*ptr).ymax); 
+} + +void +brw_dump_constant_buffer(const struct brw_constant_buffer *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.valid = 0x%x\n", (*ptr).header.valid); + debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.buffer_length = 0x%x\n", (*ptr).bits0.buffer_length); + debug_printf("\t\t.bits0.buffer_address = 0x%x\n", (*ptr).bits0.buffer_address); +} + +void +brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.nr_urb_entries = 0x%x\n", (*ptr).bits0.nr_urb_entries); + debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); + debug_printf("\t\t.bits0.urb_entry_size = 0x%x\n", (*ptr).bits0.urb_entry_size); + debug_printf("\t\t.bits0.pad0 = 0x%x\n", (*ptr).bits0.pad0); +} + +void +brw_dump_depthbuffer(const struct brw_depthbuffer *ptr) +{ + debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length); + debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode); + debug_printf("\t\t.header.dword = 0x%x\n", (*ptr).header.dword); + debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", (*ptr).dword1.bits.pitch); + debug_printf("\t\t.dword1.bits.format = 0x%x\n", (*ptr).dword1.bits.format); + debug_printf("\t\t.dword1.bits.pad = 0x%x\n", (*ptr).dword1.bits.pad); + debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", (*ptr).dword1.bits.software_tiled_rendering_mode); + debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", (*ptr).dword1.bits.depth_offset_disable); + debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", (*ptr).dword1.bits.tile_walk); + debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", (*ptr).dword1.bits.tiled_surface); + debug_printf("\t\t.dword1.bits.pad2 = 0x%x\n", 
(*ptr).dword1.bits.pad2); + debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", (*ptr).dword1.bits.surface_type); + debug_printf("\t\t.dword1.dword = 0x%x\n", (*ptr).dword1.dword); + debug_printf("\t\t.dword2_base_addr = 0x%x\n", (*ptr).dword2_base_addr); + debug_printf("\t\t.dword3.bits.pad = 0x%x\n", (*ptr).dword3.bits.pad); + debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", (*ptr).dword3.bits.mipmap_layout); + debug_printf("\t\t.dword3.bits.lod = 0x%x\n", (*ptr).dword3.bits.lod); + debug_printf("\t\t.dword3.bits.width = 0x%x\n", (*ptr).dword3.bits.width); + debug_printf("\t\t.dword3.bits.height = 0x%x\n", (*ptr).dword3.bits.height); + debug_printf("\t\t.dword3.dword = 0x%x\n", (*ptr).dword3.dword); + debug_printf("\t\t.dword4.bits.pad = 0x%x\n", (*ptr).dword4.bits.pad); + debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", (*ptr).dword4.bits.min_array_element); + debug_printf("\t\t.dword4.bits.depth = 0x%x\n", (*ptr).dword4.bits.depth); + debug_printf("\t\t.dword4.dword = 0x%x\n", (*ptr).dword4.dword); +} + +void +brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr) +{ + debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length); + debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode); + debug_printf("\t\t.header.dword = 0x%x\n", (*ptr).header.dword); + debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", (*ptr).dword1.bits.pitch); + debug_printf("\t\t.dword1.bits.format = 0x%x\n", (*ptr).dword1.bits.format); + debug_printf("\t\t.dword1.bits.pad = 0x%x\n", (*ptr).dword1.bits.pad); + debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", (*ptr).dword1.bits.software_tiled_rendering_mode); + debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", (*ptr).dword1.bits.depth_offset_disable); + debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", (*ptr).dword1.bits.tile_walk); + debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", (*ptr).dword1.bits.tiled_surface); + 
debug_printf("\t\t.dword1.bits.pad2 = 0x%x\n", (*ptr).dword1.bits.pad2); + debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", (*ptr).dword1.bits.surface_type); + debug_printf("\t\t.dword1.dword = 0x%x\n", (*ptr).dword1.dword); + debug_printf("\t\t.dword2_base_addr = 0x%x\n", (*ptr).dword2_base_addr); + debug_printf("\t\t.dword3.bits.pad = 0x%x\n", (*ptr).dword3.bits.pad); + debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", (*ptr).dword3.bits.mipmap_layout); + debug_printf("\t\t.dword3.bits.lod = 0x%x\n", (*ptr).dword3.bits.lod); + debug_printf("\t\t.dword3.bits.width = 0x%x\n", (*ptr).dword3.bits.width); + debug_printf("\t\t.dword3.bits.height = 0x%x\n", (*ptr).dword3.bits.height); + debug_printf("\t\t.dword3.dword = 0x%x\n", (*ptr).dword3.dword); + debug_printf("\t\t.dword4.bits.pad = 0x%x\n", (*ptr).dword4.bits.pad); + debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", (*ptr).dword4.bits.min_array_element); + debug_printf("\t\t.dword4.bits.depth = 0x%x\n", (*ptr).dword4.bits.depth); + debug_printf("\t\t.dword4.dword = 0x%x\n", (*ptr).dword4.dword); + debug_printf("\t\t.dword5.bits.xoffset = 0x%x\n", (*ptr).dword5.bits.xoffset); + debug_printf("\t\t.dword5.bits.yoffset = 0x%x\n", (*ptr).dword5.bits.yoffset); + debug_printf("\t\t.dword5.dword = 0x%x\n", (*ptr).dword5.dword); +} + +void +brw_dump_drawrect(const struct brw_drawrect *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.xmin = 0x%x\n", (*ptr).xmin); + debug_printf("\t\t.ymin = 0x%x\n", (*ptr).ymin); + debug_printf("\t\t.xmax = 0x%x\n", (*ptr).xmax); + debug_printf("\t\t.ymax = 0x%x\n", (*ptr).ymax); + debug_printf("\t\t.xorg = 0x%x\n", (*ptr).xorg); + debug_printf("\t\t.yorg = 0x%x\n", (*ptr).yorg); +} + +void +brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + 
debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.depth_offset_clamp = %f\n", (*ptr).depth_offset_clamp); +} + +void +brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr) +{ + debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0); + debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); + debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1); + debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); + debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); + debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); + debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); + debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); + debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); + debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0); + debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); + debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1); + debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); + debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); + debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); + debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3); + debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); + debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); + debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0); + 
debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); + debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); + debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); + debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0); + debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); + debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1); + debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); + debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2); + debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); + debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3); + debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0); + debug_printf("\t\t.thread4.rendering_enable = 0x%x\n", (*ptr).thread4.rendering_enable); + debug_printf("\t\t.thread4.pad4 = 0x%x\n", (*ptr).thread4.pad4); + debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); + debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); + debug_printf("\t\t.thread4.pad1 = 0x%x\n", (*ptr).thread4.pad1); + debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); + debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2); + debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); + debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3); + debug_printf("\t\t.gs5.sampler_count = 0x%x\n", (*ptr).gs5.sampler_count); + debug_printf("\t\t.gs5.pad0 = 0x%x\n", (*ptr).gs5.pad0); + debug_printf("\t\t.gs5.sampler_state_pointer = 0x%x\n", (*ptr).gs5.sampler_state_pointer); + debug_printf("\t\t.gs6.max_vp_index = 0x%x\n", (*ptr).gs6.max_vp_index); + 
debug_printf("\t\t.gs6.pad0 = 0x%x\n", (*ptr).gs6.pad0); + debug_printf("\t\t.gs6.svbi_post_inc_value = 0x%x\n", (*ptr).gs6.svbi_post_inc_value); + debug_printf("\t\t.gs6.pad1 = 0x%x\n", (*ptr).gs6.pad1); + debug_printf("\t\t.gs6.svbi_post_inc_enable = 0x%x\n", (*ptr).gs6.svbi_post_inc_enable); + debug_printf("\t\t.gs6.svbi_payload = 0x%x\n", (*ptr).gs6.svbi_payload); + debug_printf("\t\t.gs6.discard_adjaceny = 0x%x\n", (*ptr).gs6.discard_adjaceny); + debug_printf("\t\t.gs6.reorder_enable = 0x%x\n", (*ptr).gs6.reorder_enable); + debug_printf("\t\t.gs6.pad2 = 0x%x\n", (*ptr).gs6.pad2); +} + +void +brw_dump_indexbuffer(const struct brw_indexbuffer *ptr) +{ + debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length); + debug_printf("\t\t.header.bits.index_format = 0x%x\n", (*ptr).header.bits.index_format); + debug_printf("\t\t.header.bits.cut_index_enable = 0x%x\n", (*ptr).header.bits.cut_index_enable); + debug_printf("\t\t.header.bits.pad = 0x%x\n", (*ptr).header.bits.pad); + debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode); + debug_printf("\t\t.header.dword = 0x%x\n", (*ptr).header.dword); + debug_printf("\t\t.buffer_start = 0x%x\n", (*ptr).buffer_start); + debug_printf("\t\t.buffer_end = 0x%x\n", (*ptr).buffer_end); +} + +void +brw_dump_line_stipple(const struct brw_line_stipple *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.pattern = 0x%x\n", (*ptr).bits0.pattern); + debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); + debug_printf("\t\t.bits1.repeat_count = 0x%x\n", (*ptr).bits1.repeat_count); + debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad); + debug_printf("\t\t.bits1.inverse_repeat_count = 0x%x\n", (*ptr).bits1.inverse_repeat_count); +} + +void +brw_dump_mi_flush(const struct brw_mi_flush *ptr) +{ + debug_printf("\t\t.flags = 0x%x\n", (*ptr).flags); + debug_printf("\t\t.pad 
= 0x%x\n", (*ptr).pad); + debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode); +} + +void +brw_dump_pipe_control(const struct brw_pipe_control *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.notify_enable = 0x%x\n", (*ptr).header.notify_enable); + debug_printf("\t\t.header.texture_cache_flush_enable = 0x%x\n", (*ptr).header.texture_cache_flush_enable); + debug_printf("\t\t.header.indirect_state_pointers_disable = 0x%x\n", (*ptr).header.indirect_state_pointers_disable); + debug_printf("\t\t.header.instruction_state_cache_flush_enable = 0x%x\n", (*ptr).header.instruction_state_cache_flush_enable); + debug_printf("\t\t.header.write_cache_flush_enable = 0x%x\n", (*ptr).header.write_cache_flush_enable); + debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", (*ptr).header.depth_stall_enable); + debug_printf("\t\t.header.post_sync_operation = 0x%x\n", (*ptr).header.post_sync_operation); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad); + debug_printf("\t\t.bits1.dest_addr_type = 0x%x\n", (*ptr).bits1.dest_addr_type); + debug_printf("\t\t.bits1.dest_addr = 0x%x\n", (*ptr).bits1.dest_addr); + debug_printf("\t\t.data0 = 0x%x\n", (*ptr).data0); + debug_printf("\t\t.data1 = 0x%x\n", (*ptr).data1); +} + +void +brw_dump_pipeline_select(const struct brw_pipeline_select *ptr) +{ + debug_printf("\t\t.header.pipeline_select = 0x%x\n", (*ptr).header.pipeline_select); + debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); +} + +void +brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.vs.pad = 0x%x\n", (*ptr).vs.pad); + debug_printf("\t\t.vs.offset = 0x%x\n", (*ptr).vs.offset); + 
debug_printf("\t\t.gs.enable = 0x%x\n", (*ptr).gs.enable); + debug_printf("\t\t.gs.pad = 0x%x\n", (*ptr).gs.pad); + debug_printf("\t\t.gs.offset = 0x%x\n", (*ptr).gs.offset); + debug_printf("\t\t.clp.enable = 0x%x\n", (*ptr).clp.enable); + debug_printf("\t\t.clp.pad = 0x%x\n", (*ptr).clp.pad); + debug_printf("\t\t.clp.offset = 0x%x\n", (*ptr).clp.offset); + debug_printf("\t\t.sf.pad = 0x%x\n", (*ptr).sf.pad); + debug_printf("\t\t.sf.offset = 0x%x\n", (*ptr).sf.offset); + debug_printf("\t\t.wm.pad = 0x%x\n", (*ptr).wm.pad); + debug_printf("\t\t.wm.offset = 0x%x\n", (*ptr).wm.offset); + debug_printf("\t\t.cc.pad = 0x%x\n", (*ptr).cc.pad); + debug_printf("\t\t.cc.offset = 0x%x\n", (*ptr).cc.offset); +} + +void +brw_dump_polygon_stipple(const struct brw_polygon_stipple *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.stipple[0] = 0x%x\n", (*ptr).stipple[0]); + debug_printf("\t\t.stipple[1] = 0x%x\n", (*ptr).stipple[1]); + debug_printf("\t\t.stipple[2] = 0x%x\n", (*ptr).stipple[2]); + debug_printf("\t\t.stipple[3] = 0x%x\n", (*ptr).stipple[3]); + debug_printf("\t\t.stipple[4] = 0x%x\n", (*ptr).stipple[4]); + debug_printf("\t\t.stipple[5] = 0x%x\n", (*ptr).stipple[5]); + debug_printf("\t\t.stipple[6] = 0x%x\n", (*ptr).stipple[6]); + debug_printf("\t\t.stipple[7] = 0x%x\n", (*ptr).stipple[7]); + debug_printf("\t\t.stipple[8] = 0x%x\n", (*ptr).stipple[8]); + debug_printf("\t\t.stipple[9] = 0x%x\n", (*ptr).stipple[9]); + debug_printf("\t\t.stipple[10] = 0x%x\n", (*ptr).stipple[10]); + debug_printf("\t\t.stipple[11] = 0x%x\n", (*ptr).stipple[11]); + debug_printf("\t\t.stipple[12] = 0x%x\n", (*ptr).stipple[12]); + debug_printf("\t\t.stipple[13] = 0x%x\n", (*ptr).stipple[13]); + debug_printf("\t\t.stipple[14] = 0x%x\n", (*ptr).stipple[14]); + debug_printf("\t\t.stipple[15] = 0x%x\n", (*ptr).stipple[15]); + debug_printf("\t\t.stipple[16] = 0x%x\n", 
(*ptr).stipple[16]); + debug_printf("\t\t.stipple[17] = 0x%x\n", (*ptr).stipple[17]); + debug_printf("\t\t.stipple[18] = 0x%x\n", (*ptr).stipple[18]); + debug_printf("\t\t.stipple[19] = 0x%x\n", (*ptr).stipple[19]); + debug_printf("\t\t.stipple[20] = 0x%x\n", (*ptr).stipple[20]); + debug_printf("\t\t.stipple[21] = 0x%x\n", (*ptr).stipple[21]); + debug_printf("\t\t.stipple[22] = 0x%x\n", (*ptr).stipple[22]); + debug_printf("\t\t.stipple[23] = 0x%x\n", (*ptr).stipple[23]); + debug_printf("\t\t.stipple[24] = 0x%x\n", (*ptr).stipple[24]); + debug_printf("\t\t.stipple[25] = 0x%x\n", (*ptr).stipple[25]); + debug_printf("\t\t.stipple[26] = 0x%x\n", (*ptr).stipple[26]); + debug_printf("\t\t.stipple[27] = 0x%x\n", (*ptr).stipple[27]); + debug_printf("\t\t.stipple[28] = 0x%x\n", (*ptr).stipple[28]); + debug_printf("\t\t.stipple[29] = 0x%x\n", (*ptr).stipple[29]); + debug_printf("\t\t.stipple[30] = 0x%x\n", (*ptr).stipple[30]); + debug_printf("\t\t.stipple[31] = 0x%x\n", (*ptr).stipple[31]); +} + +void +brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.y_offset = 0x%x\n", (*ptr).bits0.y_offset); + debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); + debug_printf("\t\t.bits0.x_offset = 0x%x\n", (*ptr).bits0.x_offset); + debug_printf("\t\t.bits0.pad0 = 0x%x\n", (*ptr).bits0.pad0); +} + +void +brw_dump_sampler_default_color(const struct brw_sampler_default_color *ptr) +{ + debug_printf("\t\t.color[0] = %f\n", (*ptr).color[0]); + debug_printf("\t\t.color[1] = %f\n", (*ptr).color[1]); + debug_printf("\t\t.color[2] = %f\n", (*ptr).color[2]); + debug_printf("\t\t.color[3] = %f\n", (*ptr).color[3]); +} + +void +brw_dump_sampler_state(const struct brw_sampler_state *ptr) +{ + debug_printf("\t\t.ss0.shadow_function = 0x%x\n", (*ptr).ss0.shadow_function); + 
debug_printf("\t\t.ss0.lod_bias = 0x%x\n", (*ptr).ss0.lod_bias); + debug_printf("\t\t.ss0.min_filter = 0x%x\n", (*ptr).ss0.min_filter); + debug_printf("\t\t.ss0.mag_filter = 0x%x\n", (*ptr).ss0.mag_filter); + debug_printf("\t\t.ss0.mip_filter = 0x%x\n", (*ptr).ss0.mip_filter); + debug_printf("\t\t.ss0.base_level = 0x%x\n", (*ptr).ss0.base_level); + debug_printf("\t\t.ss0.pad = 0x%x\n", (*ptr).ss0.pad); + debug_printf("\t\t.ss0.lod_preclamp = 0x%x\n", (*ptr).ss0.lod_preclamp); + debug_printf("\t\t.ss0.default_color_mode = 0x%x\n", (*ptr).ss0.default_color_mode); + debug_printf("\t\t.ss0.pad0 = 0x%x\n", (*ptr).ss0.pad0); + debug_printf("\t\t.ss0.disable = 0x%x\n", (*ptr).ss0.disable); + debug_printf("\t\t.ss1.r_wrap_mode = 0x%x\n", (*ptr).ss1.r_wrap_mode); + debug_printf("\t\t.ss1.t_wrap_mode = 0x%x\n", (*ptr).ss1.t_wrap_mode); + debug_printf("\t\t.ss1.s_wrap_mode = 0x%x\n", (*ptr).ss1.s_wrap_mode); + debug_printf("\t\t.ss1.pad = 0x%x\n", (*ptr).ss1.pad); + debug_printf("\t\t.ss1.max_lod = 0x%x\n", (*ptr).ss1.max_lod); + debug_printf("\t\t.ss1.min_lod = 0x%x\n", (*ptr).ss1.min_lod); + debug_printf("\t\t.ss2.pad = 0x%x\n", (*ptr).ss2.pad); + debug_printf("\t\t.ss2.default_color_pointer = 0x%x\n", (*ptr).ss2.default_color_pointer); + debug_printf("\t\t.ss3.pad = 0x%x\n", (*ptr).ss3.pad); + debug_printf("\t\t.ss3.max_aniso = 0x%x\n", (*ptr).ss3.max_aniso); + debug_printf("\t\t.ss3.chroma_key_mode = 0x%x\n", (*ptr).ss3.chroma_key_mode); + debug_printf("\t\t.ss3.chroma_key_index = 0x%x\n", (*ptr).ss3.chroma_key_index); + debug_printf("\t\t.ss3.chroma_key_enable = 0x%x\n", (*ptr).ss3.chroma_key_enable); + debug_printf("\t\t.ss3.monochrome_filter_width = 0x%x\n", (*ptr).ss3.monochrome_filter_width); + debug_printf("\t\t.ss3.monochrome_filter_height = 0x%x\n", (*ptr).ss3.monochrome_filter_height); +} + +void +brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr) +{ + debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0); + 
debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); + debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1); + debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); + debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); + debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); + debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); + debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); + debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); + debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0); + debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); + debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1); + debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); + debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); + debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); + debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3); + debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); + debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); + debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0); + debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); + debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); + debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); + 
debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0); + debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); + debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1); + debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); + debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2); + debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); + debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3); + debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0); + debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); + debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); + debug_printf("\t\t.thread4.pad1 = 0x%x\n", (*ptr).thread4.pad1); + debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); + debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2); + debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); + debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3); + debug_printf("\t\t.sf5.front_winding = 0x%x\n", (*ptr).sf5.front_winding); + debug_printf("\t\t.sf5.viewport_transform = 0x%x\n", (*ptr).sf5.viewport_transform); + debug_printf("\t\t.sf5.pad0 = 0x%x\n", (*ptr).sf5.pad0); + debug_printf("\t\t.sf5.sf_viewport_state_offset = 0x%x\n", (*ptr).sf5.sf_viewport_state_offset); + debug_printf("\t\t.sf6.pad0 = 0x%x\n", (*ptr).sf6.pad0); + debug_printf("\t\t.sf6.dest_org_vbias = 0x%x\n", (*ptr).sf6.dest_org_vbias); + debug_printf("\t\t.sf6.dest_org_hbias = 0x%x\n", (*ptr).sf6.dest_org_hbias); + debug_printf("\t\t.sf6.scissor = 0x%x\n", (*ptr).sf6.scissor); + debug_printf("\t\t.sf6.disable_2x2_trifilter = 0x%x\n", (*ptr).sf6.disable_2x2_trifilter); + debug_printf("\t\t.sf6.disable_zero_pix_trifilter = 0x%x\n", 
(*ptr).sf6.disable_zero_pix_trifilter); + debug_printf("\t\t.sf6.point_rast_rule = 0x%x\n", (*ptr).sf6.point_rast_rule); + debug_printf("\t\t.sf6.line_endcap_aa_region_width = 0x%x\n", (*ptr).sf6.line_endcap_aa_region_width); + debug_printf("\t\t.sf6.line_width = 0x%x\n", (*ptr).sf6.line_width); + debug_printf("\t\t.sf6.fast_scissor_disable = 0x%x\n", (*ptr).sf6.fast_scissor_disable); + debug_printf("\t\t.sf6.cull_mode = 0x%x\n", (*ptr).sf6.cull_mode); + debug_printf("\t\t.sf6.aa_enable = 0x%x\n", (*ptr).sf6.aa_enable); + debug_printf("\t\t.sf7.point_size = 0x%x\n", (*ptr).sf7.point_size); + debug_printf("\t\t.sf7.use_point_size_state = 0x%x\n", (*ptr).sf7.use_point_size_state); + debug_printf("\t\t.sf7.subpixel_precision = 0x%x\n", (*ptr).sf7.subpixel_precision); + debug_printf("\t\t.sf7.sprite_point = 0x%x\n", (*ptr).sf7.sprite_point); + debug_printf("\t\t.sf7.pad0 = 0x%x\n", (*ptr).sf7.pad0); + debug_printf("\t\t.sf7.aa_line_distance_mode = 0x%x\n", (*ptr).sf7.aa_line_distance_mode); + debug_printf("\t\t.sf7.trifan_pv = 0x%x\n", (*ptr).sf7.trifan_pv); + debug_printf("\t\t.sf7.linestrip_pv = 0x%x\n", (*ptr).sf7.linestrip_pv); + debug_printf("\t\t.sf7.tristrip_pv = 0x%x\n", (*ptr).sf7.tristrip_pv); + debug_printf("\t\t.sf7.line_last_pixel_enable = 0x%x\n", (*ptr).sf7.line_last_pixel_enable); +} + +void +brw_dump_sf_viewport(const struct brw_sf_viewport *ptr) +{ + debug_printf("\t\t.viewport.m00 = %f\n", (*ptr).viewport.m00); + debug_printf("\t\t.viewport.m11 = %f\n", (*ptr).viewport.m11); + debug_printf("\t\t.viewport.m22 = %f\n", (*ptr).viewport.m22); + debug_printf("\t\t.viewport.m30 = %f\n", (*ptr).viewport.m30); + debug_printf("\t\t.viewport.m31 = %f\n", (*ptr).viewport.m31); + debug_printf("\t\t.viewport.m32 = %f\n", (*ptr).viewport.m32); + debug_printf("\t\t.scissor.xmin = 0x%x\n", (*ptr).scissor.xmin); + debug_printf("\t\t.scissor.ymin = 0x%x\n", (*ptr).scissor.ymin); + debug_printf("\t\t.scissor.xmax = 0x%x\n", (*ptr).scissor.xmax); + 
debug_printf("\t\t.scissor.ymax = 0x%x\n", (*ptr).scissor.ymax); +} + +void +brw_dump_ss0(const struct brw_ss0 *ptr) +{ + debug_printf("\t\t.shadow_function = 0x%x\n", (*ptr).shadow_function); + debug_printf("\t\t.lod_bias = 0x%x\n", (*ptr).lod_bias); + debug_printf("\t\t.min_filter = 0x%x\n", (*ptr).min_filter); + debug_printf("\t\t.mag_filter = 0x%x\n", (*ptr).mag_filter); + debug_printf("\t\t.mip_filter = 0x%x\n", (*ptr).mip_filter); + debug_printf("\t\t.base_level = 0x%x\n", (*ptr).base_level); + debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); + debug_printf("\t\t.lod_preclamp = 0x%x\n", (*ptr).lod_preclamp); + debug_printf("\t\t.default_color_mode = 0x%x\n", (*ptr).default_color_mode); + debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); + debug_printf("\t\t.disable = 0x%x\n", (*ptr).disable); +} + +void +brw_dump_ss1(const struct brw_ss1 *ptr) +{ + debug_printf("\t\t.r_wrap_mode = 0x%x\n", (*ptr).r_wrap_mode); + debug_printf("\t\t.t_wrap_mode = 0x%x\n", (*ptr).t_wrap_mode); + debug_printf("\t\t.s_wrap_mode = 0x%x\n", (*ptr).s_wrap_mode); + debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); + debug_printf("\t\t.max_lod = 0x%x\n", (*ptr).max_lod); + debug_printf("\t\t.min_lod = 0x%x\n", (*ptr).min_lod); +} + +void +brw_dump_ss2(const struct brw_ss2 *ptr) +{ + debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); + debug_printf("\t\t.default_color_pointer = 0x%x\n", (*ptr).default_color_pointer); +} + +void +brw_dump_ss3(const struct brw_ss3 *ptr) +{ + debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); + debug_printf("\t\t.max_aniso = 0x%x\n", (*ptr).max_aniso); + debug_printf("\t\t.chroma_key_mode = 0x%x\n", (*ptr).chroma_key_mode); + debug_printf("\t\t.chroma_key_index = 0x%x\n", (*ptr).chroma_key_index); + debug_printf("\t\t.chroma_key_enable = 0x%x\n", (*ptr).chroma_key_enable); + debug_printf("\t\t.monochrome_filter_width = 0x%x\n", (*ptr).monochrome_filter_width); + debug_printf("\t\t.monochrome_filter_height = 0x%x\n", (*ptr).monochrome_filter_height); +} + +void 
+brw_dump_state_base_address(const struct brw_state_base_address *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+   debug_printf("\t\t.bits0.modify_enable = 0x%x\n", (*ptr).bits0.modify_enable);
+   debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad);
+   debug_printf("\t\t.bits0.general_state_address = 0x%x\n", (*ptr).bits0.general_state_address);
+   debug_printf("\t\t.bits1.modify_enable = 0x%x\n", (*ptr).bits1.modify_enable);
+   debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad);
+   debug_printf("\t\t.bits1.surface_state_address = 0x%x\n", (*ptr).bits1.surface_state_address);
+   debug_printf("\t\t.bits2.modify_enable = 0x%x\n", (*ptr).bits2.modify_enable);
+   debug_printf("\t\t.bits2.pad = 0x%x\n", (*ptr).bits2.pad);
+   debug_printf("\t\t.bits2.indirect_object_state_address = 0x%x\n", (*ptr).bits2.indirect_object_state_address);
+   debug_printf("\t\t.bits3.modify_enable = 0x%x\n", (*ptr).bits3.modify_enable);
+   debug_printf("\t\t.bits3.pad = 0x%x\n", (*ptr).bits3.pad);
+   debug_printf("\t\t.bits3.general_state_upper_bound = 0x%x\n", (*ptr).bits3.general_state_upper_bound);
+   debug_printf("\t\t.bits4.modify_enable = 0x%x\n", (*ptr).bits4.modify_enable);
+   debug_printf("\t\t.bits4.pad = 0x%x\n", (*ptr).bits4.pad);
+   debug_printf("\t\t.bits4.indirect_object_state_upper_bound = 0x%x\n", (*ptr).bits4.indirect_object_state_upper_bound);
+}
+
+/**
+ * Print the header and bits0 fields of a brw_state_prefetch.
+ *
+ * Every field is written through debug_printf on its own line, in the
+ * form "\t\t.<field> = 0x<hex value>".
+ *
+ * \param ptr  structure to dump; it is dereferenced unconditionally.
+ */
+void
+brw_dump_state_prefetch(const struct brw_state_prefetch *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.prefetch_count = 0x%x\n", ptr->bits0.prefetch_count);
+   debug_printf("\t\t.bits0.pad = 0x%x\n", ptr->bits0.pad);
+   debug_printf("\t\t.bits0.prefetch_pointer = 0x%x\n", ptr->bits0.prefetch_pointer);
+}
+
+/**
+ * Print every field of a brw_surf_ss0, one per line, as
+ * "\t\t.<field> = 0x<hex value>".
+ *
+ * \param ptr  structure to dump; it is dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr)
+{
+   debug_printf("\t\t.cube_pos_z = 0x%x\n", ptr->cube_pos_z);
+   debug_printf("\t\t.cube_neg_z = 0x%x\n", ptr->cube_neg_z);
+   debug_printf("\t\t.cube_pos_y = 0x%x\n", ptr->cube_pos_y);
+   debug_printf("\t\t.cube_neg_y = 0x%x\n", ptr->cube_neg_y);
+   debug_printf("\t\t.cube_pos_x = 0x%x\n", ptr->cube_pos_x);
+   debug_printf("\t\t.cube_neg_x = 0x%x\n", ptr->cube_neg_x);
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.mipmap_layout_mode = 0x%x\n", ptr->mipmap_layout_mode);
+   debug_printf("\t\t.vert_line_stride_ofs = 0x%x\n", ptr->vert_line_stride_ofs);
+   debug_printf("\t\t.vert_line_stride = 0x%x\n", ptr->vert_line_stride);
+   debug_printf("\t\t.color_blend = 0x%x\n", ptr->color_blend);
+   debug_printf("\t\t.writedisable_blue = 0x%x\n", ptr->writedisable_blue);
+   debug_printf("\t\t.writedisable_green = 0x%x\n", ptr->writedisable_green);
+   debug_printf("\t\t.writedisable_red = 0x%x\n", ptr->writedisable_red);
+   debug_printf("\t\t.writedisable_alpha = 0x%x\n", ptr->writedisable_alpha);
+   debug_printf("\t\t.surface_format = 0x%x\n", ptr->surface_format);
+   debug_printf("\t\t.data_return_format = 0x%x\n", ptr->data_return_format);
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.surface_type = 0x%x\n", ptr->surface_type);
+}
+
+/**
+ * Print the single base_addr field of a brw_surf_ss1 in hexadecimal.
+ *
+ * \param ptr  structure to dump; it is dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr)
+{
+   debug_printf("\t\t.base_addr = 0x%x\n", ptr->base_addr);
+}
+
+/**
+ * Print the pad, mip_count, width and height fields of a brw_surf_ss2.
+ *
+ * \param ptr  structure to dump; it is dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr)
+{
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.mip_count = 0x%x\n", ptr->mip_count);
+   debug_printf("\t\t.width = 0x%x\n", ptr->width);
+   debug_printf("\t\t.height = 0x%x\n", ptr->height);
+}
+
+/**
+ * Print the tile_walk, tiled_surface, pad, pitch and depth fields of a
+ * brw_surf_ss3.
+ *
+ * \param ptr  structure to dump; it is dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr)
+{
+   debug_printf("\t\t.tile_walk = 0x%x\n", ptr->tile_walk);
+   debug_printf("\t\t.tiled_surface = 0x%x\n", ptr->tiled_surface);
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.pitch = 0x%x\n", ptr->pitch);
+   debug_printf("\t\t.depth = 0x%x\n", ptr->depth);
+}
+
+/**
+ * Print every field of a brw_surf_ss4, one per line, in hexadecimal.
+ *
+ * \param ptr  structure to dump; it is dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr)
+{
+   debug_printf("\t\t.multisample_position_palette_index = 0x%x\n", ptr->multisample_position_palette_index);
+   debug_printf("\t\t.pad1 = 0x%x\n", ptr->pad1);
+   debug_printf("\t\t.num_multisamples = 0x%x\n", ptr->num_multisamples);
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.render_target_view_extent = 0x%x\n", ptr->render_target_view_extent);
+   debug_printf("\t\t.min_array_elt = 0x%x\n", ptr->min_array_elt);
+   debug_printf("\t\t.min_lod = 0x%x\n", ptr->min_lod);
+}
+
+/**
+ * Print every field of a brw_surf_ss5, one per line, in hexadecimal.
+ *
+ * \param ptr  structure to dump; it is dereferenced unconditionally.
+ */
+void
+brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr)
+{
+   debug_printf("\t\t.pad1 = 0x%x\n", ptr->pad1);
+   debug_printf("\t\t.llc_mapping = 0x%x\n", ptr->llc_mapping);
+   debug_printf("\t\t.mlc_mapping = 0x%x\n", ptr->mlc_mapping);
+   debug_printf("\t\t.gfdt = 0x%x\n", ptr->gfdt);
+   debug_printf("\t\t.gfdt_src = 0x%x\n", ptr->gfdt_src);
+   debug_printf("\t\t.y_offset = 0x%x\n", ptr->y_offset);
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.x_offset = 0x%x\n", ptr->x_offset);
+}
+
+/**
+ * Print the ss0 through ss5 members of a brw_surface_state field by field.
+ *
+ * Each output line names the sub-structure as well as the field, e.g.
+ * "\t\t.ss0.surface_format = 0x<hex value>".
+ *
+ * \param ptr  structure to dump; it is dereferenced unconditionally.
+ */
+void
+brw_dump_surface_state(const struct brw_surface_state *ptr)
+{
+   debug_printf("\t\t.ss0.cube_pos_z = 0x%x\n", ptr->ss0.cube_pos_z);
+   debug_printf("\t\t.ss0.cube_neg_z = 0x%x\n", ptr->ss0.cube_neg_z);
+   debug_printf("\t\t.ss0.cube_pos_y = 0x%x\n", ptr->ss0.cube_pos_y);
+   debug_printf("\t\t.ss0.cube_neg_y = 0x%x\n", ptr->ss0.cube_neg_y);
+   debug_printf("\t\t.ss0.cube_pos_x = 0x%x\n", ptr->ss0.cube_pos_x);
+   debug_printf("\t\t.ss0.cube_neg_x = 0x%x\n", ptr->ss0.cube_neg_x);
+   debug_printf("\t\t.ss0.pad = 0x%x\n", ptr->ss0.pad);
+   debug_printf("\t\t.ss0.mipmap_layout_mode = 0x%x\n", ptr->ss0.mipmap_layout_mode);
+   debug_printf("\t\t.ss0.vert_line_stride_ofs = 0x%x\n", ptr->ss0.vert_line_stride_ofs);
+   debug_printf("\t\t.ss0.vert_line_stride = 0x%x\n", ptr->ss0.vert_line_stride);
+   debug_printf("\t\t.ss0.color_blend = 0x%x\n", ptr->ss0.color_blend);
+   debug_printf("\t\t.ss0.writedisable_blue = 0x%x\n", ptr->ss0.writedisable_blue);
+   debug_printf("\t\t.ss0.writedisable_green = 0x%x\n", ptr->ss0.writedisable_green);
+   debug_printf("\t\t.ss0.writedisable_red = 0x%x\n", ptr->ss0.writedisable_red);
+   debug_printf("\t\t.ss0.writedisable_alpha = 0x%x\n", ptr->ss0.writedisable_alpha);
+   debug_printf("\t\t.ss0.surface_format = 0x%x\n", ptr->ss0.surface_format);
+   debug_printf("\t\t.ss0.data_return_format = 0x%x\n", ptr->ss0.data_return_format);
+   debug_printf("\t\t.ss0.pad0 = 0x%x\n", ptr->ss0.pad0);
+   debug_printf("\t\t.ss0.surface_type = 0x%x\n", ptr->ss0.surface_type);
+   debug_printf("\t\t.ss1.base_addr = 0x%x\n", ptr->ss1.base_addr);
+   debug_printf("\t\t.ss2.pad = 0x%x\n", ptr->ss2.pad);
+   debug_printf("\t\t.ss2.mip_count = 0x%x\n", ptr->ss2.mip_count);
+   debug_printf("\t\t.ss2.width = 0x%x\n", ptr->ss2.width);
+   debug_printf("\t\t.ss2.height = 0x%x\n", ptr->ss2.height);
+   debug_printf("\t\t.ss3.tile_walk = 0x%x\n", ptr->ss3.tile_walk);
+   debug_printf("\t\t.ss3.tiled_surface = 0x%x\n", ptr->ss3.tiled_surface);
+   debug_printf("\t\t.ss3.pad = 0x%x\n", ptr->ss3.pad);
+   debug_printf("\t\t.ss3.pitch = 0x%x\n", ptr->ss3.pitch);
+   debug_printf("\t\t.ss3.depth = 0x%x\n", ptr->ss3.depth);
+   debug_printf("\t\t.ss4.multisample_position_palette_index = 0x%x\n", ptr->ss4.multisample_position_palette_index);
+   debug_printf("\t\t.ss4.pad1 = 0x%x\n", ptr->ss4.pad1);
+   debug_printf("\t\t.ss4.num_multisamples = 0x%x\n", ptr->ss4.num_multisamples);
+   debug_printf("\t\t.ss4.pad0 = 0x%x\n", ptr->ss4.pad0);
+   debug_printf("\t\t.ss4.render_target_view_extent = 0x%x\n", ptr->ss4.render_target_view_extent);
+   debug_printf("\t\t.ss4.min_array_elt = 0x%x\n", ptr->ss4.min_array_elt);
+   debug_printf("\t\t.ss4.min_lod = 0x%x\n", ptr->ss4.min_lod);
+   debug_printf("\t\t.ss5.pad1 = 0x%x\n", ptr->ss5.pad1);
+
debug_printf("\t\t.ss5.llc_mapping = 0x%x\n", (*ptr).ss5.llc_mapping); + debug_printf("\t\t.ss5.mlc_mapping = 0x%x\n", (*ptr).ss5.mlc_mapping); + debug_printf("\t\t.ss5.gfdt = 0x%x\n", (*ptr).ss5.gfdt); + debug_printf("\t\t.ss5.gfdt_src = 0x%x\n", (*ptr).ss5.gfdt_src); + debug_printf("\t\t.ss5.y_offset = 0x%x\n", (*ptr).ss5.y_offset); + debug_printf("\t\t.ss5.pad0 = 0x%x\n", (*ptr).ss5.pad0); + debug_printf("\t\t.ss5.x_offset = 0x%x\n", (*ptr).ss5.x_offset); +} + +void +brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); + debug_printf("\t\t.bits0.system_instruction_pointer = 0x%x\n", (*ptr).bits0.system_instruction_pointer); +} + +void +brw_dump_urb_fence(const struct brw_urb_fence *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.vs_realloc = 0x%x\n", (*ptr).header.vs_realloc); + debug_printf("\t\t.header.gs_realloc = 0x%x\n", (*ptr).header.gs_realloc); + debug_printf("\t\t.header.clp_realloc = 0x%x\n", (*ptr).header.clp_realloc); + debug_printf("\t\t.header.sf_realloc = 0x%x\n", (*ptr).header.sf_realloc); + debug_printf("\t\t.header.vfe_realloc = 0x%x\n", (*ptr).header.vfe_realloc); + debug_printf("\t\t.header.cs_realloc = 0x%x\n", (*ptr).header.cs_realloc); + debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.bits0.vs_fence = 0x%x\n", (*ptr).bits0.vs_fence); + debug_printf("\t\t.bits0.gs_fence = 0x%x\n", (*ptr).bits0.gs_fence); + debug_printf("\t\t.bits0.clp_fence = 0x%x\n", (*ptr).bits0.clp_fence); + debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); + debug_printf("\t\t.bits1.sf_fence = 0x%x\n", (*ptr).bits1.sf_fence); + debug_printf("\t\t.bits1.vf_fence = 0x%x\n", 
(*ptr).bits1.vf_fence); + debug_printf("\t\t.bits1.cs_fence = 0x%x\n", (*ptr).bits1.cs_fence); + debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad); +} + +void +brw_dump_urb_immediate(const struct brw_urb_immediate *ptr) +{ + debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode); + debug_printf("\t\t.offset = 0x%x\n", (*ptr).offset); + debug_printf("\t\t.swizzle_control = 0x%x\n", (*ptr).swizzle_control); + debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); + debug_printf("\t\t.allocate = 0x%x\n", (*ptr).allocate); + debug_printf("\t\t.used = 0x%x\n", (*ptr).used); + debug_printf("\t\t.complete = 0x%x\n", (*ptr).complete); + debug_printf("\t\t.response_length = 0x%x\n", (*ptr).response_length); + debug_printf("\t\t.msg_length = 0x%x\n", (*ptr).msg_length); + debug_printf("\t\t.msg_target = 0x%x\n", (*ptr).msg_target); + debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1); + debug_printf("\t\t.end_of_thread = 0x%x\n", (*ptr).end_of_thread); +} + +void +brw_dump_vb_array_state(const struct brw_vb_array_state *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.vb[0].vb0.pitch = 0x%x\n", (*ptr).vb[0].vb0.pitch); + debug_printf("\t\t.vb[0].vb0.pad = 0x%x\n", (*ptr).vb[0].vb0.pad); + debug_printf("\t\t.vb[0].vb0.access_type = 0x%x\n", (*ptr).vb[0].vb0.access_type); + debug_printf("\t\t.vb[0].vb0.vb_index = 0x%x\n", (*ptr).vb[0].vb0.vb_index); + debug_printf("\t\t.vb[0].start_addr = 0x%x\n", (*ptr).vb[0].start_addr); + debug_printf("\t\t.vb[0].max_index = 0x%x\n", (*ptr).vb[0].max_index); + debug_printf("\t\t.vb[0].instance_data_step_rate = 0x%x\n", (*ptr).vb[0].instance_data_step_rate); + debug_printf("\t\t.vb[1].vb0.pitch = 0x%x\n", (*ptr).vb[1].vb0.pitch); + debug_printf("\t\t.vb[1].vb0.pad = 0x%x\n", (*ptr).vb[1].vb0.pad); + debug_printf("\t\t.vb[1].vb0.access_type = 0x%x\n", (*ptr).vb[1].vb0.access_type); + debug_printf("\t\t.vb[1].vb0.vb_index = 0x%x\n", 
(*ptr).vb[1].vb0.vb_index); + debug_printf("\t\t.vb[1].start_addr = 0x%x\n", (*ptr).vb[1].start_addr); + debug_printf("\t\t.vb[1].max_index = 0x%x\n", (*ptr).vb[1].max_index); + debug_printf("\t\t.vb[1].instance_data_step_rate = 0x%x\n", (*ptr).vb[1].instance_data_step_rate); + debug_printf("\t\t.vb[2].vb0.pitch = 0x%x\n", (*ptr).vb[2].vb0.pitch); + debug_printf("\t\t.vb[2].vb0.pad = 0x%x\n", (*ptr).vb[2].vb0.pad); + debug_printf("\t\t.vb[2].vb0.access_type = 0x%x\n", (*ptr).vb[2].vb0.access_type); + debug_printf("\t\t.vb[2].vb0.vb_index = 0x%x\n", (*ptr).vb[2].vb0.vb_index); + debug_printf("\t\t.vb[2].start_addr = 0x%x\n", (*ptr).vb[2].start_addr); + debug_printf("\t\t.vb[2].max_index = 0x%x\n", (*ptr).vb[2].max_index); + debug_printf("\t\t.vb[2].instance_data_step_rate = 0x%x\n", (*ptr).vb[2].instance_data_step_rate); + debug_printf("\t\t.vb[3].vb0.pitch = 0x%x\n", (*ptr).vb[3].vb0.pitch); + debug_printf("\t\t.vb[3].vb0.pad = 0x%x\n", (*ptr).vb[3].vb0.pad); + debug_printf("\t\t.vb[3].vb0.access_type = 0x%x\n", (*ptr).vb[3].vb0.access_type); + debug_printf("\t\t.vb[3].vb0.vb_index = 0x%x\n", (*ptr).vb[3].vb0.vb_index); + debug_printf("\t\t.vb[3].start_addr = 0x%x\n", (*ptr).vb[3].start_addr); + debug_printf("\t\t.vb[3].max_index = 0x%x\n", (*ptr).vb[3].max_index); + debug_printf("\t\t.vb[3].instance_data_step_rate = 0x%x\n", (*ptr).vb[3].instance_data_step_rate); + debug_printf("\t\t.vb[4].vb0.pitch = 0x%x\n", (*ptr).vb[4].vb0.pitch); + debug_printf("\t\t.vb[4].vb0.pad = 0x%x\n", (*ptr).vb[4].vb0.pad); + debug_printf("\t\t.vb[4].vb0.access_type = 0x%x\n", (*ptr).vb[4].vb0.access_type); + debug_printf("\t\t.vb[4].vb0.vb_index = 0x%x\n", (*ptr).vb[4].vb0.vb_index); + debug_printf("\t\t.vb[4].start_addr = 0x%x\n", (*ptr).vb[4].start_addr); + debug_printf("\t\t.vb[4].max_index = 0x%x\n", (*ptr).vb[4].max_index); + debug_printf("\t\t.vb[4].instance_data_step_rate = 0x%x\n", (*ptr).vb[4].instance_data_step_rate); + debug_printf("\t\t.vb[5].vb0.pitch = 0x%x\n", 
(*ptr).vb[5].vb0.pitch); + debug_printf("\t\t.vb[5].vb0.pad = 0x%x\n", (*ptr).vb[5].vb0.pad); + debug_printf("\t\t.vb[5].vb0.access_type = 0x%x\n", (*ptr).vb[5].vb0.access_type); + debug_printf("\t\t.vb[5].vb0.vb_index = 0x%x\n", (*ptr).vb[5].vb0.vb_index); + debug_printf("\t\t.vb[5].start_addr = 0x%x\n", (*ptr).vb[5].start_addr); + debug_printf("\t\t.vb[5].max_index = 0x%x\n", (*ptr).vb[5].max_index); + debug_printf("\t\t.vb[5].instance_data_step_rate = 0x%x\n", (*ptr).vb[5].instance_data_step_rate); + debug_printf("\t\t.vb[6].vb0.pitch = 0x%x\n", (*ptr).vb[6].vb0.pitch); + debug_printf("\t\t.vb[6].vb0.pad = 0x%x\n", (*ptr).vb[6].vb0.pad); + debug_printf("\t\t.vb[6].vb0.access_type = 0x%x\n", (*ptr).vb[6].vb0.access_type); + debug_printf("\t\t.vb[6].vb0.vb_index = 0x%x\n", (*ptr).vb[6].vb0.vb_index); + debug_printf("\t\t.vb[6].start_addr = 0x%x\n", (*ptr).vb[6].start_addr); + debug_printf("\t\t.vb[6].max_index = 0x%x\n", (*ptr).vb[6].max_index); + debug_printf("\t\t.vb[6].instance_data_step_rate = 0x%x\n", (*ptr).vb[6].instance_data_step_rate); + debug_printf("\t\t.vb[7].vb0.pitch = 0x%x\n", (*ptr).vb[7].vb0.pitch); + debug_printf("\t\t.vb[7].vb0.pad = 0x%x\n", (*ptr).vb[7].vb0.pad); + debug_printf("\t\t.vb[7].vb0.access_type = 0x%x\n", (*ptr).vb[7].vb0.access_type); + debug_printf("\t\t.vb[7].vb0.vb_index = 0x%x\n", (*ptr).vb[7].vb0.vb_index); + debug_printf("\t\t.vb[7].start_addr = 0x%x\n", (*ptr).vb[7].start_addr); + debug_printf("\t\t.vb[7].max_index = 0x%x\n", (*ptr).vb[7].max_index); + debug_printf("\t\t.vb[7].instance_data_step_rate = 0x%x\n", (*ptr).vb[7].instance_data_step_rate); + debug_printf("\t\t.vb[8].vb0.pitch = 0x%x\n", (*ptr).vb[8].vb0.pitch); + debug_printf("\t\t.vb[8].vb0.pad = 0x%x\n", (*ptr).vb[8].vb0.pad); + debug_printf("\t\t.vb[8].vb0.access_type = 0x%x\n", (*ptr).vb[8].vb0.access_type); + debug_printf("\t\t.vb[8].vb0.vb_index = 0x%x\n", (*ptr).vb[8].vb0.vb_index); + debug_printf("\t\t.vb[8].start_addr = 0x%x\n", (*ptr).vb[8].start_addr); + 
debug_printf("\t\t.vb[8].max_index = 0x%x\n", (*ptr).vb[8].max_index); + debug_printf("\t\t.vb[8].instance_data_step_rate = 0x%x\n", (*ptr).vb[8].instance_data_step_rate); + debug_printf("\t\t.vb[9].vb0.pitch = 0x%x\n", (*ptr).vb[9].vb0.pitch); + debug_printf("\t\t.vb[9].vb0.pad = 0x%x\n", (*ptr).vb[9].vb0.pad); + debug_printf("\t\t.vb[9].vb0.access_type = 0x%x\n", (*ptr).vb[9].vb0.access_type); + debug_printf("\t\t.vb[9].vb0.vb_index = 0x%x\n", (*ptr).vb[9].vb0.vb_index); + debug_printf("\t\t.vb[9].start_addr = 0x%x\n", (*ptr).vb[9].start_addr); + debug_printf("\t\t.vb[9].max_index = 0x%x\n", (*ptr).vb[9].max_index); + debug_printf("\t\t.vb[9].instance_data_step_rate = 0x%x\n", (*ptr).vb[9].instance_data_step_rate); + debug_printf("\t\t.vb[10].vb0.pitch = 0x%x\n", (*ptr).vb[10].vb0.pitch); + debug_printf("\t\t.vb[10].vb0.pad = 0x%x\n", (*ptr).vb[10].vb0.pad); + debug_printf("\t\t.vb[10].vb0.access_type = 0x%x\n", (*ptr).vb[10].vb0.access_type); + debug_printf("\t\t.vb[10].vb0.vb_index = 0x%x\n", (*ptr).vb[10].vb0.vb_index); + debug_printf("\t\t.vb[10].start_addr = 0x%x\n", (*ptr).vb[10].start_addr); + debug_printf("\t\t.vb[10].max_index = 0x%x\n", (*ptr).vb[10].max_index); + debug_printf("\t\t.vb[10].instance_data_step_rate = 0x%x\n", (*ptr).vb[10].instance_data_step_rate); + debug_printf("\t\t.vb[11].vb0.pitch = 0x%x\n", (*ptr).vb[11].vb0.pitch); + debug_printf("\t\t.vb[11].vb0.pad = 0x%x\n", (*ptr).vb[11].vb0.pad); + debug_printf("\t\t.vb[11].vb0.access_type = 0x%x\n", (*ptr).vb[11].vb0.access_type); + debug_printf("\t\t.vb[11].vb0.vb_index = 0x%x\n", (*ptr).vb[11].vb0.vb_index); + debug_printf("\t\t.vb[11].start_addr = 0x%x\n", (*ptr).vb[11].start_addr); + debug_printf("\t\t.vb[11].max_index = 0x%x\n", (*ptr).vb[11].max_index); + debug_printf("\t\t.vb[11].instance_data_step_rate = 0x%x\n", (*ptr).vb[11].instance_data_step_rate); + debug_printf("\t\t.vb[12].vb0.pitch = 0x%x\n", (*ptr).vb[12].vb0.pitch); + debug_printf("\t\t.vb[12].vb0.pad = 0x%x\n", 
(*ptr).vb[12].vb0.pad); + debug_printf("\t\t.vb[12].vb0.access_type = 0x%x\n", (*ptr).vb[12].vb0.access_type); + debug_printf("\t\t.vb[12].vb0.vb_index = 0x%x\n", (*ptr).vb[12].vb0.vb_index); + debug_printf("\t\t.vb[12].start_addr = 0x%x\n", (*ptr).vb[12].start_addr); + debug_printf("\t\t.vb[12].max_index = 0x%x\n", (*ptr).vb[12].max_index); + debug_printf("\t\t.vb[12].instance_data_step_rate = 0x%x\n", (*ptr).vb[12].instance_data_step_rate); + debug_printf("\t\t.vb[13].vb0.pitch = 0x%x\n", (*ptr).vb[13].vb0.pitch); + debug_printf("\t\t.vb[13].vb0.pad = 0x%x\n", (*ptr).vb[13].vb0.pad); + debug_printf("\t\t.vb[13].vb0.access_type = 0x%x\n", (*ptr).vb[13].vb0.access_type); + debug_printf("\t\t.vb[13].vb0.vb_index = 0x%x\n", (*ptr).vb[13].vb0.vb_index); + debug_printf("\t\t.vb[13].start_addr = 0x%x\n", (*ptr).vb[13].start_addr); + debug_printf("\t\t.vb[13].max_index = 0x%x\n", (*ptr).vb[13].max_index); + debug_printf("\t\t.vb[13].instance_data_step_rate = 0x%x\n", (*ptr).vb[13].instance_data_step_rate); + debug_printf("\t\t.vb[14].vb0.pitch = 0x%x\n", (*ptr).vb[14].vb0.pitch); + debug_printf("\t\t.vb[14].vb0.pad = 0x%x\n", (*ptr).vb[14].vb0.pad); + debug_printf("\t\t.vb[14].vb0.access_type = 0x%x\n", (*ptr).vb[14].vb0.access_type); + debug_printf("\t\t.vb[14].vb0.vb_index = 0x%x\n", (*ptr).vb[14].vb0.vb_index); + debug_printf("\t\t.vb[14].start_addr = 0x%x\n", (*ptr).vb[14].start_addr); + debug_printf("\t\t.vb[14].max_index = 0x%x\n", (*ptr).vb[14].max_index); + debug_printf("\t\t.vb[14].instance_data_step_rate = 0x%x\n", (*ptr).vb[14].instance_data_step_rate); + debug_printf("\t\t.vb[15].vb0.pitch = 0x%x\n", (*ptr).vb[15].vb0.pitch); + debug_printf("\t\t.vb[15].vb0.pad = 0x%x\n", (*ptr).vb[15].vb0.pad); + debug_printf("\t\t.vb[15].vb0.access_type = 0x%x\n", (*ptr).vb[15].vb0.access_type); + debug_printf("\t\t.vb[15].vb0.vb_index = 0x%x\n", (*ptr).vb[15].vb0.vb_index); + debug_printf("\t\t.vb[15].start_addr = 0x%x\n", (*ptr).vb[15].start_addr); + 
debug_printf("\t\t.vb[15].max_index = 0x%x\n", (*ptr).vb[15].max_index); + debug_printf("\t\t.vb[15].instance_data_step_rate = 0x%x\n", (*ptr).vb[15].instance_data_step_rate); + debug_printf("\t\t.vb[16].vb0.pitch = 0x%x\n", (*ptr).vb[16].vb0.pitch); + debug_printf("\t\t.vb[16].vb0.pad = 0x%x\n", (*ptr).vb[16].vb0.pad); + debug_printf("\t\t.vb[16].vb0.access_type = 0x%x\n", (*ptr).vb[16].vb0.access_type); + debug_printf("\t\t.vb[16].vb0.vb_index = 0x%x\n", (*ptr).vb[16].vb0.vb_index); + debug_printf("\t\t.vb[16].start_addr = 0x%x\n", (*ptr).vb[16].start_addr); + debug_printf("\t\t.vb[16].max_index = 0x%x\n", (*ptr).vb[16].max_index); + debug_printf("\t\t.vb[16].instance_data_step_rate = 0x%x\n", (*ptr).vb[16].instance_data_step_rate); +} + +void +brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr) +{ + debug_printf("\t\t.vb0.pitch = 0x%x\n", (*ptr).vb0.pitch); + debug_printf("\t\t.vb0.pad = 0x%x\n", (*ptr).vb0.pad); + debug_printf("\t\t.vb0.access_type = 0x%x\n", (*ptr).vb0.access_type); + debug_printf("\t\t.vb0.vb_index = 0x%x\n", (*ptr).vb0.vb_index); + debug_printf("\t\t.start_addr = 0x%x\n", (*ptr).start_addr); + debug_printf("\t\t.max_index = 0x%x\n", (*ptr).max_index); + debug_printf("\t\t.instance_data_step_rate = 0x%x\n", (*ptr).instance_data_step_rate); +} + +void +brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr) +{ + debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); + debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); + debug_printf("\t\t.ve[0].ve0.src_offset = 0x%x\n", (*ptr).ve[0].ve0.src_offset); + debug_printf("\t\t.ve[0].ve0.pad = 0x%x\n", (*ptr).ve[0].ve0.pad); + debug_printf("\t\t.ve[0].ve0.src_format = 0x%x\n", (*ptr).ve[0].ve0.src_format); + debug_printf("\t\t.ve[0].ve0.pad0 = 0x%x\n", (*ptr).ve[0].ve0.pad0); + debug_printf("\t\t.ve[0].ve0.valid = 0x%x\n", (*ptr).ve[0].ve0.valid); + debug_printf("\t\t.ve[0].ve0.vertex_buffer_index = 0x%x\n", 
(*ptr).ve[0].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[0].ve1.dst_offset = 0x%x\n", (*ptr).ve[0].ve1.dst_offset); + debug_printf("\t\t.ve[0].ve1.pad = 0x%x\n", (*ptr).ve[0].ve1.pad); + debug_printf("\t\t.ve[0].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent3); + debug_printf("\t\t.ve[0].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent2); + debug_printf("\t\t.ve[0].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent1); + debug_printf("\t\t.ve[0].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent0); + debug_printf("\t\t.ve[1].ve0.src_offset = 0x%x\n", (*ptr).ve[1].ve0.src_offset); + debug_printf("\t\t.ve[1].ve0.pad = 0x%x\n", (*ptr).ve[1].ve0.pad); + debug_printf("\t\t.ve[1].ve0.src_format = 0x%x\n", (*ptr).ve[1].ve0.src_format); + debug_printf("\t\t.ve[1].ve0.pad0 = 0x%x\n", (*ptr).ve[1].ve0.pad0); + debug_printf("\t\t.ve[1].ve0.valid = 0x%x\n", (*ptr).ve[1].ve0.valid); + debug_printf("\t\t.ve[1].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[1].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[1].ve1.dst_offset = 0x%x\n", (*ptr).ve[1].ve1.dst_offset); + debug_printf("\t\t.ve[1].ve1.pad = 0x%x\n", (*ptr).ve[1].ve1.pad); + debug_printf("\t\t.ve[1].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent3); + debug_printf("\t\t.ve[1].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent2); + debug_printf("\t\t.ve[1].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent1); + debug_printf("\t\t.ve[1].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent0); + debug_printf("\t\t.ve[2].ve0.src_offset = 0x%x\n", (*ptr).ve[2].ve0.src_offset); + debug_printf("\t\t.ve[2].ve0.pad = 0x%x\n", (*ptr).ve[2].ve0.pad); + debug_printf("\t\t.ve[2].ve0.src_format = 0x%x\n", (*ptr).ve[2].ve0.src_format); + debug_printf("\t\t.ve[2].ve0.pad0 = 0x%x\n", (*ptr).ve[2].ve0.pad0); + debug_printf("\t\t.ve[2].ve0.valid = 0x%x\n", (*ptr).ve[2].ve0.valid); + debug_printf("\t\t.ve[2].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[2].ve0.vertex_buffer_index); 
+ debug_printf("\t\t.ve[2].ve1.dst_offset = 0x%x\n", (*ptr).ve[2].ve1.dst_offset); + debug_printf("\t\t.ve[2].ve1.pad = 0x%x\n", (*ptr).ve[2].ve1.pad); + debug_printf("\t\t.ve[2].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent3); + debug_printf("\t\t.ve[2].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent2); + debug_printf("\t\t.ve[2].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent1); + debug_printf("\t\t.ve[2].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent0); + debug_printf("\t\t.ve[3].ve0.src_offset = 0x%x\n", (*ptr).ve[3].ve0.src_offset); + debug_printf("\t\t.ve[3].ve0.pad = 0x%x\n", (*ptr).ve[3].ve0.pad); + debug_printf("\t\t.ve[3].ve0.src_format = 0x%x\n", (*ptr).ve[3].ve0.src_format); + debug_printf("\t\t.ve[3].ve0.pad0 = 0x%x\n", (*ptr).ve[3].ve0.pad0); + debug_printf("\t\t.ve[3].ve0.valid = 0x%x\n", (*ptr).ve[3].ve0.valid); + debug_printf("\t\t.ve[3].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[3].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[3].ve1.dst_offset = 0x%x\n", (*ptr).ve[3].ve1.dst_offset); + debug_printf("\t\t.ve[3].ve1.pad = 0x%x\n", (*ptr).ve[3].ve1.pad); + debug_printf("\t\t.ve[3].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent3); + debug_printf("\t\t.ve[3].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent2); + debug_printf("\t\t.ve[3].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent1); + debug_printf("\t\t.ve[3].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent0); + debug_printf("\t\t.ve[4].ve0.src_offset = 0x%x\n", (*ptr).ve[4].ve0.src_offset); + debug_printf("\t\t.ve[4].ve0.pad = 0x%x\n", (*ptr).ve[4].ve0.pad); + debug_printf("\t\t.ve[4].ve0.src_format = 0x%x\n", (*ptr).ve[4].ve0.src_format); + debug_printf("\t\t.ve[4].ve0.pad0 = 0x%x\n", (*ptr).ve[4].ve0.pad0); + debug_printf("\t\t.ve[4].ve0.valid = 0x%x\n", (*ptr).ve[4].ve0.valid); + debug_printf("\t\t.ve[4].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[4].ve0.vertex_buffer_index); + 
debug_printf("\t\t.ve[4].ve1.dst_offset = 0x%x\n", (*ptr).ve[4].ve1.dst_offset); + debug_printf("\t\t.ve[4].ve1.pad = 0x%x\n", (*ptr).ve[4].ve1.pad); + debug_printf("\t\t.ve[4].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent3); + debug_printf("\t\t.ve[4].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent2); + debug_printf("\t\t.ve[4].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent1); + debug_printf("\t\t.ve[4].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent0); + debug_printf("\t\t.ve[5].ve0.src_offset = 0x%x\n", (*ptr).ve[5].ve0.src_offset); + debug_printf("\t\t.ve[5].ve0.pad = 0x%x\n", (*ptr).ve[5].ve0.pad); + debug_printf("\t\t.ve[5].ve0.src_format = 0x%x\n", (*ptr).ve[5].ve0.src_format); + debug_printf("\t\t.ve[5].ve0.pad0 = 0x%x\n", (*ptr).ve[5].ve0.pad0); + debug_printf("\t\t.ve[5].ve0.valid = 0x%x\n", (*ptr).ve[5].ve0.valid); + debug_printf("\t\t.ve[5].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[5].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[5].ve1.dst_offset = 0x%x\n", (*ptr).ve[5].ve1.dst_offset); + debug_printf("\t\t.ve[5].ve1.pad = 0x%x\n", (*ptr).ve[5].ve1.pad); + debug_printf("\t\t.ve[5].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent3); + debug_printf("\t\t.ve[5].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent2); + debug_printf("\t\t.ve[5].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent1); + debug_printf("\t\t.ve[5].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent0); + debug_printf("\t\t.ve[6].ve0.src_offset = 0x%x\n", (*ptr).ve[6].ve0.src_offset); + debug_printf("\t\t.ve[6].ve0.pad = 0x%x\n", (*ptr).ve[6].ve0.pad); + debug_printf("\t\t.ve[6].ve0.src_format = 0x%x\n", (*ptr).ve[6].ve0.src_format); + debug_printf("\t\t.ve[6].ve0.pad0 = 0x%x\n", (*ptr).ve[6].ve0.pad0); + debug_printf("\t\t.ve[6].ve0.valid = 0x%x\n", (*ptr).ve[6].ve0.valid); + debug_printf("\t\t.ve[6].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[6].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[6].ve1.dst_offset 
= 0x%x\n", (*ptr).ve[6].ve1.dst_offset); + debug_printf("\t\t.ve[6].ve1.pad = 0x%x\n", (*ptr).ve[6].ve1.pad); + debug_printf("\t\t.ve[6].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent3); + debug_printf("\t\t.ve[6].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent2); + debug_printf("\t\t.ve[6].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent1); + debug_printf("\t\t.ve[6].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent0); + debug_printf("\t\t.ve[7].ve0.src_offset = 0x%x\n", (*ptr).ve[7].ve0.src_offset); + debug_printf("\t\t.ve[7].ve0.pad = 0x%x\n", (*ptr).ve[7].ve0.pad); + debug_printf("\t\t.ve[7].ve0.src_format = 0x%x\n", (*ptr).ve[7].ve0.src_format); + debug_printf("\t\t.ve[7].ve0.pad0 = 0x%x\n", (*ptr).ve[7].ve0.pad0); + debug_printf("\t\t.ve[7].ve0.valid = 0x%x\n", (*ptr).ve[7].ve0.valid); + debug_printf("\t\t.ve[7].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[7].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[7].ve1.dst_offset = 0x%x\n", (*ptr).ve[7].ve1.dst_offset); + debug_printf("\t\t.ve[7].ve1.pad = 0x%x\n", (*ptr).ve[7].ve1.pad); + debug_printf("\t\t.ve[7].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent3); + debug_printf("\t\t.ve[7].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent2); + debug_printf("\t\t.ve[7].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent1); + debug_printf("\t\t.ve[7].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent0); + debug_printf("\t\t.ve[8].ve0.src_offset = 0x%x\n", (*ptr).ve[8].ve0.src_offset); + debug_printf("\t\t.ve[8].ve0.pad = 0x%x\n", (*ptr).ve[8].ve0.pad); + debug_printf("\t\t.ve[8].ve0.src_format = 0x%x\n", (*ptr).ve[8].ve0.src_format); + debug_printf("\t\t.ve[8].ve0.pad0 = 0x%x\n", (*ptr).ve[8].ve0.pad0); + debug_printf("\t\t.ve[8].ve0.valid = 0x%x\n", (*ptr).ve[8].ve0.valid); + debug_printf("\t\t.ve[8].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[8].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[8].ve1.dst_offset = 0x%x\n", 
(*ptr).ve[8].ve1.dst_offset); + debug_printf("\t\t.ve[8].ve1.pad = 0x%x\n", (*ptr).ve[8].ve1.pad); + debug_printf("\t\t.ve[8].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent3); + debug_printf("\t\t.ve[8].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent2); + debug_printf("\t\t.ve[8].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent1); + debug_printf("\t\t.ve[8].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent0); + debug_printf("\t\t.ve[9].ve0.src_offset = 0x%x\n", (*ptr).ve[9].ve0.src_offset); + debug_printf("\t\t.ve[9].ve0.pad = 0x%x\n", (*ptr).ve[9].ve0.pad); + debug_printf("\t\t.ve[9].ve0.src_format = 0x%x\n", (*ptr).ve[9].ve0.src_format); + debug_printf("\t\t.ve[9].ve0.pad0 = 0x%x\n", (*ptr).ve[9].ve0.pad0); + debug_printf("\t\t.ve[9].ve0.valid = 0x%x\n", (*ptr).ve[9].ve0.valid); + debug_printf("\t\t.ve[9].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[9].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[9].ve1.dst_offset = 0x%x\n", (*ptr).ve[9].ve1.dst_offset); + debug_printf("\t\t.ve[9].ve1.pad = 0x%x\n", (*ptr).ve[9].ve1.pad); + debug_printf("\t\t.ve[9].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent3); + debug_printf("\t\t.ve[9].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent2); + debug_printf("\t\t.ve[9].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent1); + debug_printf("\t\t.ve[9].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent0); + debug_printf("\t\t.ve[10].ve0.src_offset = 0x%x\n", (*ptr).ve[10].ve0.src_offset); + debug_printf("\t\t.ve[10].ve0.pad = 0x%x\n", (*ptr).ve[10].ve0.pad); + debug_printf("\t\t.ve[10].ve0.src_format = 0x%x\n", (*ptr).ve[10].ve0.src_format); + debug_printf("\t\t.ve[10].ve0.pad0 = 0x%x\n", (*ptr).ve[10].ve0.pad0); + debug_printf("\t\t.ve[10].ve0.valid = 0x%x\n", (*ptr).ve[10].ve0.valid); + debug_printf("\t\t.ve[10].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[10].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[10].ve1.dst_offset = 0x%x\n", 
(*ptr).ve[10].ve1.dst_offset); + debug_printf("\t\t.ve[10].ve1.pad = 0x%x\n", (*ptr).ve[10].ve1.pad); + debug_printf("\t\t.ve[10].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent3); + debug_printf("\t\t.ve[10].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent2); + debug_printf("\t\t.ve[10].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent1); + debug_printf("\t\t.ve[10].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent0); + debug_printf("\t\t.ve[11].ve0.src_offset = 0x%x\n", (*ptr).ve[11].ve0.src_offset); + debug_printf("\t\t.ve[11].ve0.pad = 0x%x\n", (*ptr).ve[11].ve0.pad); + debug_printf("\t\t.ve[11].ve0.src_format = 0x%x\n", (*ptr).ve[11].ve0.src_format); + debug_printf("\t\t.ve[11].ve0.pad0 = 0x%x\n", (*ptr).ve[11].ve0.pad0); + debug_printf("\t\t.ve[11].ve0.valid = 0x%x\n", (*ptr).ve[11].ve0.valid); + debug_printf("\t\t.ve[11].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[11].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[11].ve1.dst_offset = 0x%x\n", (*ptr).ve[11].ve1.dst_offset); + debug_printf("\t\t.ve[11].ve1.pad = 0x%x\n", (*ptr).ve[11].ve1.pad); + debug_printf("\t\t.ve[11].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent3); + debug_printf("\t\t.ve[11].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent2); + debug_printf("\t\t.ve[11].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent1); + debug_printf("\t\t.ve[11].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent0); + debug_printf("\t\t.ve[12].ve0.src_offset = 0x%x\n", (*ptr).ve[12].ve0.src_offset); + debug_printf("\t\t.ve[12].ve0.pad = 0x%x\n", (*ptr).ve[12].ve0.pad); + debug_printf("\t\t.ve[12].ve0.src_format = 0x%x\n", (*ptr).ve[12].ve0.src_format); + debug_printf("\t\t.ve[12].ve0.pad0 = 0x%x\n", (*ptr).ve[12].ve0.pad0); + debug_printf("\t\t.ve[12].ve0.valid = 0x%x\n", (*ptr).ve[12].ve0.valid); + debug_printf("\t\t.ve[12].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[12].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[12].ve1.dst_offset = 
0x%x\n", (*ptr).ve[12].ve1.dst_offset); + debug_printf("\t\t.ve[12].ve1.pad = 0x%x\n", (*ptr).ve[12].ve1.pad); + debug_printf("\t\t.ve[12].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent3); + debug_printf("\t\t.ve[12].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent2); + debug_printf("\t\t.ve[12].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent1); + debug_printf("\t\t.ve[12].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent0); + debug_printf("\t\t.ve[13].ve0.src_offset = 0x%x\n", (*ptr).ve[13].ve0.src_offset); + debug_printf("\t\t.ve[13].ve0.pad = 0x%x\n", (*ptr).ve[13].ve0.pad); + debug_printf("\t\t.ve[13].ve0.src_format = 0x%x\n", (*ptr).ve[13].ve0.src_format); + debug_printf("\t\t.ve[13].ve0.pad0 = 0x%x\n", (*ptr).ve[13].ve0.pad0); + debug_printf("\t\t.ve[13].ve0.valid = 0x%x\n", (*ptr).ve[13].ve0.valid); + debug_printf("\t\t.ve[13].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[13].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[13].ve1.dst_offset = 0x%x\n", (*ptr).ve[13].ve1.dst_offset); + debug_printf("\t\t.ve[13].ve1.pad = 0x%x\n", (*ptr).ve[13].ve1.pad); + debug_printf("\t\t.ve[13].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent3); + debug_printf("\t\t.ve[13].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent2); + debug_printf("\t\t.ve[13].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent1); + debug_printf("\t\t.ve[13].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent0); + debug_printf("\t\t.ve[14].ve0.src_offset = 0x%x\n", (*ptr).ve[14].ve0.src_offset); + debug_printf("\t\t.ve[14].ve0.pad = 0x%x\n", (*ptr).ve[14].ve0.pad); + debug_printf("\t\t.ve[14].ve0.src_format = 0x%x\n", (*ptr).ve[14].ve0.src_format); + debug_printf("\t\t.ve[14].ve0.pad0 = 0x%x\n", (*ptr).ve[14].ve0.pad0); + debug_printf("\t\t.ve[14].ve0.valid = 0x%x\n", (*ptr).ve[14].ve0.valid); + debug_printf("\t\t.ve[14].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[14].ve0.vertex_buffer_index); + 
debug_printf("\t\t.ve[14].ve1.dst_offset = 0x%x\n", (*ptr).ve[14].ve1.dst_offset); + debug_printf("\t\t.ve[14].ve1.pad = 0x%x\n", (*ptr).ve[14].ve1.pad); + debug_printf("\t\t.ve[14].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent3); + debug_printf("\t\t.ve[14].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent2); + debug_printf("\t\t.ve[14].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent1); + debug_printf("\t\t.ve[14].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent0); + debug_printf("\t\t.ve[15].ve0.src_offset = 0x%x\n", (*ptr).ve[15].ve0.src_offset); + debug_printf("\t\t.ve[15].ve0.pad = 0x%x\n", (*ptr).ve[15].ve0.pad); + debug_printf("\t\t.ve[15].ve0.src_format = 0x%x\n", (*ptr).ve[15].ve0.src_format); + debug_printf("\t\t.ve[15].ve0.pad0 = 0x%x\n", (*ptr).ve[15].ve0.pad0); + debug_printf("\t\t.ve[15].ve0.valid = 0x%x\n", (*ptr).ve[15].ve0.valid); + debug_printf("\t\t.ve[15].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[15].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[15].ve1.dst_offset = 0x%x\n", (*ptr).ve[15].ve1.dst_offset); + debug_printf("\t\t.ve[15].ve1.pad = 0x%x\n", (*ptr).ve[15].ve1.pad); + debug_printf("\t\t.ve[15].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent3); + debug_printf("\t\t.ve[15].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent2); + debug_printf("\t\t.ve[15].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent1); + debug_printf("\t\t.ve[15].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent0); + debug_printf("\t\t.ve[16].ve0.src_offset = 0x%x\n", (*ptr).ve[16].ve0.src_offset); + debug_printf("\t\t.ve[16].ve0.pad = 0x%x\n", (*ptr).ve[16].ve0.pad); + debug_printf("\t\t.ve[16].ve0.src_format = 0x%x\n", (*ptr).ve[16].ve0.src_format); + debug_printf("\t\t.ve[16].ve0.pad0 = 0x%x\n", (*ptr).ve[16].ve0.pad0); + debug_printf("\t\t.ve[16].ve0.valid = 0x%x\n", (*ptr).ve[16].ve0.valid); + debug_printf("\t\t.ve[16].ve0.vertex_buffer_index = 0x%x\n", 
(*ptr).ve[16].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[16].ve1.dst_offset = 0x%x\n", (*ptr).ve[16].ve1.dst_offset); + debug_printf("\t\t.ve[16].ve1.pad = 0x%x\n", (*ptr).ve[16].ve1.pad); + debug_printf("\t\t.ve[16].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent3); + debug_printf("\t\t.ve[16].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent2); + debug_printf("\t\t.ve[16].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent1); + debug_printf("\t\t.ve[16].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent0); + debug_printf("\t\t.ve[17].ve0.src_offset = 0x%x\n", (*ptr).ve[17].ve0.src_offset); + debug_printf("\t\t.ve[17].ve0.pad = 0x%x\n", (*ptr).ve[17].ve0.pad); + debug_printf("\t\t.ve[17].ve0.src_format = 0x%x\n", (*ptr).ve[17].ve0.src_format); + debug_printf("\t\t.ve[17].ve0.pad0 = 0x%x\n", (*ptr).ve[17].ve0.pad0); + debug_printf("\t\t.ve[17].ve0.valid = 0x%x\n", (*ptr).ve[17].ve0.valid); + debug_printf("\t\t.ve[17].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[17].ve0.vertex_buffer_index); + debug_printf("\t\t.ve[17].ve1.dst_offset = 0x%x\n", (*ptr).ve[17].ve1.dst_offset); + debug_printf("\t\t.ve[17].ve1.pad = 0x%x\n", (*ptr).ve[17].ve1.pad); + debug_printf("\t\t.ve[17].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent3); + debug_printf("\t\t.ve[17].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent2); + debug_printf("\t\t.ve[17].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent1); + debug_printf("\t\t.ve[17].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent0); +} + +void +brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr) +{ + debug_printf("\t\t.ve0.src_offset = 0x%x\n", (*ptr).ve0.src_offset); + debug_printf("\t\t.ve0.pad = 0x%x\n", (*ptr).ve0.pad); + debug_printf("\t\t.ve0.src_format = 0x%x\n", (*ptr).ve0.src_format); + debug_printf("\t\t.ve0.pad0 = 0x%x\n", (*ptr).ve0.pad0); + debug_printf("\t\t.ve0.valid = 0x%x\n", (*ptr).ve0.valid); + 
debug_printf("\t\t.ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve0.vertex_buffer_index); + debug_printf("\t\t.ve1.dst_offset = 0x%x\n", (*ptr).ve1.dst_offset); + debug_printf("\t\t.ve1.pad = 0x%x\n", (*ptr).ve1.pad); + debug_printf("\t\t.ve1.vfcomponent3 = 0x%x\n", (*ptr).ve1.vfcomponent3); + debug_printf("\t\t.ve1.vfcomponent2 = 0x%x\n", (*ptr).ve1.vfcomponent2); + debug_printf("\t\t.ve1.vfcomponent1 = 0x%x\n", (*ptr).ve1.vfcomponent1); + debug_printf("\t\t.ve1.vfcomponent0 = 0x%x\n", (*ptr).ve1.vfcomponent0); +} + +void +brw_dump_vf_statistics(const struct brw_vf_statistics *ptr) +{ + debug_printf("\t\t.statistics_enable = 0x%x\n", (*ptr).statistics_enable); + debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); + debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode); +} + +void +brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr) +{ + debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0); + debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); + debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1); + debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); + debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); + debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); + debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); + debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); + debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); + debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0); + debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); + debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1); + debug_printf("\t\t.thread1.floating_point_mode = 
0x%x\n", (*ptr).thread1.floating_point_mode); + debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); + debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); + debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3); + debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); + debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); + debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0); + debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); + debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); + debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); + debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0); + debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); + debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1); + debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); + debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2); + debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); + debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3); + debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0); + debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); + debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); + debug_printf("\t\t.thread4.pad1 = 0x%x\n", (*ptr).thread4.pad1); + debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); + debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2); + 
debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); + debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3); + debug_printf("\t\t.vs5.sampler_count = 0x%x\n", (*ptr).vs5.sampler_count); + debug_printf("\t\t.vs5.pad0 = 0x%x\n", (*ptr).vs5.pad0); + debug_printf("\t\t.vs5.sampler_state_pointer = 0x%x\n", (*ptr).vs5.sampler_state_pointer); + debug_printf("\t\t.vs6.vs_enable = 0x%x\n", (*ptr).vs6.vs_enable); + debug_printf("\t\t.vs6.vert_cache_disable = 0x%x\n", (*ptr).vs6.vert_cache_disable); + debug_printf("\t\t.vs6.pad0 = 0x%x\n", (*ptr).vs6.pad0); +} + +void +brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr) +{ + debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0); + debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); + debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1); + debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); + debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); + debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); + debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); + debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); + debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); + debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0); + debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); + debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1); + debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); + debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); + 
debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); + debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3); + debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); + debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); + debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0); + debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); + debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); + debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); + debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0); + debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); + debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1); + debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); + debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2); + debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); + debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3); + debug_printf("\t\t.wm4.stats_enable = 0x%x\n", (*ptr).wm4.stats_enable); + debug_printf("\t\t.wm4.depth_buffer_clear = 0x%x\n", (*ptr).wm4.depth_buffer_clear); + debug_printf("\t\t.wm4.sampler_count = 0x%x\n", (*ptr).wm4.sampler_count); + debug_printf("\t\t.wm4.sampler_state_pointer = 0x%x\n", (*ptr).wm4.sampler_state_pointer); + debug_printf("\t\t.wm5.enable_8_pix = 0x%x\n", (*ptr).wm5.enable_8_pix); + debug_printf("\t\t.wm5.enable_16_pix = 0x%x\n", (*ptr).wm5.enable_16_pix); + debug_printf("\t\t.wm5.enable_32_pix = 0x%x\n", (*ptr).wm5.enable_32_pix); + debug_printf("\t\t.wm5.enable_con_32_pix = 0x%x\n", 
(*ptr).wm5.enable_con_32_pix); + debug_printf("\t\t.wm5.enable_con_64_pix = 0x%x\n", (*ptr).wm5.enable_con_64_pix); + debug_printf("\t\t.wm5.pad0 = 0x%x\n", (*ptr).wm5.pad0); + debug_printf("\t\t.wm5.legacy_global_depth_bias = 0x%x\n", (*ptr).wm5.legacy_global_depth_bias); + debug_printf("\t\t.wm5.line_stipple = 0x%x\n", (*ptr).wm5.line_stipple); + debug_printf("\t\t.wm5.depth_offset = 0x%x\n", (*ptr).wm5.depth_offset); + debug_printf("\t\t.wm5.polygon_stipple = 0x%x\n", (*ptr).wm5.polygon_stipple); + debug_printf("\t\t.wm5.line_aa_region_width = 0x%x\n", (*ptr).wm5.line_aa_region_width); + debug_printf("\t\t.wm5.line_endcap_aa_region_width = 0x%x\n", (*ptr).wm5.line_endcap_aa_region_width); + debug_printf("\t\t.wm5.early_depth_test = 0x%x\n", (*ptr).wm5.early_depth_test); + debug_printf("\t\t.wm5.thread_dispatch_enable = 0x%x\n", (*ptr).wm5.thread_dispatch_enable); + debug_printf("\t\t.wm5.program_uses_depth = 0x%x\n", (*ptr).wm5.program_uses_depth); + debug_printf("\t\t.wm5.program_computes_depth = 0x%x\n", (*ptr).wm5.program_computes_depth); + debug_printf("\t\t.wm5.program_uses_killpixel = 0x%x\n", (*ptr).wm5.program_uses_killpixel); + debug_printf("\t\t.wm5.legacy_line_rast = 0x%x\n", (*ptr).wm5.legacy_line_rast); + debug_printf("\t\t.wm5.transposed_urb_read_enable = 0x%x\n", (*ptr).wm5.transposed_urb_read_enable); + debug_printf("\t\t.wm5.max_threads = 0x%x\n", (*ptr).wm5.max_threads); + debug_printf("\t\t.global_depth_offset_constant = %f\n", (*ptr).global_depth_offset_constant); + debug_printf("\t\t.global_depth_offset_scale = %f\n", (*ptr).global_depth_offset_scale); + debug_printf("\t\t.wm8.pad0 = 0x%x\n", (*ptr).wm8.pad0); + debug_printf("\t\t.wm8.grf_reg_count_1 = 0x%x\n", (*ptr).wm8.grf_reg_count_1); + debug_printf("\t\t.wm8.pad1 = 0x%x\n", (*ptr).wm8.pad1); + debug_printf("\t\t.wm8.kernel_start_pointer_1 = 0x%x\n", (*ptr).wm8.kernel_start_pointer_1); + debug_printf("\t\t.wm9.pad0 = 0x%x\n", (*ptr).wm9.pad0); + debug_printf("\t\t.wm9.grf_reg_count_2 = 
0x%x\n", (*ptr).wm9.grf_reg_count_2); + debug_printf("\t\t.wm9.pad1 = 0x%x\n", (*ptr).wm9.pad1); + debug_printf("\t\t.wm9.kernel_start_pointer_2 = 0x%x\n", (*ptr).wm9.kernel_start_pointer_2); + debug_printf("\t\t.wm10.pad0 = 0x%x\n", (*ptr).wm10.pad0); + debug_printf("\t\t.wm10.grf_reg_count_3 = 0x%x\n", (*ptr).wm10.grf_reg_count_3); + debug_printf("\t\t.wm10.pad1 = 0x%x\n", (*ptr).wm10.pad1); + debug_printf("\t\t.wm10.kernel_start_pointer_3 = 0x%x\n", (*ptr).wm10.kernel_start_pointer_3); +} + diff --git a/src/gallium/drivers/i965/brw_structs_dump.h b/src/gallium/drivers/i965/brw_structs_dump.h new file mode 100644 index 0000000000..7c02dbfe33 --- /dev/null +++ b/src/gallium/drivers/i965/brw_structs_dump.h @@ -0,0 +1,276 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/** + * @file + * Dump i965 data structures. + * + * Generated automatically from brw_structs.h by brw_structs_dump.py. + */ + +#ifndef BRW_STRUCTS_DUMP_H +#define BRW_STRUCTS_DUMP_H + +struct brw_3d_control; +struct brw_3d_primitive; +struct brw_aa_line_parameters; +struct brw_binding_table_pointers; +struct brw_blend_constant_color; +struct brw_cc0; +struct brw_cc1; +struct brw_cc2; +struct brw_cc3; +struct brw_cc4; +struct brw_cc5; +struct brw_cc6; +struct brw_cc7; +struct brw_cc_unit_state; +struct brw_cc_viewport; +struct brw_clip_unit_state; +struct brw_clipper_viewport; +struct brw_constant_buffer; +struct brw_cs_urb_state; +struct brw_depthbuffer; +struct brw_depthbuffer_g4x; +struct brw_drawrect; +struct brw_global_depth_offset_clamp; +struct brw_gs_unit_state; +struct brw_indexbuffer; +struct brw_line_stipple; +struct brw_mi_flush; +struct brw_pipe_control; +struct brw_pipeline_select; +struct brw_pipelined_state_pointers; +struct brw_polygon_stipple; +struct brw_polygon_stipple_offset; +struct brw_sampler_default_color; +struct brw_sampler_state; +struct brw_sf_unit_state; +struct brw_sf_viewport; +struct brw_ss0; +struct brw_ss1; +struct brw_ss2; +struct brw_ss3; +struct brw_state_base_address; +struct brw_state_prefetch; +struct brw_surf_ss0; +struct brw_surf_ss1; +struct brw_surf_ss2; +struct brw_surf_ss3; +struct brw_surf_ss4; +struct brw_surf_ss5; +struct brw_surface_state; +struct brw_system_instruction_pointer; +struct brw_urb_fence; +struct brw_urb_immediate; +struct brw_vb_array_state; +struct brw_vertex_buffer_state; +struct brw_vertex_element_packet; +struct brw_vertex_element_state; +struct brw_vf_statistics; +struct brw_vs_unit_state; +struct brw_wm_unit_state; + +void 
+brw_dump_3d_control(const struct brw_3d_control *ptr); + +void +brw_dump_3d_primitive(const struct brw_3d_primitive *ptr); + +void +brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr); + +void +brw_dump_binding_table_pointers(const struct brw_binding_table_pointers *ptr); + +void +brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr); + +void +brw_dump_cc0(const struct brw_cc0 *ptr); + +void +brw_dump_cc1(const struct brw_cc1 *ptr); + +void +brw_dump_cc2(const struct brw_cc2 *ptr); + +void +brw_dump_cc3(const struct brw_cc3 *ptr); + +void +brw_dump_cc4(const struct brw_cc4 *ptr); + +void +brw_dump_cc5(const struct brw_cc5 *ptr); + +void +brw_dump_cc6(const struct brw_cc6 *ptr); + +void +brw_dump_cc7(const struct brw_cc7 *ptr); + +void +brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr); + +void +brw_dump_cc_viewport(const struct brw_cc_viewport *ptr); + +void +brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr); + +void +brw_dump_clipper_viewport(const struct brw_clipper_viewport *ptr); + +void +brw_dump_constant_buffer(const struct brw_constant_buffer *ptr); + +void +brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr); + +void +brw_dump_depthbuffer(const struct brw_depthbuffer *ptr); + +void +brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr); + +void +brw_dump_drawrect(const struct brw_drawrect *ptr); + +void +brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *ptr); + +void +brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr); + +void +brw_dump_indexbuffer(const struct brw_indexbuffer *ptr); + +void +brw_dump_line_stipple(const struct brw_line_stipple *ptr); + +void +brw_dump_mi_flush(const struct brw_mi_flush *ptr); + +void +brw_dump_pipe_control(const struct brw_pipe_control *ptr); + +void +brw_dump_pipeline_select(const struct brw_pipeline_select *ptr); + +void +brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr); + 
+void +brw_dump_polygon_stipple(const struct brw_polygon_stipple *ptr); + +void +brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr); + +void +brw_dump_sampler_default_color(const struct brw_sampler_default_color *ptr); + +void +brw_dump_sampler_state(const struct brw_sampler_state *ptr); + +void +brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr); + +void +brw_dump_sf_viewport(const struct brw_sf_viewport *ptr); + +void +brw_dump_ss0(const struct brw_ss0 *ptr); + +void +brw_dump_ss1(const struct brw_ss1 *ptr); + +void +brw_dump_ss2(const struct brw_ss2 *ptr); + +void +brw_dump_ss3(const struct brw_ss3 *ptr); + +void +brw_dump_state_base_address(const struct brw_state_base_address *ptr); + +void +brw_dump_state_prefetch(const struct brw_state_prefetch *ptr); + +void +brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr); + +void +brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr); + +void +brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr); + +void +brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr); + +void +brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr); + +void +brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr); + +void +brw_dump_surface_state(const struct brw_surface_state *ptr); + +void +brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer *ptr); + +void +brw_dump_urb_fence(const struct brw_urb_fence *ptr); + +void +brw_dump_urb_immediate(const struct brw_urb_immediate *ptr); + +void +brw_dump_vb_array_state(const struct brw_vb_array_state *ptr); + +void +brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr); + +void +brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr); + +void +brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr); + +void +brw_dump_vf_statistics(const struct brw_vf_statistics *ptr); + +void +brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr); + +void +brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr); + + 
+#endif /* BRW_STRUCTS_DUMP_H */ diff --git a/src/gallium/drivers/i965/brw_structs_dump.py b/src/gallium/drivers/i965/brw_structs_dump.py new file mode 100755 index 0000000000..581515878e --- /dev/null +++ b/src/gallium/drivers/i965/brw_structs_dump.py @@ -0,0 +1,284 @@ +#!/usr/bin/env python +''' +Generates dumpers for the i965 state structures using pygccxml. + +Run as + + PYTHONPATH=/path/to/pygccxml-1.0.0 python brw_structs_dump.py + +Jose Fonseca +''' + +copyright = ''' +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software.
+ * + **************************************************************************/ + ''' + +import os +import sys + +from pygccxml import parser +from pygccxml import declarations + +from pygccxml.declarations import algorithm +from pygccxml.declarations import decl_visitor +from pygccxml.declarations import type_traits +from pygccxml.declarations import type_visitor + + +enums = True + + +class decl_dumper_t(decl_visitor.decl_visitor_t): + + def __init__(self, stream, instance = '', decl = None): + decl_visitor.decl_visitor_t.__init__(self) + self.stream = stream + self._instance = instance + self.decl = decl + + def clone(self): + return decl_dumper_t(self.stream, self._instance, self.decl) + + def visit_class(self): + class_ = self.decl + assert self.decl.class_type in ('struct', 'union') + + for variable in class_.variables(recursive = False): + dump_type(self.stream, self._instance + '.' + variable.name, variable.type) + + def visit_enumeration(self): + if enums: + self.stream.write(' switch(%s) {\n' % ("(*ptr)" + self._instance,)) + for name, value in self.decl.values: + self.stream.write(' case %s:\n' % (name,)) + self.stream.write(' debug_printf("\\t\\t%s = %s\\n");\n' % (self._instance, name)) + self.stream.write(' break;\n') + self.stream.write(' default:\n') + self.stream.write(' debug_printf("\\t\\t%s = %%i\\n", %s);\n' % (self._instance, "(*ptr)" + self._instance)) + self.stream.write(' break;\n') + self.stream.write(' }\n') + else: + self.stream.write(' debug_printf("\\t\\t%s = %%i\\n", %s);\n' % (self._instance, "(*ptr)" + self._instance)) + + +def dump_decl(stream, instance, decl): + dumper = decl_dumper_t(stream, instance, decl) + algorithm.apply_visitor(dumper, decl) + + +class type_dumper_t(type_visitor.type_visitor_t): + + def __init__(self, stream, instance, type_): + type_visitor.type_visitor_t.__init__(self) + self.stream = stream + self.instance = instance + self.type = type_ + + def clone(self): + return type_dumper_t(self.instance, 
self.type) + + def visit_bool(self): + self.print_instance('%i') + + def visit_char(self): + #self.print_instance('%i') + self.print_instance('0x%x') + + def visit_unsigned_char(self): + #self.print_instance('%u') + self.print_instance('0x%x') + + def visit_signed_char(self): + #self.print_instance('%i') + self.print_instance('0x%x') + + def visit_wchar(self): + self.print_instance('0x%x') + + def visit_short_int(self): + #self.print_instance('%i') + self.print_instance('0x%x') + + def visit_short_unsigned_int(self): + #self.print_instance('%u') + self.print_instance('0x%x') + + def visit_int(self): + #self.print_instance('%i') + self.print_instance('0x%x') + + def visit_unsigned_int(self): + #self.print_instance('%u') + self.print_instance('0x%x') + + def visit_long_int(self): + #self.print_instance('%li') + self.print_instance('0x%lx') + + def visit_long_unsigned_int(self): + #self.print_instance('%lu') + self.print_instance('%0xlx') + + def visit_long_long_int(self): + #self.print_instance('%lli') + self.print_instance('%0xllx') + + def visit_long_long_unsigned_int(self): + #self.print_instance('%llu') + self.print_instance('0x%llx') + + def visit_float(self): + self.print_instance('%f') + + def visit_double(self): + self.print_instance('%f') + + def visit_array(self): + for i in range(type_traits.array_size(self.type)): + dump_type(self.stream, self.instance + '[%i]' % i, type_traits.base_type(self.type)) + + def visit_pointer(self): + self.print_instance('%p') + + def visit_declarated(self): + #stream.write('decl = %r\n' % self.type.decl_string) + decl = type_traits.remove_declarated(self.type) + dump_decl(self.stream, self.instance, decl) + + def print_instance(self, format): + self.stream.write(' debug_printf("\\t\\t%s = %s\\n", %s);\n' % (self.instance, format, "(*ptr)" + self.instance)) + + + +def dump_type(stream, instance, type_): + type_ = type_traits.remove_alias(type_) + visitor = type_dumper_t(stream, instance, type_) + 
algorithm.apply_visitor(visitor, type_) + + +def dump_struct_interface(stream, class_, suffix = ';'): + name = class_.name + assert name.startswith('brw_'); + name = name[:4] + 'dump_' + name[4:] + stream.write('void\n') + stream.write('%s(const struct %s *ptr)%s\n' % (name, class_.name, suffix)) + + +def dump_struct_implementation(stream, decls, class_): + dump_struct_interface(stream, class_, suffix = '') + stream.write('{\n') + dump_decl(stream, '', class_) + stream.write('}\n') + stream.write('\n') + + +def dump_header(stream): + stream.write(copyright.strip() + '\n') + stream.write('\n') + stream.write('/**\n') + stream.write(' * @file\n') + stream.write(' * Dump i965 data structures.\n') + stream.write(' *\n') + stream.write(' * Generated automatically from brw_structs.h by brw_structs_dump.py.\n') + stream.write(' */\n') + stream.write('\n') + + +def dump_interfaces(decls, global_ns, names): + stream = open('brw_structs_dump.h', 'wt') + + dump_header(stream) + + stream.write('#ifndef BRW_STRUCTS_DUMP_H\n') + stream.write('#define BRW_STRUCTS_DUMP_H\n') + stream.write('\n') + + for name in names: + stream.write('struct %s;\n' % (name,)) + stream.write('\n') + + for name in names: + (class_,) = global_ns.classes(name = name) + dump_struct_interface(stream, class_) + stream.write('\n') + stream.write('\n') + + stream.write('#endif /* BRW_STRUCTS_DUMP_H */\n') + + +def dump_implementations(decls, global_ns, names): + stream = open('brw_structs_dump.c', 'wt') + + dump_header(stream) + + stream.write('#include "util/u_debug.h"\n') + stream.write('\n') + stream.write('#include "brw_types.h"\n') + stream.write('#include "brw_structs.h"\n') + stream.write('#include "brw_structs_dump.h"\n') + stream.write('\n') + + for name in names: + (class_,) = global_ns.classes(name = name) + dump_struct_implementation(stream, decls, class_) + + +def decl_filter(decl): + '''Filter the declarations we're interested in''' + name = decl.name + return name.startswith('brw_') and name 
not in ('brw_instruction',) + + +def main(): + + config = parser.config_t( + include_paths = [ + '../../include', + ], + compiler = 'gcc', + ) + + headers = [ + 'brw_types.h', + 'brw_structs.h', + ] + + decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE) + global_ns = declarations.get_global_namespace(decls) + + names = [] + for class_ in global_ns.classes(decl_filter): + names.append(class_.name) + names.sort() + + dump_interfaces(decls, global_ns, names) + dump_implementations(decls, global_ns, names) + + +if __name__ == '__main__': + main() -- cgit v1.2.3 From 31b8b1dd36d9f07a7893a89ee985d83c4d0bb95b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 5 Nov 2009 12:44:36 +0000 Subject: i965g: Don't dump pads or dwords aliases. --- src/gallium/drivers/i965/brw_structs_dump.c | 264 --------------------------- src/gallium/drivers/i965/brw_structs_dump.py | 9 +- 2 files changed, 8 insertions(+), 265 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c index a8b96c6418..cd40fc6d61 100644 --- a/src/gallium/drivers/i965/brw_structs_dump.c +++ b/src/gallium/drivers/i965/brw_structs_dump.c @@ -43,12 +43,10 @@ brw_dump_3d_control(const struct brw_3d_control *ptr) { debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.notify_enable = 0x%x\n", (*ptr).header.notify_enable); - debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad); debug_printf("\t\t.header.wc_flush_enable = 0x%x\n", (*ptr).header.wc_flush_enable); debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", (*ptr).header.depth_stall_enable); debug_printf("\t\t.header.operation = 0x%x\n", (*ptr).header.operation); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); - debug_printf("\t\t.dest.pad = 0x%x\n", (*ptr).dest.pad); debug_printf("\t\t.dest.dest_addr_type = 0x%x\n", (*ptr).dest.dest_addr_type); 
debug_printf("\t\t.dest.dest_addr = 0x%x\n", (*ptr).dest.dest_addr); debug_printf("\t\t.dword2 = 0x%x\n", (*ptr).dword2); @@ -59,7 +57,6 @@ void brw_dump_3d_primitive(const struct brw_3d_primitive *ptr) { debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); - debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad); debug_printf("\t\t.header.topology = 0x%x\n", (*ptr).header.topology); debug_printf("\t\t.header.indexed = 0x%x\n", (*ptr).header.indexed); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); @@ -76,13 +73,9 @@ brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr) debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_scope); - debug_printf("\t\t.bits0.pad0 = 0x%x\n", (*ptr).bits0.pad0); debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", (*ptr).bits0.aa_coverage_bias); - debug_printf("\t\t.bits0.pad1 = 0x%x\n", (*ptr).bits0.pad1); debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_slope); - debug_printf("\t\t.bits1.pad0 = 0x%x\n", (*ptr).bits1.pad0); debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_bias); - debug_printf("\t\t.bits1.pad1 = 0x%x\n", (*ptr).bits1.pad1); } void @@ -111,13 +104,11 @@ brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr) void brw_dump_cc0(const struct brw_cc0 *ptr) { - debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); debug_printf("\t\t.bf_stencil_pass_depth_pass_op = 0x%x\n", (*ptr).bf_stencil_pass_depth_pass_op); debug_printf("\t\t.bf_stencil_pass_depth_fail_op = 0x%x\n", (*ptr).bf_stencil_pass_depth_fail_op); debug_printf("\t\t.bf_stencil_fail_op = 0x%x\n", (*ptr).bf_stencil_fail_op); debug_printf("\t\t.bf_stencil_func = 0x%x\n", (*ptr).bf_stencil_func); debug_printf("\t\t.bf_stencil_enable = 0x%x\n", 
(*ptr).bf_stencil_enable); - debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1); debug_printf("\t\t.stencil_write_enable = 0x%x\n", (*ptr).stencil_write_enable); debug_printf("\t\t.stencil_pass_depth_pass_op = 0x%x\n", (*ptr).stencil_pass_depth_pass_op); debug_printf("\t\t.stencil_pass_depth_fail_op = 0x%x\n", (*ptr).stencil_pass_depth_fail_op); @@ -139,7 +130,6 @@ void brw_dump_cc2(const struct brw_cc2 *ptr) { debug_printf("\t\t.logicop_enable = 0x%x\n", (*ptr).logicop_enable); - debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); debug_printf("\t\t.depth_write_enable = 0x%x\n", (*ptr).depth_write_enable); debug_printf("\t\t.depth_test_function = 0x%x\n", (*ptr).depth_test_function); debug_printf("\t\t.depth_test = 0x%x\n", (*ptr).depth_test); @@ -150,33 +140,27 @@ brw_dump_cc2(const struct brw_cc2 *ptr) void brw_dump_cc3(const struct brw_cc3 *ptr) { - debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); debug_printf("\t\t.alpha_test_func = 0x%x\n", (*ptr).alpha_test_func); debug_printf("\t\t.alpha_test = 0x%x\n", (*ptr).alpha_test); debug_printf("\t\t.blend_enable = 0x%x\n", (*ptr).blend_enable); debug_printf("\t\t.ia_blend_enable = 0x%x\n", (*ptr).ia_blend_enable); - debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1); debug_printf("\t\t.alpha_test_format = 0x%x\n", (*ptr).alpha_test_format); - debug_printf("\t\t.pad2 = 0x%x\n", (*ptr).pad2); } void brw_dump_cc4(const struct brw_cc4 *ptr) { - debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); debug_printf("\t\t.cc_viewport_state_offset = 0x%x\n", (*ptr).cc_viewport_state_offset); } void brw_dump_cc5(const struct brw_cc5 *ptr) { - debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); debug_printf("\t\t.ia_dest_blend_factor = 0x%x\n", (*ptr).ia_dest_blend_factor); debug_printf("\t\t.ia_src_blend_factor = 0x%x\n", (*ptr).ia_src_blend_factor); debug_printf("\t\t.ia_blend_function = 0x%x\n", (*ptr).ia_blend_function); debug_printf("\t\t.statistics_enable = 0x%x\n", (*ptr).statistics_enable); debug_printf("\t\t.logicop_func = 0x%x\n", 
(*ptr).logicop_func); - debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1); debug_printf("\t\t.dither_enable = 0x%x\n", (*ptr).dither_enable); } @@ -186,7 +170,6 @@ brw_dump_cc6(const struct brw_cc6 *ptr) debug_printf("\t\t.clamp_post_alpha_blend = 0x%x\n", (*ptr).clamp_post_alpha_blend); debug_printf("\t\t.clamp_pre_alpha_blend = 0x%x\n", (*ptr).clamp_pre_alpha_blend); debug_printf("\t\t.clamp_range = 0x%x\n", (*ptr).clamp_range); - debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); debug_printf("\t\t.y_dither_offset = 0x%x\n", (*ptr).y_dither_offset); debug_printf("\t\t.x_dither_offset = 0x%x\n", (*ptr).x_dither_offset); debug_printf("\t\t.dest_blend_factor = 0x%x\n", (*ptr).dest_blend_factor); @@ -207,13 +190,11 @@ brw_dump_cc7(const struct brw_cc7 *ptr) void brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr) { - debug_printf("\t\t.cc0.pad0 = 0x%x\n", (*ptr).cc0.pad0); debug_printf("\t\t.cc0.bf_stencil_pass_depth_pass_op = 0x%x\n", (*ptr).cc0.bf_stencil_pass_depth_pass_op); debug_printf("\t\t.cc0.bf_stencil_pass_depth_fail_op = 0x%x\n", (*ptr).cc0.bf_stencil_pass_depth_fail_op); debug_printf("\t\t.cc0.bf_stencil_fail_op = 0x%x\n", (*ptr).cc0.bf_stencil_fail_op); debug_printf("\t\t.cc0.bf_stencil_func = 0x%x\n", (*ptr).cc0.bf_stencil_func); debug_printf("\t\t.cc0.bf_stencil_enable = 0x%x\n", (*ptr).cc0.bf_stencil_enable); - debug_printf("\t\t.cc0.pad1 = 0x%x\n", (*ptr).cc0.pad1); debug_printf("\t\t.cc0.stencil_write_enable = 0x%x\n", (*ptr).cc0.stencil_write_enable); debug_printf("\t\t.cc0.stencil_pass_depth_pass_op = 0x%x\n", (*ptr).cc0.stencil_pass_depth_pass_op); debug_printf("\t\t.cc0.stencil_pass_depth_fail_op = 0x%x\n", (*ptr).cc0.stencil_pass_depth_fail_op); @@ -225,34 +206,26 @@ brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr) debug_printf("\t\t.cc1.stencil_test_mask = 0x%x\n", (*ptr).cc1.stencil_test_mask); debug_printf("\t\t.cc1.stencil_ref = 0x%x\n", (*ptr).cc1.stencil_ref); debug_printf("\t\t.cc2.logicop_enable = 0x%x\n", 
(*ptr).cc2.logicop_enable); - debug_printf("\t\t.cc2.pad0 = 0x%x\n", (*ptr).cc2.pad0); debug_printf("\t\t.cc2.depth_write_enable = 0x%x\n", (*ptr).cc2.depth_write_enable); debug_printf("\t\t.cc2.depth_test_function = 0x%x\n", (*ptr).cc2.depth_test_function); debug_printf("\t\t.cc2.depth_test = 0x%x\n", (*ptr).cc2.depth_test); debug_printf("\t\t.cc2.bf_stencil_write_mask = 0x%x\n", (*ptr).cc2.bf_stencil_write_mask); debug_printf("\t\t.cc2.bf_stencil_test_mask = 0x%x\n", (*ptr).cc2.bf_stencil_test_mask); - debug_printf("\t\t.cc3.pad0 = 0x%x\n", (*ptr).cc3.pad0); debug_printf("\t\t.cc3.alpha_test_func = 0x%x\n", (*ptr).cc3.alpha_test_func); debug_printf("\t\t.cc3.alpha_test = 0x%x\n", (*ptr).cc3.alpha_test); debug_printf("\t\t.cc3.blend_enable = 0x%x\n", (*ptr).cc3.blend_enable); debug_printf("\t\t.cc3.ia_blend_enable = 0x%x\n", (*ptr).cc3.ia_blend_enable); - debug_printf("\t\t.cc3.pad1 = 0x%x\n", (*ptr).cc3.pad1); debug_printf("\t\t.cc3.alpha_test_format = 0x%x\n", (*ptr).cc3.alpha_test_format); - debug_printf("\t\t.cc3.pad2 = 0x%x\n", (*ptr).cc3.pad2); - debug_printf("\t\t.cc4.pad0 = 0x%x\n", (*ptr).cc4.pad0); debug_printf("\t\t.cc4.cc_viewport_state_offset = 0x%x\n", (*ptr).cc4.cc_viewport_state_offset); - debug_printf("\t\t.cc5.pad0 = 0x%x\n", (*ptr).cc5.pad0); debug_printf("\t\t.cc5.ia_dest_blend_factor = 0x%x\n", (*ptr).cc5.ia_dest_blend_factor); debug_printf("\t\t.cc5.ia_src_blend_factor = 0x%x\n", (*ptr).cc5.ia_src_blend_factor); debug_printf("\t\t.cc5.ia_blend_function = 0x%x\n", (*ptr).cc5.ia_blend_function); debug_printf("\t\t.cc5.statistics_enable = 0x%x\n", (*ptr).cc5.statistics_enable); debug_printf("\t\t.cc5.logicop_func = 0x%x\n", (*ptr).cc5.logicop_func); - debug_printf("\t\t.cc5.pad1 = 0x%x\n", (*ptr).cc5.pad1); debug_printf("\t\t.cc5.dither_enable = 0x%x\n", (*ptr).cc5.dither_enable); debug_printf("\t\t.cc6.clamp_post_alpha_blend = 0x%x\n", (*ptr).cc6.clamp_post_alpha_blend); debug_printf("\t\t.cc6.clamp_pre_alpha_blend = 0x%x\n", 
(*ptr).cc6.clamp_pre_alpha_blend); debug_printf("\t\t.cc6.clamp_range = 0x%x\n", (*ptr).cc6.clamp_range); - debug_printf("\t\t.cc6.pad0 = 0x%x\n", (*ptr).cc6.pad0); debug_printf("\t\t.cc6.y_dither_offset = 0x%x\n", (*ptr).cc6.y_dither_offset); debug_printf("\t\t.cc6.x_dither_offset = 0x%x\n", (*ptr).cc6.x_dither_offset); debug_printf("\t\t.cc6.dest_blend_factor = 0x%x\n", (*ptr).cc6.dest_blend_factor); @@ -275,44 +248,27 @@ brw_dump_cc_viewport(const struct brw_cc_viewport *ptr) void brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr) { - debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0); debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); - debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1); debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); - debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0); debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); - debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1); debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); - debug_printf("\t\t.thread1.pad2 = 0x%x\n", (*ptr).thread1.pad2); debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); - debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3); debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); - debug_printf("\t\t.thread1.pad4 = 0x%x\n", (*ptr).thread1.pad4); debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", 
(*ptr).thread2.per_thread_scratch_space); - debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0); debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); - debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0); debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); - debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1); debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); - debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2); debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); - debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3); - debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0); debug_printf("\t\t.thread4.gs_output_stats = 0x%x\n", (*ptr).thread4.gs_output_stats); debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); - debug_printf("\t\t.thread4.pad1 = 0x%x\n", (*ptr).thread4.pad1); debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); - debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2); debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); - debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3); - debug_printf("\t\t.clip5.pad0 = 0x%x\n", (*ptr).clip5.pad0); debug_printf("\t\t.clip5.clip_mode = 0x%x\n", (*ptr).clip5.clip_mode); debug_printf("\t\t.clip5.userclip_enable_flags = 0x%x\n", (*ptr).clip5.userclip_enable_flags); debug_printf("\t\t.clip5.userclip_must_clip = 0x%x\n", 
(*ptr).clip5.userclip_must_clip); @@ -322,8 +278,6 @@ brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr) debug_printf("\t\t.clip5.viewport_xy_clip_enable = 0x%x\n", (*ptr).clip5.viewport_xy_clip_enable); debug_printf("\t\t.clip5.vertex_position_space = 0x%x\n", (*ptr).clip5.vertex_position_space); debug_printf("\t\t.clip5.api_mode = 0x%x\n", (*ptr).clip5.api_mode); - debug_printf("\t\t.clip5.pad2 = 0x%x\n", (*ptr).clip5.pad2); - debug_printf("\t\t.clip6.pad0 = 0x%x\n", (*ptr).clip6.pad0); debug_printf("\t\t.clip6.clipper_viewport_state_ptr = 0x%x\n", (*ptr).clip6.clipper_viewport_state_ptr); debug_printf("\t\t.viewport_xmin = %f\n", (*ptr).viewport_xmin); debug_printf("\t\t.viewport_xmax = %f\n", (*ptr).viewport_xmax); @@ -345,7 +299,6 @@ brw_dump_constant_buffer(const struct brw_constant_buffer *ptr) { debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.valid = 0x%x\n", (*ptr).header.valid); - debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); debug_printf("\t\t.bits0.buffer_length = 0x%x\n", (*ptr).bits0.buffer_length); debug_printf("\t\t.bits0.buffer_address = 0x%x\n", (*ptr).bits0.buffer_address); @@ -357,9 +310,7 @@ brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr) debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); debug_printf("\t\t.bits0.nr_urb_entries = 0x%x\n", (*ptr).bits0.nr_urb_entries); - debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); debug_printf("\t\t.bits0.urb_entry_size = 0x%x\n", (*ptr).bits0.urb_entry_size); - debug_printf("\t\t.bits0.pad0 = 0x%x\n", (*ptr).bits0.pad0); } void @@ -367,28 +318,20 @@ brw_dump_depthbuffer(const struct brw_depthbuffer *ptr) { debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length); debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode); - 
debug_printf("\t\t.header.dword = 0x%x\n", (*ptr).header.dword); debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", (*ptr).dword1.bits.pitch); debug_printf("\t\t.dword1.bits.format = 0x%x\n", (*ptr).dword1.bits.format); - debug_printf("\t\t.dword1.bits.pad = 0x%x\n", (*ptr).dword1.bits.pad); debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", (*ptr).dword1.bits.software_tiled_rendering_mode); debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", (*ptr).dword1.bits.depth_offset_disable); debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", (*ptr).dword1.bits.tile_walk); debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", (*ptr).dword1.bits.tiled_surface); - debug_printf("\t\t.dword1.bits.pad2 = 0x%x\n", (*ptr).dword1.bits.pad2); debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", (*ptr).dword1.bits.surface_type); - debug_printf("\t\t.dword1.dword = 0x%x\n", (*ptr).dword1.dword); debug_printf("\t\t.dword2_base_addr = 0x%x\n", (*ptr).dword2_base_addr); - debug_printf("\t\t.dword3.bits.pad = 0x%x\n", (*ptr).dword3.bits.pad); debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", (*ptr).dword3.bits.mipmap_layout); debug_printf("\t\t.dword3.bits.lod = 0x%x\n", (*ptr).dword3.bits.lod); debug_printf("\t\t.dword3.bits.width = 0x%x\n", (*ptr).dword3.bits.width); debug_printf("\t\t.dword3.bits.height = 0x%x\n", (*ptr).dword3.bits.height); - debug_printf("\t\t.dword3.dword = 0x%x\n", (*ptr).dword3.dword); - debug_printf("\t\t.dword4.bits.pad = 0x%x\n", (*ptr).dword4.bits.pad); debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", (*ptr).dword4.bits.min_array_element); debug_printf("\t\t.dword4.bits.depth = 0x%x\n", (*ptr).dword4.bits.depth); - debug_printf("\t\t.dword4.dword = 0x%x\n", (*ptr).dword4.dword); } void @@ -396,31 +339,22 @@ brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr) { debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length); debug_printf("\t\t.header.bits.opcode = 0x%x\n", 
(*ptr).header.bits.opcode); - debug_printf("\t\t.header.dword = 0x%x\n", (*ptr).header.dword); debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", (*ptr).dword1.bits.pitch); debug_printf("\t\t.dword1.bits.format = 0x%x\n", (*ptr).dword1.bits.format); - debug_printf("\t\t.dword1.bits.pad = 0x%x\n", (*ptr).dword1.bits.pad); debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", (*ptr).dword1.bits.software_tiled_rendering_mode); debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", (*ptr).dword1.bits.depth_offset_disable); debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", (*ptr).dword1.bits.tile_walk); debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", (*ptr).dword1.bits.tiled_surface); - debug_printf("\t\t.dword1.bits.pad2 = 0x%x\n", (*ptr).dword1.bits.pad2); debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", (*ptr).dword1.bits.surface_type); - debug_printf("\t\t.dword1.dword = 0x%x\n", (*ptr).dword1.dword); debug_printf("\t\t.dword2_base_addr = 0x%x\n", (*ptr).dword2_base_addr); - debug_printf("\t\t.dword3.bits.pad = 0x%x\n", (*ptr).dword3.bits.pad); debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", (*ptr).dword3.bits.mipmap_layout); debug_printf("\t\t.dword3.bits.lod = 0x%x\n", (*ptr).dword3.bits.lod); debug_printf("\t\t.dword3.bits.width = 0x%x\n", (*ptr).dword3.bits.width); debug_printf("\t\t.dword3.bits.height = 0x%x\n", (*ptr).dword3.bits.height); - debug_printf("\t\t.dword3.dword = 0x%x\n", (*ptr).dword3.dword); - debug_printf("\t\t.dword4.bits.pad = 0x%x\n", (*ptr).dword4.bits.pad); debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", (*ptr).dword4.bits.min_array_element); debug_printf("\t\t.dword4.bits.depth = 0x%x\n", (*ptr).dword4.bits.depth); - debug_printf("\t\t.dword4.dword = 0x%x\n", (*ptr).dword4.dword); debug_printf("\t\t.dword5.bits.xoffset = 0x%x\n", (*ptr).dword5.bits.xoffset); debug_printf("\t\t.dword5.bits.yoffset = 0x%x\n", (*ptr).dword5.bits.yoffset); - debug_printf("\t\t.dword5.dword = 0x%x\n", 
(*ptr).dword5.dword); } void @@ -447,57 +381,38 @@ brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *p void brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr) { - debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0); debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); - debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1); debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); - debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0); debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); - debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1); debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); - debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3); debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); - debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0); debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", 
(*ptr).thread2.scratch_space_base_pointer); debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); - debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0); debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); - debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1); debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); - debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2); debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); - debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3); - debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0); debug_printf("\t\t.thread4.rendering_enable = 0x%x\n", (*ptr).thread4.rendering_enable); - debug_printf("\t\t.thread4.pad4 = 0x%x\n", (*ptr).thread4.pad4); debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); - debug_printf("\t\t.thread4.pad1 = 0x%x\n", (*ptr).thread4.pad1); debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); - debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2); debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); - debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3); debug_printf("\t\t.gs5.sampler_count = 0x%x\n", (*ptr).gs5.sampler_count); - debug_printf("\t\t.gs5.pad0 = 0x%x\n", (*ptr).gs5.pad0); debug_printf("\t\t.gs5.sampler_state_pointer = 0x%x\n", (*ptr).gs5.sampler_state_pointer); debug_printf("\t\t.gs6.max_vp_index = 0x%x\n", (*ptr).gs6.max_vp_index); - debug_printf("\t\t.gs6.pad0 = 0x%x\n", (*ptr).gs6.pad0); 
debug_printf("\t\t.gs6.svbi_post_inc_value = 0x%x\n", (*ptr).gs6.svbi_post_inc_value); - debug_printf("\t\t.gs6.pad1 = 0x%x\n", (*ptr).gs6.pad1); debug_printf("\t\t.gs6.svbi_post_inc_enable = 0x%x\n", (*ptr).gs6.svbi_post_inc_enable); debug_printf("\t\t.gs6.svbi_payload = 0x%x\n", (*ptr).gs6.svbi_payload); debug_printf("\t\t.gs6.discard_adjaceny = 0x%x\n", (*ptr).gs6.discard_adjaceny); debug_printf("\t\t.gs6.reorder_enable = 0x%x\n", (*ptr).gs6.reorder_enable); - debug_printf("\t\t.gs6.pad2 = 0x%x\n", (*ptr).gs6.pad2); } void @@ -506,9 +421,7 @@ brw_dump_indexbuffer(const struct brw_indexbuffer *ptr) debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length); debug_printf("\t\t.header.bits.index_format = 0x%x\n", (*ptr).header.bits.index_format); debug_printf("\t\t.header.bits.cut_index_enable = 0x%x\n", (*ptr).header.bits.cut_index_enable); - debug_printf("\t\t.header.bits.pad = 0x%x\n", (*ptr).header.bits.pad); debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode); - debug_printf("\t\t.header.dword = 0x%x\n", (*ptr).header.dword); debug_printf("\t\t.buffer_start = 0x%x\n", (*ptr).buffer_start); debug_printf("\t\t.buffer_end = 0x%x\n", (*ptr).buffer_end); } @@ -519,9 +432,7 @@ brw_dump_line_stipple(const struct brw_line_stipple *ptr) debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); debug_printf("\t\t.bits0.pattern = 0x%x\n", (*ptr).bits0.pattern); - debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); debug_printf("\t\t.bits1.repeat_count = 0x%x\n", (*ptr).bits1.repeat_count); - debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad); debug_printf("\t\t.bits1.inverse_repeat_count = 0x%x\n", (*ptr).bits1.inverse_repeat_count); } @@ -529,7 +440,6 @@ void brw_dump_mi_flush(const struct brw_mi_flush *ptr) { debug_printf("\t\t.flags = 0x%x\n", (*ptr).flags); - debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); debug_printf("\t\t.opcode = 
0x%x\n", (*ptr).opcode); } @@ -545,7 +455,6 @@ brw_dump_pipe_control(const struct brw_pipe_control *ptr) debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", (*ptr).header.depth_stall_enable); debug_printf("\t\t.header.post_sync_operation = 0x%x\n", (*ptr).header.post_sync_operation); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); - debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad); debug_printf("\t\t.bits1.dest_addr_type = 0x%x\n", (*ptr).bits1.dest_addr_type); debug_printf("\t\t.bits1.dest_addr = 0x%x\n", (*ptr).bits1.dest_addr); debug_printf("\t\t.data0 = 0x%x\n", (*ptr).data0); @@ -556,7 +465,6 @@ void brw_dump_pipeline_select(const struct brw_pipeline_select *ptr) { debug_printf("\t\t.header.pipeline_select = 0x%x\n", (*ptr).header.pipeline_select); - debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); } @@ -565,19 +473,13 @@ brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr { debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); - debug_printf("\t\t.vs.pad = 0x%x\n", (*ptr).vs.pad); debug_printf("\t\t.vs.offset = 0x%x\n", (*ptr).vs.offset); debug_printf("\t\t.gs.enable = 0x%x\n", (*ptr).gs.enable); - debug_printf("\t\t.gs.pad = 0x%x\n", (*ptr).gs.pad); debug_printf("\t\t.gs.offset = 0x%x\n", (*ptr).gs.offset); debug_printf("\t\t.clp.enable = 0x%x\n", (*ptr).clp.enable); - debug_printf("\t\t.clp.pad = 0x%x\n", (*ptr).clp.pad); debug_printf("\t\t.clp.offset = 0x%x\n", (*ptr).clp.offset); - debug_printf("\t\t.sf.pad = 0x%x\n", (*ptr).sf.pad); debug_printf("\t\t.sf.offset = 0x%x\n", (*ptr).sf.offset); - debug_printf("\t\t.wm.pad = 0x%x\n", (*ptr).wm.pad); debug_printf("\t\t.wm.offset = 0x%x\n", (*ptr).wm.offset); - debug_printf("\t\t.cc.pad = 0x%x\n", (*ptr).cc.pad); debug_printf("\t\t.cc.offset = 0x%x\n", (*ptr).cc.offset); } @@ -626,9 +528,7 @@ 
brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr) debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); debug_printf("\t\t.bits0.y_offset = 0x%x\n", (*ptr).bits0.y_offset); - debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); debug_printf("\t\t.bits0.x_offset = 0x%x\n", (*ptr).bits0.x_offset); - debug_printf("\t\t.bits0.pad0 = 0x%x\n", (*ptr).bits0.pad0); } void @@ -649,20 +549,15 @@ brw_dump_sampler_state(const struct brw_sampler_state *ptr) debug_printf("\t\t.ss0.mag_filter = 0x%x\n", (*ptr).ss0.mag_filter); debug_printf("\t\t.ss0.mip_filter = 0x%x\n", (*ptr).ss0.mip_filter); debug_printf("\t\t.ss0.base_level = 0x%x\n", (*ptr).ss0.base_level); - debug_printf("\t\t.ss0.pad = 0x%x\n", (*ptr).ss0.pad); debug_printf("\t\t.ss0.lod_preclamp = 0x%x\n", (*ptr).ss0.lod_preclamp); debug_printf("\t\t.ss0.default_color_mode = 0x%x\n", (*ptr).ss0.default_color_mode); - debug_printf("\t\t.ss0.pad0 = 0x%x\n", (*ptr).ss0.pad0); debug_printf("\t\t.ss0.disable = 0x%x\n", (*ptr).ss0.disable); debug_printf("\t\t.ss1.r_wrap_mode = 0x%x\n", (*ptr).ss1.r_wrap_mode); debug_printf("\t\t.ss1.t_wrap_mode = 0x%x\n", (*ptr).ss1.t_wrap_mode); debug_printf("\t\t.ss1.s_wrap_mode = 0x%x\n", (*ptr).ss1.s_wrap_mode); - debug_printf("\t\t.ss1.pad = 0x%x\n", (*ptr).ss1.pad); debug_printf("\t\t.ss1.max_lod = 0x%x\n", (*ptr).ss1.max_lod); debug_printf("\t\t.ss1.min_lod = 0x%x\n", (*ptr).ss1.min_lod); - debug_printf("\t\t.ss2.pad = 0x%x\n", (*ptr).ss2.pad); debug_printf("\t\t.ss2.default_color_pointer = 0x%x\n", (*ptr).ss2.default_color_pointer); - debug_printf("\t\t.ss3.pad = 0x%x\n", (*ptr).ss3.pad); debug_printf("\t\t.ss3.max_aniso = 0x%x\n", (*ptr).ss3.max_aniso); debug_printf("\t\t.ss3.chroma_key_mode = 0x%x\n", (*ptr).ss3.chroma_key_mode); debug_printf("\t\t.ss3.chroma_key_index = 0x%x\n", (*ptr).ss3.chroma_key_index); @@ -674,48 +569,32 @@ brw_dump_sampler_state(const struct 
brw_sampler_state *ptr) void brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr) { - debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0); debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); - debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1); debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); - debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0); debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); - debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1); debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); - debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3); debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); - debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0); debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", 
(*ptr).thread3.dispatch_grf_start_reg); debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); - debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0); debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); - debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1); debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); - debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2); debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); - debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3); - debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0); debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); - debug_printf("\t\t.thread4.pad1 = 0x%x\n", (*ptr).thread4.pad1); debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); - debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2); debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); - debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3); debug_printf("\t\t.sf5.front_winding = 0x%x\n", (*ptr).sf5.front_winding); debug_printf("\t\t.sf5.viewport_transform = 0x%x\n", (*ptr).sf5.viewport_transform); - debug_printf("\t\t.sf5.pad0 = 0x%x\n", (*ptr).sf5.pad0); debug_printf("\t\t.sf5.sf_viewport_state_offset = 0x%x\n", (*ptr).sf5.sf_viewport_state_offset); - debug_printf("\t\t.sf6.pad0 = 0x%x\n", (*ptr).sf6.pad0); debug_printf("\t\t.sf6.dest_org_vbias = 0x%x\n", (*ptr).sf6.dest_org_vbias); debug_printf("\t\t.sf6.dest_org_hbias = 0x%x\n", (*ptr).sf6.dest_org_hbias); debug_printf("\t\t.sf6.scissor = 0x%x\n", (*ptr).sf6.scissor); @@ -731,7 +610,6 @@ brw_dump_sf_unit_state(const struct 
brw_sf_unit_state *ptr) debug_printf("\t\t.sf7.use_point_size_state = 0x%x\n", (*ptr).sf7.use_point_size_state); debug_printf("\t\t.sf7.subpixel_precision = 0x%x\n", (*ptr).sf7.subpixel_precision); debug_printf("\t\t.sf7.sprite_point = 0x%x\n", (*ptr).sf7.sprite_point); - debug_printf("\t\t.sf7.pad0 = 0x%x\n", (*ptr).sf7.pad0); debug_printf("\t\t.sf7.aa_line_distance_mode = 0x%x\n", (*ptr).sf7.aa_line_distance_mode); debug_printf("\t\t.sf7.trifan_pv = 0x%x\n", (*ptr).sf7.trifan_pv); debug_printf("\t\t.sf7.linestrip_pv = 0x%x\n", (*ptr).sf7.linestrip_pv); @@ -763,10 +641,8 @@ brw_dump_ss0(const struct brw_ss0 *ptr) debug_printf("\t\t.mag_filter = 0x%x\n", (*ptr).mag_filter); debug_printf("\t\t.mip_filter = 0x%x\n", (*ptr).mip_filter); debug_printf("\t\t.base_level = 0x%x\n", (*ptr).base_level); - debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); debug_printf("\t\t.lod_preclamp = 0x%x\n", (*ptr).lod_preclamp); debug_printf("\t\t.default_color_mode = 0x%x\n", (*ptr).default_color_mode); - debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); debug_printf("\t\t.disable = 0x%x\n", (*ptr).disable); } @@ -776,7 +652,6 @@ brw_dump_ss1(const struct brw_ss1 *ptr) debug_printf("\t\t.r_wrap_mode = 0x%x\n", (*ptr).r_wrap_mode); debug_printf("\t\t.t_wrap_mode = 0x%x\n", (*ptr).t_wrap_mode); debug_printf("\t\t.s_wrap_mode = 0x%x\n", (*ptr).s_wrap_mode); - debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); debug_printf("\t\t.max_lod = 0x%x\n", (*ptr).max_lod); debug_printf("\t\t.min_lod = 0x%x\n", (*ptr).min_lod); } @@ -784,14 +659,12 @@ brw_dump_ss1(const struct brw_ss1 *ptr) void brw_dump_ss2(const struct brw_ss2 *ptr) { - debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); debug_printf("\t\t.default_color_pointer = 0x%x\n", (*ptr).default_color_pointer); } void brw_dump_ss3(const struct brw_ss3 *ptr) { - debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); debug_printf("\t\t.max_aniso = 0x%x\n", (*ptr).max_aniso); debug_printf("\t\t.chroma_key_mode = 0x%x\n", (*ptr).chroma_key_mode); 
debug_printf("\t\t.chroma_key_index = 0x%x\n", (*ptr).chroma_key_index); @@ -806,19 +679,14 @@ brw_dump_state_base_address(const struct brw_state_base_address *ptr) debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); debug_printf("\t\t.bits0.modify_enable = 0x%x\n", (*ptr).bits0.modify_enable); - debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); debug_printf("\t\t.bits0.general_state_address = 0x%x\n", (*ptr).bits0.general_state_address); debug_printf("\t\t.bits1.modify_enable = 0x%x\n", (*ptr).bits1.modify_enable); - debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad); debug_printf("\t\t.bits1.surface_state_address = 0x%x\n", (*ptr).bits1.surface_state_address); debug_printf("\t\t.bits2.modify_enable = 0x%x\n", (*ptr).bits2.modify_enable); - debug_printf("\t\t.bits2.pad = 0x%x\n", (*ptr).bits2.pad); debug_printf("\t\t.bits2.indirect_object_state_address = 0x%x\n", (*ptr).bits2.indirect_object_state_address); debug_printf("\t\t.bits3.modify_enable = 0x%x\n", (*ptr).bits3.modify_enable); - debug_printf("\t\t.bits3.pad = 0x%x\n", (*ptr).bits3.pad); debug_printf("\t\t.bits3.general_state_upper_bound = 0x%x\n", (*ptr).bits3.general_state_upper_bound); debug_printf("\t\t.bits4.modify_enable = 0x%x\n", (*ptr).bits4.modify_enable); - debug_printf("\t\t.bits4.pad = 0x%x\n", (*ptr).bits4.pad); debug_printf("\t\t.bits4.indirect_object_state_upper_bound = 0x%x\n", (*ptr).bits4.indirect_object_state_upper_bound); } @@ -828,7 +696,6 @@ brw_dump_state_prefetch(const struct brw_state_prefetch *ptr) debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); debug_printf("\t\t.bits0.prefetch_count = 0x%x\n", (*ptr).bits0.prefetch_count); - debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); debug_printf("\t\t.bits0.prefetch_pointer = 0x%x\n", (*ptr).bits0.prefetch_pointer); } @@ -841,7 +708,6 @@ 
brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr) debug_printf("\t\t.cube_neg_y = 0x%x\n", (*ptr).cube_neg_y); debug_printf("\t\t.cube_pos_x = 0x%x\n", (*ptr).cube_pos_x); debug_printf("\t\t.cube_neg_x = 0x%x\n", (*ptr).cube_neg_x); - debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); debug_printf("\t\t.mipmap_layout_mode = 0x%x\n", (*ptr).mipmap_layout_mode); debug_printf("\t\t.vert_line_stride_ofs = 0x%x\n", (*ptr).vert_line_stride_ofs); debug_printf("\t\t.vert_line_stride = 0x%x\n", (*ptr).vert_line_stride); @@ -852,7 +718,6 @@ brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr) debug_printf("\t\t.writedisable_alpha = 0x%x\n", (*ptr).writedisable_alpha); debug_printf("\t\t.surface_format = 0x%x\n", (*ptr).surface_format); debug_printf("\t\t.data_return_format = 0x%x\n", (*ptr).data_return_format); - debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); debug_printf("\t\t.surface_type = 0x%x\n", (*ptr).surface_type); } @@ -865,7 +730,6 @@ brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr) void brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr) { - debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); debug_printf("\t\t.mip_count = 0x%x\n", (*ptr).mip_count); debug_printf("\t\t.width = 0x%x\n", (*ptr).width); debug_printf("\t\t.height = 0x%x\n", (*ptr).height); @@ -876,7 +740,6 @@ brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr) { debug_printf("\t\t.tile_walk = 0x%x\n", (*ptr).tile_walk); debug_printf("\t\t.tiled_surface = 0x%x\n", (*ptr).tiled_surface); - debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); debug_printf("\t\t.pitch = 0x%x\n", (*ptr).pitch); debug_printf("\t\t.depth = 0x%x\n", (*ptr).depth); } @@ -885,9 +748,7 @@ void brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr) { debug_printf("\t\t.multisample_position_palette_index = 0x%x\n", (*ptr).multisample_position_palette_index); - debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1); debug_printf("\t\t.num_multisamples = 0x%x\n", (*ptr).num_multisamples); - debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); 
debug_printf("\t\t.render_target_view_extent = 0x%x\n", (*ptr).render_target_view_extent); debug_printf("\t\t.min_array_elt = 0x%x\n", (*ptr).min_array_elt); debug_printf("\t\t.min_lod = 0x%x\n", (*ptr).min_lod); @@ -896,13 +757,11 @@ brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr) void brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr) { - debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1); debug_printf("\t\t.llc_mapping = 0x%x\n", (*ptr).llc_mapping); debug_printf("\t\t.mlc_mapping = 0x%x\n", (*ptr).mlc_mapping); debug_printf("\t\t.gfdt = 0x%x\n", (*ptr).gfdt); debug_printf("\t\t.gfdt_src = 0x%x\n", (*ptr).gfdt_src); debug_printf("\t\t.y_offset = 0x%x\n", (*ptr).y_offset); - debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0); debug_printf("\t\t.x_offset = 0x%x\n", (*ptr).x_offset); } @@ -915,7 +774,6 @@ brw_dump_surface_state(const struct brw_surface_state *ptr) debug_printf("\t\t.ss0.cube_neg_y = 0x%x\n", (*ptr).ss0.cube_neg_y); debug_printf("\t\t.ss0.cube_pos_x = 0x%x\n", (*ptr).ss0.cube_pos_x); debug_printf("\t\t.ss0.cube_neg_x = 0x%x\n", (*ptr).ss0.cube_neg_x); - debug_printf("\t\t.ss0.pad = 0x%x\n", (*ptr).ss0.pad); debug_printf("\t\t.ss0.mipmap_layout_mode = 0x%x\n", (*ptr).ss0.mipmap_layout_mode); debug_printf("\t\t.ss0.vert_line_stride_ofs = 0x%x\n", (*ptr).ss0.vert_line_stride_ofs); debug_printf("\t\t.ss0.vert_line_stride = 0x%x\n", (*ptr).ss0.vert_line_stride); @@ -926,32 +784,25 @@ brw_dump_surface_state(const struct brw_surface_state *ptr) debug_printf("\t\t.ss0.writedisable_alpha = 0x%x\n", (*ptr).ss0.writedisable_alpha); debug_printf("\t\t.ss0.surface_format = 0x%x\n", (*ptr).ss0.surface_format); debug_printf("\t\t.ss0.data_return_format = 0x%x\n", (*ptr).ss0.data_return_format); - debug_printf("\t\t.ss0.pad0 = 0x%x\n", (*ptr).ss0.pad0); debug_printf("\t\t.ss0.surface_type = 0x%x\n", (*ptr).ss0.surface_type); debug_printf("\t\t.ss1.base_addr = 0x%x\n", (*ptr).ss1.base_addr); - debug_printf("\t\t.ss2.pad = 0x%x\n", (*ptr).ss2.pad); 
debug_printf("\t\t.ss2.mip_count = 0x%x\n", (*ptr).ss2.mip_count); debug_printf("\t\t.ss2.width = 0x%x\n", (*ptr).ss2.width); debug_printf("\t\t.ss2.height = 0x%x\n", (*ptr).ss2.height); debug_printf("\t\t.ss3.tile_walk = 0x%x\n", (*ptr).ss3.tile_walk); debug_printf("\t\t.ss3.tiled_surface = 0x%x\n", (*ptr).ss3.tiled_surface); - debug_printf("\t\t.ss3.pad = 0x%x\n", (*ptr).ss3.pad); debug_printf("\t\t.ss3.pitch = 0x%x\n", (*ptr).ss3.pitch); debug_printf("\t\t.ss3.depth = 0x%x\n", (*ptr).ss3.depth); debug_printf("\t\t.ss4.multisample_position_palette_index = 0x%x\n", (*ptr).ss4.multisample_position_palette_index); - debug_printf("\t\t.ss4.pad1 = 0x%x\n", (*ptr).ss4.pad1); debug_printf("\t\t.ss4.num_multisamples = 0x%x\n", (*ptr).ss4.num_multisamples); - debug_printf("\t\t.ss4.pad0 = 0x%x\n", (*ptr).ss4.pad0); debug_printf("\t\t.ss4.render_target_view_extent = 0x%x\n", (*ptr).ss4.render_target_view_extent); debug_printf("\t\t.ss4.min_array_elt = 0x%x\n", (*ptr).ss4.min_array_elt); debug_printf("\t\t.ss4.min_lod = 0x%x\n", (*ptr).ss4.min_lod); - debug_printf("\t\t.ss5.pad1 = 0x%x\n", (*ptr).ss5.pad1); debug_printf("\t\t.ss5.llc_mapping = 0x%x\n", (*ptr).ss5.llc_mapping); debug_printf("\t\t.ss5.mlc_mapping = 0x%x\n", (*ptr).ss5.mlc_mapping); debug_printf("\t\t.ss5.gfdt = 0x%x\n", (*ptr).ss5.gfdt); debug_printf("\t\t.ss5.gfdt_src = 0x%x\n", (*ptr).ss5.gfdt_src); debug_printf("\t\t.ss5.y_offset = 0x%x\n", (*ptr).ss5.y_offset); - debug_printf("\t\t.ss5.pad0 = 0x%x\n", (*ptr).ss5.pad0); debug_printf("\t\t.ss5.x_offset = 0x%x\n", (*ptr).ss5.x_offset); } @@ -960,7 +811,6 @@ brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer { debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); - debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); debug_printf("\t\t.bits0.system_instruction_pointer = 0x%x\n", (*ptr).bits0.system_instruction_pointer); } @@ -974,16 +824,13 @@ 
brw_dump_urb_fence(const struct brw_urb_fence *ptr) debug_printf("\t\t.header.sf_realloc = 0x%x\n", (*ptr).header.sf_realloc); debug_printf("\t\t.header.vfe_realloc = 0x%x\n", (*ptr).header.vfe_realloc); debug_printf("\t\t.header.cs_realloc = 0x%x\n", (*ptr).header.cs_realloc); - debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); debug_printf("\t\t.bits0.vs_fence = 0x%x\n", (*ptr).bits0.vs_fence); debug_printf("\t\t.bits0.gs_fence = 0x%x\n", (*ptr).bits0.gs_fence); debug_printf("\t\t.bits0.clp_fence = 0x%x\n", (*ptr).bits0.clp_fence); - debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad); debug_printf("\t\t.bits1.sf_fence = 0x%x\n", (*ptr).bits1.sf_fence); debug_printf("\t\t.bits1.vf_fence = 0x%x\n", (*ptr).bits1.vf_fence); debug_printf("\t\t.bits1.cs_fence = 0x%x\n", (*ptr).bits1.cs_fence); - debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad); } void @@ -992,14 +839,12 @@ brw_dump_urb_immediate(const struct brw_urb_immediate *ptr) debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode); debug_printf("\t\t.offset = 0x%x\n", (*ptr).offset); debug_printf("\t\t.swizzle_control = 0x%x\n", (*ptr).swizzle_control); - debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); debug_printf("\t\t.allocate = 0x%x\n", (*ptr).allocate); debug_printf("\t\t.used = 0x%x\n", (*ptr).used); debug_printf("\t\t.complete = 0x%x\n", (*ptr).complete); debug_printf("\t\t.response_length = 0x%x\n", (*ptr).response_length); debug_printf("\t\t.msg_length = 0x%x\n", (*ptr).msg_length); debug_printf("\t\t.msg_target = 0x%x\n", (*ptr).msg_target); - debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1); debug_printf("\t\t.end_of_thread = 0x%x\n", (*ptr).end_of_thread); } @@ -1009,119 +854,102 @@ brw_dump_vb_array_state(const struct brw_vb_array_state *ptr) debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); debug_printf("\t\t.vb[0].vb0.pitch = 
0x%x\n", (*ptr).vb[0].vb0.pitch); - debug_printf("\t\t.vb[0].vb0.pad = 0x%x\n", (*ptr).vb[0].vb0.pad); debug_printf("\t\t.vb[0].vb0.access_type = 0x%x\n", (*ptr).vb[0].vb0.access_type); debug_printf("\t\t.vb[0].vb0.vb_index = 0x%x\n", (*ptr).vb[0].vb0.vb_index); debug_printf("\t\t.vb[0].start_addr = 0x%x\n", (*ptr).vb[0].start_addr); debug_printf("\t\t.vb[0].max_index = 0x%x\n", (*ptr).vb[0].max_index); debug_printf("\t\t.vb[0].instance_data_step_rate = 0x%x\n", (*ptr).vb[0].instance_data_step_rate); debug_printf("\t\t.vb[1].vb0.pitch = 0x%x\n", (*ptr).vb[1].vb0.pitch); - debug_printf("\t\t.vb[1].vb0.pad = 0x%x\n", (*ptr).vb[1].vb0.pad); debug_printf("\t\t.vb[1].vb0.access_type = 0x%x\n", (*ptr).vb[1].vb0.access_type); debug_printf("\t\t.vb[1].vb0.vb_index = 0x%x\n", (*ptr).vb[1].vb0.vb_index); debug_printf("\t\t.vb[1].start_addr = 0x%x\n", (*ptr).vb[1].start_addr); debug_printf("\t\t.vb[1].max_index = 0x%x\n", (*ptr).vb[1].max_index); debug_printf("\t\t.vb[1].instance_data_step_rate = 0x%x\n", (*ptr).vb[1].instance_data_step_rate); debug_printf("\t\t.vb[2].vb0.pitch = 0x%x\n", (*ptr).vb[2].vb0.pitch); - debug_printf("\t\t.vb[2].vb0.pad = 0x%x\n", (*ptr).vb[2].vb0.pad); debug_printf("\t\t.vb[2].vb0.access_type = 0x%x\n", (*ptr).vb[2].vb0.access_type); debug_printf("\t\t.vb[2].vb0.vb_index = 0x%x\n", (*ptr).vb[2].vb0.vb_index); debug_printf("\t\t.vb[2].start_addr = 0x%x\n", (*ptr).vb[2].start_addr); debug_printf("\t\t.vb[2].max_index = 0x%x\n", (*ptr).vb[2].max_index); debug_printf("\t\t.vb[2].instance_data_step_rate = 0x%x\n", (*ptr).vb[2].instance_data_step_rate); debug_printf("\t\t.vb[3].vb0.pitch = 0x%x\n", (*ptr).vb[3].vb0.pitch); - debug_printf("\t\t.vb[3].vb0.pad = 0x%x\n", (*ptr).vb[3].vb0.pad); debug_printf("\t\t.vb[3].vb0.access_type = 0x%x\n", (*ptr).vb[3].vb0.access_type); debug_printf("\t\t.vb[3].vb0.vb_index = 0x%x\n", (*ptr).vb[3].vb0.vb_index); debug_printf("\t\t.vb[3].start_addr = 0x%x\n", (*ptr).vb[3].start_addr); debug_printf("\t\t.vb[3].max_index 
= 0x%x\n", (*ptr).vb[3].max_index); debug_printf("\t\t.vb[3].instance_data_step_rate = 0x%x\n", (*ptr).vb[3].instance_data_step_rate); debug_printf("\t\t.vb[4].vb0.pitch = 0x%x\n", (*ptr).vb[4].vb0.pitch); - debug_printf("\t\t.vb[4].vb0.pad = 0x%x\n", (*ptr).vb[4].vb0.pad); debug_printf("\t\t.vb[4].vb0.access_type = 0x%x\n", (*ptr).vb[4].vb0.access_type); debug_printf("\t\t.vb[4].vb0.vb_index = 0x%x\n", (*ptr).vb[4].vb0.vb_index); debug_printf("\t\t.vb[4].start_addr = 0x%x\n", (*ptr).vb[4].start_addr); debug_printf("\t\t.vb[4].max_index = 0x%x\n", (*ptr).vb[4].max_index); debug_printf("\t\t.vb[4].instance_data_step_rate = 0x%x\n", (*ptr).vb[4].instance_data_step_rate); debug_printf("\t\t.vb[5].vb0.pitch = 0x%x\n", (*ptr).vb[5].vb0.pitch); - debug_printf("\t\t.vb[5].vb0.pad = 0x%x\n", (*ptr).vb[5].vb0.pad); debug_printf("\t\t.vb[5].vb0.access_type = 0x%x\n", (*ptr).vb[5].vb0.access_type); debug_printf("\t\t.vb[5].vb0.vb_index = 0x%x\n", (*ptr).vb[5].vb0.vb_index); debug_printf("\t\t.vb[5].start_addr = 0x%x\n", (*ptr).vb[5].start_addr); debug_printf("\t\t.vb[5].max_index = 0x%x\n", (*ptr).vb[5].max_index); debug_printf("\t\t.vb[5].instance_data_step_rate = 0x%x\n", (*ptr).vb[5].instance_data_step_rate); debug_printf("\t\t.vb[6].vb0.pitch = 0x%x\n", (*ptr).vb[6].vb0.pitch); - debug_printf("\t\t.vb[6].vb0.pad = 0x%x\n", (*ptr).vb[6].vb0.pad); debug_printf("\t\t.vb[6].vb0.access_type = 0x%x\n", (*ptr).vb[6].vb0.access_type); debug_printf("\t\t.vb[6].vb0.vb_index = 0x%x\n", (*ptr).vb[6].vb0.vb_index); debug_printf("\t\t.vb[6].start_addr = 0x%x\n", (*ptr).vb[6].start_addr); debug_printf("\t\t.vb[6].max_index = 0x%x\n", (*ptr).vb[6].max_index); debug_printf("\t\t.vb[6].instance_data_step_rate = 0x%x\n", (*ptr).vb[6].instance_data_step_rate); debug_printf("\t\t.vb[7].vb0.pitch = 0x%x\n", (*ptr).vb[7].vb0.pitch); - debug_printf("\t\t.vb[7].vb0.pad = 0x%x\n", (*ptr).vb[7].vb0.pad); debug_printf("\t\t.vb[7].vb0.access_type = 0x%x\n", (*ptr).vb[7].vb0.access_type); 
debug_printf("\t\t.vb[7].vb0.vb_index = 0x%x\n", (*ptr).vb[7].vb0.vb_index); debug_printf("\t\t.vb[7].start_addr = 0x%x\n", (*ptr).vb[7].start_addr); debug_printf("\t\t.vb[7].max_index = 0x%x\n", (*ptr).vb[7].max_index); debug_printf("\t\t.vb[7].instance_data_step_rate = 0x%x\n", (*ptr).vb[7].instance_data_step_rate); debug_printf("\t\t.vb[8].vb0.pitch = 0x%x\n", (*ptr).vb[8].vb0.pitch); - debug_printf("\t\t.vb[8].vb0.pad = 0x%x\n", (*ptr).vb[8].vb0.pad); debug_printf("\t\t.vb[8].vb0.access_type = 0x%x\n", (*ptr).vb[8].vb0.access_type); debug_printf("\t\t.vb[8].vb0.vb_index = 0x%x\n", (*ptr).vb[8].vb0.vb_index); debug_printf("\t\t.vb[8].start_addr = 0x%x\n", (*ptr).vb[8].start_addr); debug_printf("\t\t.vb[8].max_index = 0x%x\n", (*ptr).vb[8].max_index); debug_printf("\t\t.vb[8].instance_data_step_rate = 0x%x\n", (*ptr).vb[8].instance_data_step_rate); debug_printf("\t\t.vb[9].vb0.pitch = 0x%x\n", (*ptr).vb[9].vb0.pitch); - debug_printf("\t\t.vb[9].vb0.pad = 0x%x\n", (*ptr).vb[9].vb0.pad); debug_printf("\t\t.vb[9].vb0.access_type = 0x%x\n", (*ptr).vb[9].vb0.access_type); debug_printf("\t\t.vb[9].vb0.vb_index = 0x%x\n", (*ptr).vb[9].vb0.vb_index); debug_printf("\t\t.vb[9].start_addr = 0x%x\n", (*ptr).vb[9].start_addr); debug_printf("\t\t.vb[9].max_index = 0x%x\n", (*ptr).vb[9].max_index); debug_printf("\t\t.vb[9].instance_data_step_rate = 0x%x\n", (*ptr).vb[9].instance_data_step_rate); debug_printf("\t\t.vb[10].vb0.pitch = 0x%x\n", (*ptr).vb[10].vb0.pitch); - debug_printf("\t\t.vb[10].vb0.pad = 0x%x\n", (*ptr).vb[10].vb0.pad); debug_printf("\t\t.vb[10].vb0.access_type = 0x%x\n", (*ptr).vb[10].vb0.access_type); debug_printf("\t\t.vb[10].vb0.vb_index = 0x%x\n", (*ptr).vb[10].vb0.vb_index); debug_printf("\t\t.vb[10].start_addr = 0x%x\n", (*ptr).vb[10].start_addr); debug_printf("\t\t.vb[10].max_index = 0x%x\n", (*ptr).vb[10].max_index); debug_printf("\t\t.vb[10].instance_data_step_rate = 0x%x\n", (*ptr).vb[10].instance_data_step_rate); debug_printf("\t\t.vb[11].vb0.pitch 
= 0x%x\n", (*ptr).vb[11].vb0.pitch); - debug_printf("\t\t.vb[11].vb0.pad = 0x%x\n", (*ptr).vb[11].vb0.pad); debug_printf("\t\t.vb[11].vb0.access_type = 0x%x\n", (*ptr).vb[11].vb0.access_type); debug_printf("\t\t.vb[11].vb0.vb_index = 0x%x\n", (*ptr).vb[11].vb0.vb_index); debug_printf("\t\t.vb[11].start_addr = 0x%x\n", (*ptr).vb[11].start_addr); debug_printf("\t\t.vb[11].max_index = 0x%x\n", (*ptr).vb[11].max_index); debug_printf("\t\t.vb[11].instance_data_step_rate = 0x%x\n", (*ptr).vb[11].instance_data_step_rate); debug_printf("\t\t.vb[12].vb0.pitch = 0x%x\n", (*ptr).vb[12].vb0.pitch); - debug_printf("\t\t.vb[12].vb0.pad = 0x%x\n", (*ptr).vb[12].vb0.pad); debug_printf("\t\t.vb[12].vb0.access_type = 0x%x\n", (*ptr).vb[12].vb0.access_type); debug_printf("\t\t.vb[12].vb0.vb_index = 0x%x\n", (*ptr).vb[12].vb0.vb_index); debug_printf("\t\t.vb[12].start_addr = 0x%x\n", (*ptr).vb[12].start_addr); debug_printf("\t\t.vb[12].max_index = 0x%x\n", (*ptr).vb[12].max_index); debug_printf("\t\t.vb[12].instance_data_step_rate = 0x%x\n", (*ptr).vb[12].instance_data_step_rate); debug_printf("\t\t.vb[13].vb0.pitch = 0x%x\n", (*ptr).vb[13].vb0.pitch); - debug_printf("\t\t.vb[13].vb0.pad = 0x%x\n", (*ptr).vb[13].vb0.pad); debug_printf("\t\t.vb[13].vb0.access_type = 0x%x\n", (*ptr).vb[13].vb0.access_type); debug_printf("\t\t.vb[13].vb0.vb_index = 0x%x\n", (*ptr).vb[13].vb0.vb_index); debug_printf("\t\t.vb[13].start_addr = 0x%x\n", (*ptr).vb[13].start_addr); debug_printf("\t\t.vb[13].max_index = 0x%x\n", (*ptr).vb[13].max_index); debug_printf("\t\t.vb[13].instance_data_step_rate = 0x%x\n", (*ptr).vb[13].instance_data_step_rate); debug_printf("\t\t.vb[14].vb0.pitch = 0x%x\n", (*ptr).vb[14].vb0.pitch); - debug_printf("\t\t.vb[14].vb0.pad = 0x%x\n", (*ptr).vb[14].vb0.pad); debug_printf("\t\t.vb[14].vb0.access_type = 0x%x\n", (*ptr).vb[14].vb0.access_type); debug_printf("\t\t.vb[14].vb0.vb_index = 0x%x\n", (*ptr).vb[14].vb0.vb_index); debug_printf("\t\t.vb[14].start_addr = 0x%x\n", 
(*ptr).vb[14].start_addr); debug_printf("\t\t.vb[14].max_index = 0x%x\n", (*ptr).vb[14].max_index); debug_printf("\t\t.vb[14].instance_data_step_rate = 0x%x\n", (*ptr).vb[14].instance_data_step_rate); debug_printf("\t\t.vb[15].vb0.pitch = 0x%x\n", (*ptr).vb[15].vb0.pitch); - debug_printf("\t\t.vb[15].vb0.pad = 0x%x\n", (*ptr).vb[15].vb0.pad); debug_printf("\t\t.vb[15].vb0.access_type = 0x%x\n", (*ptr).vb[15].vb0.access_type); debug_printf("\t\t.vb[15].vb0.vb_index = 0x%x\n", (*ptr).vb[15].vb0.vb_index); debug_printf("\t\t.vb[15].start_addr = 0x%x\n", (*ptr).vb[15].start_addr); debug_printf("\t\t.vb[15].max_index = 0x%x\n", (*ptr).vb[15].max_index); debug_printf("\t\t.vb[15].instance_data_step_rate = 0x%x\n", (*ptr).vb[15].instance_data_step_rate); debug_printf("\t\t.vb[16].vb0.pitch = 0x%x\n", (*ptr).vb[16].vb0.pitch); - debug_printf("\t\t.vb[16].vb0.pad = 0x%x\n", (*ptr).vb[16].vb0.pad); debug_printf("\t\t.vb[16].vb0.access_type = 0x%x\n", (*ptr).vb[16].vb0.access_type); debug_printf("\t\t.vb[16].vb0.vb_index = 0x%x\n", (*ptr).vb[16].vb0.vb_index); debug_printf("\t\t.vb[16].start_addr = 0x%x\n", (*ptr).vb[16].start_addr); @@ -1133,7 +961,6 @@ void brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr) { debug_printf("\t\t.vb0.pitch = 0x%x\n", (*ptr).vb0.pitch); - debug_printf("\t\t.vb0.pad = 0x%x\n", (*ptr).vb0.pad); debug_printf("\t\t.vb0.access_type = 0x%x\n", (*ptr).vb0.access_type); debug_printf("\t\t.vb0.vb_index = 0x%x\n", (*ptr).vb0.vb_index); debug_printf("\t\t.start_addr = 0x%x\n", (*ptr).start_addr); @@ -1147,217 +974,163 @@ brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr) debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length); debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode); debug_printf("\t\t.ve[0].ve0.src_offset = 0x%x\n", (*ptr).ve[0].ve0.src_offset); - debug_printf("\t\t.ve[0].ve0.pad = 0x%x\n", (*ptr).ve[0].ve0.pad); debug_printf("\t\t.ve[0].ve0.src_format = 0x%x\n", 
(*ptr).ve[0].ve0.src_format); - debug_printf("\t\t.ve[0].ve0.pad0 = 0x%x\n", (*ptr).ve[0].ve0.pad0); debug_printf("\t\t.ve[0].ve0.valid = 0x%x\n", (*ptr).ve[0].ve0.valid); debug_printf("\t\t.ve[0].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[0].ve0.vertex_buffer_index); debug_printf("\t\t.ve[0].ve1.dst_offset = 0x%x\n", (*ptr).ve[0].ve1.dst_offset); - debug_printf("\t\t.ve[0].ve1.pad = 0x%x\n", (*ptr).ve[0].ve1.pad); debug_printf("\t\t.ve[0].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent3); debug_printf("\t\t.ve[0].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent2); debug_printf("\t\t.ve[0].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent1); debug_printf("\t\t.ve[0].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent0); debug_printf("\t\t.ve[1].ve0.src_offset = 0x%x\n", (*ptr).ve[1].ve0.src_offset); - debug_printf("\t\t.ve[1].ve0.pad = 0x%x\n", (*ptr).ve[1].ve0.pad); debug_printf("\t\t.ve[1].ve0.src_format = 0x%x\n", (*ptr).ve[1].ve0.src_format); - debug_printf("\t\t.ve[1].ve0.pad0 = 0x%x\n", (*ptr).ve[1].ve0.pad0); debug_printf("\t\t.ve[1].ve0.valid = 0x%x\n", (*ptr).ve[1].ve0.valid); debug_printf("\t\t.ve[1].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[1].ve0.vertex_buffer_index); debug_printf("\t\t.ve[1].ve1.dst_offset = 0x%x\n", (*ptr).ve[1].ve1.dst_offset); - debug_printf("\t\t.ve[1].ve1.pad = 0x%x\n", (*ptr).ve[1].ve1.pad); debug_printf("\t\t.ve[1].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent3); debug_printf("\t\t.ve[1].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent2); debug_printf("\t\t.ve[1].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent1); debug_printf("\t\t.ve[1].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent0); debug_printf("\t\t.ve[2].ve0.src_offset = 0x%x\n", (*ptr).ve[2].ve0.src_offset); - debug_printf("\t\t.ve[2].ve0.pad = 0x%x\n", (*ptr).ve[2].ve0.pad); debug_printf("\t\t.ve[2].ve0.src_format = 0x%x\n", (*ptr).ve[2].ve0.src_format); - debug_printf("\t\t.ve[2].ve0.pad0 = 
0x%x\n", (*ptr).ve[2].ve0.pad0); debug_printf("\t\t.ve[2].ve0.valid = 0x%x\n", (*ptr).ve[2].ve0.valid); debug_printf("\t\t.ve[2].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[2].ve0.vertex_buffer_index); debug_printf("\t\t.ve[2].ve1.dst_offset = 0x%x\n", (*ptr).ve[2].ve1.dst_offset); - debug_printf("\t\t.ve[2].ve1.pad = 0x%x\n", (*ptr).ve[2].ve1.pad); debug_printf("\t\t.ve[2].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent3); debug_printf("\t\t.ve[2].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent2); debug_printf("\t\t.ve[2].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent1); debug_printf("\t\t.ve[2].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent0); debug_printf("\t\t.ve[3].ve0.src_offset = 0x%x\n", (*ptr).ve[3].ve0.src_offset); - debug_printf("\t\t.ve[3].ve0.pad = 0x%x\n", (*ptr).ve[3].ve0.pad); debug_printf("\t\t.ve[3].ve0.src_format = 0x%x\n", (*ptr).ve[3].ve0.src_format); - debug_printf("\t\t.ve[3].ve0.pad0 = 0x%x\n", (*ptr).ve[3].ve0.pad0); debug_printf("\t\t.ve[3].ve0.valid = 0x%x\n", (*ptr).ve[3].ve0.valid); debug_printf("\t\t.ve[3].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[3].ve0.vertex_buffer_index); debug_printf("\t\t.ve[3].ve1.dst_offset = 0x%x\n", (*ptr).ve[3].ve1.dst_offset); - debug_printf("\t\t.ve[3].ve1.pad = 0x%x\n", (*ptr).ve[3].ve1.pad); debug_printf("\t\t.ve[3].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent3); debug_printf("\t\t.ve[3].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent2); debug_printf("\t\t.ve[3].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent1); debug_printf("\t\t.ve[3].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent0); debug_printf("\t\t.ve[4].ve0.src_offset = 0x%x\n", (*ptr).ve[4].ve0.src_offset); - debug_printf("\t\t.ve[4].ve0.pad = 0x%x\n", (*ptr).ve[4].ve0.pad); debug_printf("\t\t.ve[4].ve0.src_format = 0x%x\n", (*ptr).ve[4].ve0.src_format); - debug_printf("\t\t.ve[4].ve0.pad0 = 0x%x\n", (*ptr).ve[4].ve0.pad0); debug_printf("\t\t.ve[4].ve0.valid = 
0x%x\n", (*ptr).ve[4].ve0.valid); debug_printf("\t\t.ve[4].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[4].ve0.vertex_buffer_index); debug_printf("\t\t.ve[4].ve1.dst_offset = 0x%x\n", (*ptr).ve[4].ve1.dst_offset); - debug_printf("\t\t.ve[4].ve1.pad = 0x%x\n", (*ptr).ve[4].ve1.pad); debug_printf("\t\t.ve[4].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent3); debug_printf("\t\t.ve[4].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent2); debug_printf("\t\t.ve[4].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent1); debug_printf("\t\t.ve[4].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent0); debug_printf("\t\t.ve[5].ve0.src_offset = 0x%x\n", (*ptr).ve[5].ve0.src_offset); - debug_printf("\t\t.ve[5].ve0.pad = 0x%x\n", (*ptr).ve[5].ve0.pad); debug_printf("\t\t.ve[5].ve0.src_format = 0x%x\n", (*ptr).ve[5].ve0.src_format); - debug_printf("\t\t.ve[5].ve0.pad0 = 0x%x\n", (*ptr).ve[5].ve0.pad0); debug_printf("\t\t.ve[5].ve0.valid = 0x%x\n", (*ptr).ve[5].ve0.valid); debug_printf("\t\t.ve[5].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[5].ve0.vertex_buffer_index); debug_printf("\t\t.ve[5].ve1.dst_offset = 0x%x\n", (*ptr).ve[5].ve1.dst_offset); - debug_printf("\t\t.ve[5].ve1.pad = 0x%x\n", (*ptr).ve[5].ve1.pad); debug_printf("\t\t.ve[5].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent3); debug_printf("\t\t.ve[5].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent2); debug_printf("\t\t.ve[5].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent1); debug_printf("\t\t.ve[5].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent0); debug_printf("\t\t.ve[6].ve0.src_offset = 0x%x\n", (*ptr).ve[6].ve0.src_offset); - debug_printf("\t\t.ve[6].ve0.pad = 0x%x\n", (*ptr).ve[6].ve0.pad); debug_printf("\t\t.ve[6].ve0.src_format = 0x%x\n", (*ptr).ve[6].ve0.src_format); - debug_printf("\t\t.ve[6].ve0.pad0 = 0x%x\n", (*ptr).ve[6].ve0.pad0); debug_printf("\t\t.ve[6].ve0.valid = 0x%x\n", (*ptr).ve[6].ve0.valid); 
debug_printf("\t\t.ve[6].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[6].ve0.vertex_buffer_index); debug_printf("\t\t.ve[6].ve1.dst_offset = 0x%x\n", (*ptr).ve[6].ve1.dst_offset); - debug_printf("\t\t.ve[6].ve1.pad = 0x%x\n", (*ptr).ve[6].ve1.pad); debug_printf("\t\t.ve[6].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent3); debug_printf("\t\t.ve[6].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent2); debug_printf("\t\t.ve[6].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent1); debug_printf("\t\t.ve[6].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent0); debug_printf("\t\t.ve[7].ve0.src_offset = 0x%x\n", (*ptr).ve[7].ve0.src_offset); - debug_printf("\t\t.ve[7].ve0.pad = 0x%x\n", (*ptr).ve[7].ve0.pad); debug_printf("\t\t.ve[7].ve0.src_format = 0x%x\n", (*ptr).ve[7].ve0.src_format); - debug_printf("\t\t.ve[7].ve0.pad0 = 0x%x\n", (*ptr).ve[7].ve0.pad0); debug_printf("\t\t.ve[7].ve0.valid = 0x%x\n", (*ptr).ve[7].ve0.valid); debug_printf("\t\t.ve[7].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[7].ve0.vertex_buffer_index); debug_printf("\t\t.ve[7].ve1.dst_offset = 0x%x\n", (*ptr).ve[7].ve1.dst_offset); - debug_printf("\t\t.ve[7].ve1.pad = 0x%x\n", (*ptr).ve[7].ve1.pad); debug_printf("\t\t.ve[7].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent3); debug_printf("\t\t.ve[7].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent2); debug_printf("\t\t.ve[7].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent1); debug_printf("\t\t.ve[7].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent0); debug_printf("\t\t.ve[8].ve0.src_offset = 0x%x\n", (*ptr).ve[8].ve0.src_offset); - debug_printf("\t\t.ve[8].ve0.pad = 0x%x\n", (*ptr).ve[8].ve0.pad); debug_printf("\t\t.ve[8].ve0.src_format = 0x%x\n", (*ptr).ve[8].ve0.src_format); - debug_printf("\t\t.ve[8].ve0.pad0 = 0x%x\n", (*ptr).ve[8].ve0.pad0); debug_printf("\t\t.ve[8].ve0.valid = 0x%x\n", (*ptr).ve[8].ve0.valid); debug_printf("\t\t.ve[8].ve0.vertex_buffer_index = 0x%x\n", 
(*ptr).ve[8].ve0.vertex_buffer_index); debug_printf("\t\t.ve[8].ve1.dst_offset = 0x%x\n", (*ptr).ve[8].ve1.dst_offset); - debug_printf("\t\t.ve[8].ve1.pad = 0x%x\n", (*ptr).ve[8].ve1.pad); debug_printf("\t\t.ve[8].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent3); debug_printf("\t\t.ve[8].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent2); debug_printf("\t\t.ve[8].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent1); debug_printf("\t\t.ve[8].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent0); debug_printf("\t\t.ve[9].ve0.src_offset = 0x%x\n", (*ptr).ve[9].ve0.src_offset); - debug_printf("\t\t.ve[9].ve0.pad = 0x%x\n", (*ptr).ve[9].ve0.pad); debug_printf("\t\t.ve[9].ve0.src_format = 0x%x\n", (*ptr).ve[9].ve0.src_format); - debug_printf("\t\t.ve[9].ve0.pad0 = 0x%x\n", (*ptr).ve[9].ve0.pad0); debug_printf("\t\t.ve[9].ve0.valid = 0x%x\n", (*ptr).ve[9].ve0.valid); debug_printf("\t\t.ve[9].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[9].ve0.vertex_buffer_index); debug_printf("\t\t.ve[9].ve1.dst_offset = 0x%x\n", (*ptr).ve[9].ve1.dst_offset); - debug_printf("\t\t.ve[9].ve1.pad = 0x%x\n", (*ptr).ve[9].ve1.pad); debug_printf("\t\t.ve[9].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent3); debug_printf("\t\t.ve[9].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent2); debug_printf("\t\t.ve[9].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent1); debug_printf("\t\t.ve[9].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent0); debug_printf("\t\t.ve[10].ve0.src_offset = 0x%x\n", (*ptr).ve[10].ve0.src_offset); - debug_printf("\t\t.ve[10].ve0.pad = 0x%x\n", (*ptr).ve[10].ve0.pad); debug_printf("\t\t.ve[10].ve0.src_format = 0x%x\n", (*ptr).ve[10].ve0.src_format); - debug_printf("\t\t.ve[10].ve0.pad0 = 0x%x\n", (*ptr).ve[10].ve0.pad0); debug_printf("\t\t.ve[10].ve0.valid = 0x%x\n", (*ptr).ve[10].ve0.valid); debug_printf("\t\t.ve[10].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[10].ve0.vertex_buffer_index); 
debug_printf("\t\t.ve[10].ve1.dst_offset = 0x%x\n", (*ptr).ve[10].ve1.dst_offset); - debug_printf("\t\t.ve[10].ve1.pad = 0x%x\n", (*ptr).ve[10].ve1.pad); debug_printf("\t\t.ve[10].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent3); debug_printf("\t\t.ve[10].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent2); debug_printf("\t\t.ve[10].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent1); debug_printf("\t\t.ve[10].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent0); debug_printf("\t\t.ve[11].ve0.src_offset = 0x%x\n", (*ptr).ve[11].ve0.src_offset); - debug_printf("\t\t.ve[11].ve0.pad = 0x%x\n", (*ptr).ve[11].ve0.pad); debug_printf("\t\t.ve[11].ve0.src_format = 0x%x\n", (*ptr).ve[11].ve0.src_format); - debug_printf("\t\t.ve[11].ve0.pad0 = 0x%x\n", (*ptr).ve[11].ve0.pad0); debug_printf("\t\t.ve[11].ve0.valid = 0x%x\n", (*ptr).ve[11].ve0.valid); debug_printf("\t\t.ve[11].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[11].ve0.vertex_buffer_index); debug_printf("\t\t.ve[11].ve1.dst_offset = 0x%x\n", (*ptr).ve[11].ve1.dst_offset); - debug_printf("\t\t.ve[11].ve1.pad = 0x%x\n", (*ptr).ve[11].ve1.pad); debug_printf("\t\t.ve[11].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent3); debug_printf("\t\t.ve[11].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent2); debug_printf("\t\t.ve[11].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent1); debug_printf("\t\t.ve[11].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent0); debug_printf("\t\t.ve[12].ve0.src_offset = 0x%x\n", (*ptr).ve[12].ve0.src_offset); - debug_printf("\t\t.ve[12].ve0.pad = 0x%x\n", (*ptr).ve[12].ve0.pad); debug_printf("\t\t.ve[12].ve0.src_format = 0x%x\n", (*ptr).ve[12].ve0.src_format); - debug_printf("\t\t.ve[12].ve0.pad0 = 0x%x\n", (*ptr).ve[12].ve0.pad0); debug_printf("\t\t.ve[12].ve0.valid = 0x%x\n", (*ptr).ve[12].ve0.valid); debug_printf("\t\t.ve[12].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[12].ve0.vertex_buffer_index); 
debug_printf("\t\t.ve[12].ve1.dst_offset = 0x%x\n", (*ptr).ve[12].ve1.dst_offset); - debug_printf("\t\t.ve[12].ve1.pad = 0x%x\n", (*ptr).ve[12].ve1.pad); debug_printf("\t\t.ve[12].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent3); debug_printf("\t\t.ve[12].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent2); debug_printf("\t\t.ve[12].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent1); debug_printf("\t\t.ve[12].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent0); debug_printf("\t\t.ve[13].ve0.src_offset = 0x%x\n", (*ptr).ve[13].ve0.src_offset); - debug_printf("\t\t.ve[13].ve0.pad = 0x%x\n", (*ptr).ve[13].ve0.pad); debug_printf("\t\t.ve[13].ve0.src_format = 0x%x\n", (*ptr).ve[13].ve0.src_format); - debug_printf("\t\t.ve[13].ve0.pad0 = 0x%x\n", (*ptr).ve[13].ve0.pad0); debug_printf("\t\t.ve[13].ve0.valid = 0x%x\n", (*ptr).ve[13].ve0.valid); debug_printf("\t\t.ve[13].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[13].ve0.vertex_buffer_index); debug_printf("\t\t.ve[13].ve1.dst_offset = 0x%x\n", (*ptr).ve[13].ve1.dst_offset); - debug_printf("\t\t.ve[13].ve1.pad = 0x%x\n", (*ptr).ve[13].ve1.pad); debug_printf("\t\t.ve[13].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent3); debug_printf("\t\t.ve[13].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent2); debug_printf("\t\t.ve[13].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent1); debug_printf("\t\t.ve[13].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent0); debug_printf("\t\t.ve[14].ve0.src_offset = 0x%x\n", (*ptr).ve[14].ve0.src_offset); - debug_printf("\t\t.ve[14].ve0.pad = 0x%x\n", (*ptr).ve[14].ve0.pad); debug_printf("\t\t.ve[14].ve0.src_format = 0x%x\n", (*ptr).ve[14].ve0.src_format); - debug_printf("\t\t.ve[14].ve0.pad0 = 0x%x\n", (*ptr).ve[14].ve0.pad0); debug_printf("\t\t.ve[14].ve0.valid = 0x%x\n", (*ptr).ve[14].ve0.valid); debug_printf("\t\t.ve[14].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[14].ve0.vertex_buffer_index); 
debug_printf("\t\t.ve[14].ve1.dst_offset = 0x%x\n", (*ptr).ve[14].ve1.dst_offset); - debug_printf("\t\t.ve[14].ve1.pad = 0x%x\n", (*ptr).ve[14].ve1.pad); debug_printf("\t\t.ve[14].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent3); debug_printf("\t\t.ve[14].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent2); debug_printf("\t\t.ve[14].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent1); debug_printf("\t\t.ve[14].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent0); debug_printf("\t\t.ve[15].ve0.src_offset = 0x%x\n", (*ptr).ve[15].ve0.src_offset); - debug_printf("\t\t.ve[15].ve0.pad = 0x%x\n", (*ptr).ve[15].ve0.pad); debug_printf("\t\t.ve[15].ve0.src_format = 0x%x\n", (*ptr).ve[15].ve0.src_format); - debug_printf("\t\t.ve[15].ve0.pad0 = 0x%x\n", (*ptr).ve[15].ve0.pad0); debug_printf("\t\t.ve[15].ve0.valid = 0x%x\n", (*ptr).ve[15].ve0.valid); debug_printf("\t\t.ve[15].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[15].ve0.vertex_buffer_index); debug_printf("\t\t.ve[15].ve1.dst_offset = 0x%x\n", (*ptr).ve[15].ve1.dst_offset); - debug_printf("\t\t.ve[15].ve1.pad = 0x%x\n", (*ptr).ve[15].ve1.pad); debug_printf("\t\t.ve[15].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent3); debug_printf("\t\t.ve[15].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent2); debug_printf("\t\t.ve[15].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent1); debug_printf("\t\t.ve[15].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent0); debug_printf("\t\t.ve[16].ve0.src_offset = 0x%x\n", (*ptr).ve[16].ve0.src_offset); - debug_printf("\t\t.ve[16].ve0.pad = 0x%x\n", (*ptr).ve[16].ve0.pad); debug_printf("\t\t.ve[16].ve0.src_format = 0x%x\n", (*ptr).ve[16].ve0.src_format); - debug_printf("\t\t.ve[16].ve0.pad0 = 0x%x\n", (*ptr).ve[16].ve0.pad0); debug_printf("\t\t.ve[16].ve0.valid = 0x%x\n", (*ptr).ve[16].ve0.valid); debug_printf("\t\t.ve[16].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[16].ve0.vertex_buffer_index); 
debug_printf("\t\t.ve[16].ve1.dst_offset = 0x%x\n", (*ptr).ve[16].ve1.dst_offset); - debug_printf("\t\t.ve[16].ve1.pad = 0x%x\n", (*ptr).ve[16].ve1.pad); debug_printf("\t\t.ve[16].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent3); debug_printf("\t\t.ve[16].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent2); debug_printf("\t\t.ve[16].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent1); debug_printf("\t\t.ve[16].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent0); debug_printf("\t\t.ve[17].ve0.src_offset = 0x%x\n", (*ptr).ve[17].ve0.src_offset); - debug_printf("\t\t.ve[17].ve0.pad = 0x%x\n", (*ptr).ve[17].ve0.pad); debug_printf("\t\t.ve[17].ve0.src_format = 0x%x\n", (*ptr).ve[17].ve0.src_format); - debug_printf("\t\t.ve[17].ve0.pad0 = 0x%x\n", (*ptr).ve[17].ve0.pad0); debug_printf("\t\t.ve[17].ve0.valid = 0x%x\n", (*ptr).ve[17].ve0.valid); debug_printf("\t\t.ve[17].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[17].ve0.vertex_buffer_index); debug_printf("\t\t.ve[17].ve1.dst_offset = 0x%x\n", (*ptr).ve[17].ve1.dst_offset); - debug_printf("\t\t.ve[17].ve1.pad = 0x%x\n", (*ptr).ve[17].ve1.pad); debug_printf("\t\t.ve[17].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent3); debug_printf("\t\t.ve[17].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent2); debug_printf("\t\t.ve[17].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent1); @@ -1368,13 +1141,10 @@ void brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr) { debug_printf("\t\t.ve0.src_offset = 0x%x\n", (*ptr).ve0.src_offset); - debug_printf("\t\t.ve0.pad = 0x%x\n", (*ptr).ve0.pad); debug_printf("\t\t.ve0.src_format = 0x%x\n", (*ptr).ve0.src_format); - debug_printf("\t\t.ve0.pad0 = 0x%x\n", (*ptr).ve0.pad0); debug_printf("\t\t.ve0.valid = 0x%x\n", (*ptr).ve0.valid); debug_printf("\t\t.ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve0.vertex_buffer_index); debug_printf("\t\t.ve1.dst_offset = 0x%x\n", (*ptr).ve1.dst_offset); - 
debug_printf("\t\t.ve1.pad = 0x%x\n", (*ptr).ve1.pad); debug_printf("\t\t.ve1.vfcomponent3 = 0x%x\n", (*ptr).ve1.vfcomponent3); debug_printf("\t\t.ve1.vfcomponent2 = 0x%x\n", (*ptr).ve1.vfcomponent2); debug_printf("\t\t.ve1.vfcomponent1 = 0x%x\n", (*ptr).ve1.vfcomponent1); @@ -1385,90 +1155,63 @@ void brw_dump_vf_statistics(const struct brw_vf_statistics *ptr) { debug_printf("\t\t.statistics_enable = 0x%x\n", (*ptr).statistics_enable); - debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad); debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode); } void brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr) { - debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0); debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); - debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1); debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); - debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0); debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); - debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1); debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); - 
debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3); debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space); - debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0); debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); - debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0); debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); - debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1); debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); - debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2); debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); - debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3); - debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0); debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable); debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries); - debug_printf("\t\t.thread4.pad1 = 0x%x\n", (*ptr).thread4.pad1); debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size); - debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2); debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads); - debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3); debug_printf("\t\t.vs5.sampler_count = 0x%x\n", (*ptr).vs5.sampler_count); - debug_printf("\t\t.vs5.pad0 = 0x%x\n", (*ptr).vs5.pad0); 
debug_printf("\t\t.vs5.sampler_state_pointer = 0x%x\n", (*ptr).vs5.sampler_state_pointer); debug_printf("\t\t.vs6.vs_enable = 0x%x\n", (*ptr).vs6.vs_enable); debug_printf("\t\t.vs6.vert_cache_disable = 0x%x\n", (*ptr).vs6.vert_cache_disable); - debug_printf("\t\t.vs6.pad0 = 0x%x\n", (*ptr).vs6.pad0); } void brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr) { - debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0); debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count); - debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1); debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer); debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable); debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable); debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable); debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable); debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable); - debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0); debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset); - debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1); debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode); debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority); debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count); - debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3); debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow); debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", 
(*ptr).thread2.per_thread_scratch_space); - debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0); debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer); debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg); debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset); - debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0); debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length); - debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1); debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset); - debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2); debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length); - debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3); debug_printf("\t\t.wm4.stats_enable = 0x%x\n", (*ptr).wm4.stats_enable); debug_printf("\t\t.wm4.depth_buffer_clear = 0x%x\n", (*ptr).wm4.depth_buffer_clear); debug_printf("\t\t.wm4.sampler_count = 0x%x\n", (*ptr).wm4.sampler_count); @@ -1478,7 +1221,6 @@ brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr) debug_printf("\t\t.wm5.enable_32_pix = 0x%x\n", (*ptr).wm5.enable_32_pix); debug_printf("\t\t.wm5.enable_con_32_pix = 0x%x\n", (*ptr).wm5.enable_con_32_pix); debug_printf("\t\t.wm5.enable_con_64_pix = 0x%x\n", (*ptr).wm5.enable_con_64_pix); - debug_printf("\t\t.wm5.pad0 = 0x%x\n", (*ptr).wm5.pad0); debug_printf("\t\t.wm5.legacy_global_depth_bias = 0x%x\n", (*ptr).wm5.legacy_global_depth_bias); debug_printf("\t\t.wm5.line_stipple = 0x%x\n", (*ptr).wm5.line_stipple); debug_printf("\t\t.wm5.depth_offset = 0x%x\n", (*ptr).wm5.depth_offset); @@ -1495,17 +1237,11 @@ brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr) debug_printf("\t\t.wm5.max_threads = 0x%x\n", 
(*ptr).wm5.max_threads); debug_printf("\t\t.global_depth_offset_constant = %f\n", (*ptr).global_depth_offset_constant); debug_printf("\t\t.global_depth_offset_scale = %f\n", (*ptr).global_depth_offset_scale); - debug_printf("\t\t.wm8.pad0 = 0x%x\n", (*ptr).wm8.pad0); debug_printf("\t\t.wm8.grf_reg_count_1 = 0x%x\n", (*ptr).wm8.grf_reg_count_1); - debug_printf("\t\t.wm8.pad1 = 0x%x\n", (*ptr).wm8.pad1); debug_printf("\t\t.wm8.kernel_start_pointer_1 = 0x%x\n", (*ptr).wm8.kernel_start_pointer_1); - debug_printf("\t\t.wm9.pad0 = 0x%x\n", (*ptr).wm9.pad0); debug_printf("\t\t.wm9.grf_reg_count_2 = 0x%x\n", (*ptr).wm9.grf_reg_count_2); - debug_printf("\t\t.wm9.pad1 = 0x%x\n", (*ptr).wm9.pad1); debug_printf("\t\t.wm9.kernel_start_pointer_2 = 0x%x\n", (*ptr).wm9.kernel_start_pointer_2); - debug_printf("\t\t.wm10.pad0 = 0x%x\n", (*ptr).wm10.pad0); debug_printf("\t\t.wm10.grf_reg_count_3 = 0x%x\n", (*ptr).wm10.grf_reg_count_3); - debug_printf("\t\t.wm10.pad1 = 0x%x\n", (*ptr).wm10.pad1); debug_printf("\t\t.wm10.kernel_start_pointer_3 = 0x%x\n", (*ptr).wm10.kernel_start_pointer_3); } diff --git a/src/gallium/drivers/i965/brw_structs_dump.py b/src/gallium/drivers/i965/brw_structs_dump.py index 581515878e..6dba49ad91 100755 --- a/src/gallium/drivers/i965/brw_structs_dump.py +++ b/src/gallium/drivers/i965/brw_structs_dump.py @@ -40,6 +40,7 @@ copyright = ''' import os import sys +import re from pygccxml import parser from pygccxml import declarations @@ -53,6 +54,11 @@ from pygccxml.declarations import type_visitor enums = True +def vars_filter(variable): + name = variable.name + return not re.match('^pad\d*', name) and name != 'dword' + + class decl_dumper_t(decl_visitor.decl_visitor_t): def __init__(self, stream, instance = '', decl = None): @@ -69,7 +75,8 @@ class decl_dumper_t(decl_visitor.decl_visitor_t): assert self.decl.class_type in ('struct', 'union') for variable in class_.variables(recursive = False): - dump_type(self.stream, self._instance + '.' 
+ variable.name, variable.type) + if vars_filter(variable): + dump_type(self.stream, self._instance + '.' + variable.name, variable.type) def visit_enumeration(self): if enums: -- cgit v1.2.3 From c796aed5ddad011d66e631c4cafdbf779e73f213 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 13:57:05 +0000 Subject: i965g: add lots of error checks and early returns Any allocation that may fail should be checked, and propagate the error upwards. At the highest level we will flush batch and retry. This is an alternate strategy to what the original DRI driver did of attempting to flush batch from the lowest levels (eg inside BEGIN_BATCH). The trouble with that strategy was that flushes could occur at unexpected times, and additionally there was a need for a weird notification mechanism to propagate the 'lost context' state back up to higher levels. Propagating the errors directly gives us a lot of flexibility in how to deal with these states, at the expense of a lot more checking in the code. Will add some sanity checks later to make sure that out-of-memory conditions are properly escalated and not lost halfway up the stack.
--- src/gallium/drivers/i965/brw_batchbuffer.c | 19 +- src/gallium/drivers/i965/brw_batchbuffer.h | 3 +- src/gallium/drivers/i965/brw_cc.c | 73 ++++--- src/gallium/drivers/i965/brw_clip.c | 60 ++++-- src/gallium/drivers/i965/brw_clip_state.c | 60 +++--- src/gallium/drivers/i965/brw_context.c | 46 +++-- src/gallium/drivers/i965/brw_context.h | 2 +- src/gallium/drivers/i965/brw_curbe.c | 18 +- src/gallium/drivers/i965/brw_draw.c | 3 +- src/gallium/drivers/i965/brw_draw_upload.c | 18 +- src/gallium/drivers/i965/brw_eu.c | 13 +- src/gallium/drivers/i965/brw_eu.h | 8 +- src/gallium/drivers/i965/brw_gs.c | 69 ++++--- src/gallium/drivers/i965/brw_gs_state.c | 48 +++-- src/gallium/drivers/i965/brw_pipe_query.c | 31 +-- src/gallium/drivers/i965/brw_pipe_shader.c | 3 +- src/gallium/drivers/i965/brw_pipe_vertex.c | 2 +- src/gallium/drivers/i965/brw_screen_buffers.c | 16 +- src/gallium/drivers/i965/brw_screen_surface.c | 7 +- src/gallium/drivers/i965/brw_screen_texture.c | 17 +- src/gallium/drivers/i965/brw_sf.c | 52 +++-- src/gallium/drivers/i965/brw_sf_state.c | 86 ++++---- src/gallium/drivers/i965/brw_state.h | 71 +++---- src/gallium/drivers/i965/brw_state_cache.c | 115 +++++------ src/gallium/drivers/i965/brw_state_upload.c | 3 +- src/gallium/drivers/i965/brw_vs.c | 56 +++--- src/gallium/drivers/i965/brw_vs_state.c | 58 +++--- src/gallium/drivers/i965/brw_vs_surface_state.c | 97 ++++++---- src/gallium/drivers/i965/brw_winsys.h | 56 ++++-- src/gallium/drivers/i965/brw_wm.c | 78 ++++---- src/gallium/drivers/i965/brw_wm_constant_buffer.c | 87 +++++---- src/gallium/drivers/i965/brw_wm_sampler_state.c | 98 ++++++---- src/gallium/drivers/i965/brw_wm_state.c | 103 ++++++---- src/gallium/drivers/i965/brw_wm_surface_state.c | 226 ++++++++++++---------- src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 46 ++--- 35 files changed, 1003 insertions(+), 745 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c 
b/src/gallium/drivers/i965/brw_batchbuffer.c index ca612e5ed0..e5f73bd6a3 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -38,17 +38,17 @@ #define USE_MALLOC_BUFFER 1 #define ALWAYS_EMIT_MI_FLUSH 1 -void +enum pipe_error brw_batchbuffer_reset(struct brw_batchbuffer *batch) { - if (batch->buf) { - batch->sws->bo_unreference(batch->buf); - batch->buf = NULL; - } + enum pipe_error ret; - batch->buf = batch->sws->bo_alloc(batch->sws, - BRW_BUFFER_TYPE_BATCH, - BRW_BATCH_SIZE, 4096); + ret = batch->sws->bo_alloc( batch->sws, + BRW_BUFFER_TYPE_BATCH, + BRW_BATCH_SIZE, 4096, + &batch->buf ); + if (ret) + return ret; if (batch->malloc_buffer) batch->map = batch->malloc_buffer; @@ -59,6 +59,7 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch) batch->size = BRW_BATCH_SIZE; batch->ptr = batch->map; + return PIPE_OK; } struct brw_batchbuffer * @@ -91,7 +92,7 @@ brw_batchbuffer_free(struct brw_batchbuffer *batch) batch->map = NULL; } - batch->sws->bo_unreference(batch->buf); + bo_reference(&batch->buf, NULL); FREE(batch); } diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 1f04826aea..288a9d2755 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -65,7 +65,8 @@ void _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file, int line); -void brw_batchbuffer_reset(struct brw_batchbuffer *batch); +enum pipe_error +brw_batchbuffer_reset(struct brw_batchbuffer *batch); /* Unlike bmBufferData, this currently requires the buffer be mapped. 
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index 20967f0191..8e25fe8585 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -57,10 +57,11 @@ static void calc_sane_viewport( const struct pipe_viewport_state *vp, svp->far = 1; } -static int prepare_cc_vp( struct brw_context *brw ) +static enum pipe_error prepare_cc_vp( struct brw_context *brw ) { struct brw_cc_viewport ccv; struct sane_viewport svp; + enum pipe_error ret; memset(&ccv, 0, sizeof(ccv)); @@ -70,10 +71,12 @@ static int prepare_cc_vp( struct brw_context *brw ) ccv.min_depth = svp.near; ccv.max_depth = svp.far; - brw->sws->bo_unreference(brw->cc.vp_bo); - brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); - - return 0; + ret = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0, + &brw->cc.vp_bo ); + if (ret) + return ret; + + return PIPE_OK; } const struct brw_tracked_state brw_cc_vp = { @@ -123,11 +126,13 @@ cc_unit_populate_key(const struct brw_context *brw, /** * Creates the state cache entry for the given CC unit key. 
*/ -static struct brw_winsys_buffer * -cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) +static enum pipe_error +cc_unit_create_from_key(struct brw_context *brw, + struct brw_cc_unit_key *key, + struct brw_winsys_buffer **bo_out) { struct brw_cc_unit_state cc; - struct brw_winsys_buffer *bo; + enum pipe_error ret; memset(&cc, 0, sizeof(cc)); @@ -143,38 +148,48 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key) cc.cc6 = key->cc6; cc.cc7 = key->cc7; - bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT, - key, sizeof(*key), - &brw->cc.vp_bo, 1, - &cc, sizeof(cc), - NULL, NULL); + ret = brw_upload_cache(&brw->cache, BRW_CC_UNIT, + key, sizeof(*key), + &brw->cc.vp_bo, 1, + &cc, sizeof(cc), + NULL, NULL, + bo_out); + if (ret) + return ret; - /* Emit CC viewport relocation */ - brw->sws->bo_emit_reloc(bo, - BRW_USAGE_STATE, - 0, - offsetof(struct brw_cc_unit_state, cc4), - brw->cc.vp_bo); - return bo; + /* Emit CC viewport relocation */ + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_STATE, + 0, + offsetof(struct brw_cc_unit_state, cc4), + brw->cc.vp_bo); + if (ret) + return ret; + + return PIPE_OK; } static int prepare_cc_unit( struct brw_context *brw ) { struct brw_cc_unit_key key; + enum pipe_error ret; cc_unit_populate_key(brw, &key); - brw->sws->bo_unreference(brw->cc.state_bo); - brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT, - &key, sizeof(key), - &brw->cc.vp_bo, 1, - NULL); - - if (brw->cc.state_bo == NULL) - brw->cc.state_bo = cc_unit_create_from_key(brw, &key); + if (brw_search_cache(&brw->cache, BRW_CC_UNIT, + &key, sizeof(key), + &brw->cc.vp_bo, 1, + NULL, + &brw->cc.state_bo)) + return PIPE_OK; + + ret = cc_unit_create_from_key(brw, &key, + &brw->cc.state_bo); + if (ret) + return ret; - return 0; + return PIPE_OK; } const struct brw_tracked_state brw_cc_unit = { diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 1a52fa771b..35e1d2fdbd 100644 
--- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -48,9 +48,12 @@ #define BACK_UNFILLED_BIT 0x2 -static void compile_clip_prog( struct brw_context *brw, - struct brw_clip_prog_key *key ) +static enum pipe_error +compile_clip_prog( struct brw_context *brw, + struct brw_clip_prog_key *key, + struct brw_winsys_buffer **bo_out ) { + enum pipe_error ret; struct brw_clip_compile c; const GLuint *program; GLuint program_size; @@ -123,31 +126,39 @@ static void compile_clip_prog( struct brw_context *brw, break; default: assert(0); - return; + return PIPE_ERROR_BAD_INPUT; } /* get the program */ - program = brw_get_program(&c.func, &program_size); + ret = brw_get_program(&c.func, &program, &program_size); + if (ret) + return ret; /* Upload */ - brw->sws->bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_upload_cache( &brw->cache, - BRW_CLIP_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->clip.prog_data ); + ret = brw_upload_cache( &brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->clip.prog_data, + bo_out ); + if (ret) + return ret; + + return PIPE_OK; } /* Calculate interpolants for triangle and line rasterization. */ -static int upload_clip_prog(struct brw_context *brw) +static enum pipe_error +upload_clip_prog(struct brw_context *brw) { + enum pipe_error ret; struct brw_clip_prog_key key; /* Populate the key, starting from the almost-complete version from @@ -166,15 +177,22 @@ static int upload_clip_prog(struct brw_context *brw) /* PIPE_NEW_CLIP */ key.nr_userclip = brw->curr.ucp.nr; - brw->sws->bo_unreference(brw->clip.prog_bo); - brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, - &key, sizeof(key), - NULL, 0, - &brw->clip.prog_data); - if (brw->clip.prog_bo == NULL) - compile_clip_prog( brw, &key ); + /* Already cached? 
+ */ + if (brw_search_cache(&brw->cache, BRW_CLIP_PROG, + &key, sizeof(key), + NULL, 0, + &brw->clip.prog_data, + &brw->clip.prog_bo)) + return PIPE_OK; + + /* Compile new program: + */ + ret = compile_clip_prog( brw, &key, &brw->clip.prog_bo ); + if (ret) + return ret; - return 0; + return PIPE_OK; } diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 6f8309fea9..d4e3c43c61 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -72,12 +72,13 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp; } -static struct brw_winsys_buffer * +static enum pipe_error clip_unit_create_from_key(struct brw_context *brw, - struct brw_clip_unit_key *key) + struct brw_clip_unit_key *key, + struct brw_winsys_buffer **bo_out) { struct brw_clip_unit_state clip; - struct brw_winsys_buffer *bo; + enum pipe_error ret; memset(&clip, 0, sizeof(clip)); @@ -141,39 +142,50 @@ clip_unit_create_from_key(struct brw_context *brw, clip.viewport_ymin = -1; clip.viewport_ymax = 1; - bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, - key, sizeof(*key), - &brw->clip.prog_bo, 1, - &clip, sizeof(clip), - NULL, NULL); + ret = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, + key, sizeof(*key), + &brw->clip.prog_bo, 1, + &clip, sizeof(clip), + NULL, NULL, + bo_out); + if (ret) + return ret; /* Emit clip program relocation */ assert(brw->clip.prog_bo); - brw->sws->bo_emit_reloc(bo, - BRW_USAGE_STATE, - clip.thread0.grf_reg_count << 1, - offsetof(struct brw_clip_unit_state, thread0), - brw->clip.prog_bo); - - return bo; + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_STATE, + clip.thread0.grf_reg_count << 1, + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_bo); + if (ret) + return ret; + + return PIPE_OK; } static int upload_clip_unit( struct brw_context *brw ) { struct 
brw_clip_unit_key key; + enum pipe_error ret; clip_unit_populate_key(brw, &key); - brw->sws->bo_unreference(brw->clip.state_bo); - brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT, - &key, sizeof(key), - &brw->clip.prog_bo, 1, - NULL); - if (brw->clip.state_bo == NULL) { - brw->clip.state_bo = clip_unit_create_from_key(brw, &key); - } + if (brw_search_cache(&brw->cache, BRW_CLIP_UNIT, + &key, sizeof(key), + &brw->clip.prog_bo, 1, + NULL, + &brw->clip.state_bo)) + return PIPE_OK; + + /* Create new: + */ + ret = clip_unit_create_from_key(brw, &key, + &brw->clip.state_bo); + if (ret) + return ret; - return 0; + return PIPE_OK; } const struct brw_tracked_state brw_clip_unit = { diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index aaf7d1834e..2cee7a7a3c 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -72,29 +72,33 @@ static void brw_destroy_context( struct pipe_context *pipe ) brw->curr.fb.nr_cbufs = 0; pipe_surface_reference(&brw->curr.fb.zsbuf, NULL); - brw->sws->bo_unreference(brw->curbe.curbe_bo); - brw->sws->bo_unreference(brw->vs.prog_bo); - brw->sws->bo_unreference(brw->vs.state_bo); - brw->sws->bo_unreference(brw->vs.bind_bo); - brw->sws->bo_unreference(brw->gs.prog_bo); - brw->sws->bo_unreference(brw->gs.state_bo); - brw->sws->bo_unreference(brw->clip.prog_bo); - brw->sws->bo_unreference(brw->clip.state_bo); - brw->sws->bo_unreference(brw->clip.vp_bo); - brw->sws->bo_unreference(brw->sf.prog_bo); - brw->sws->bo_unreference(brw->sf.state_bo); - brw->sws->bo_unreference(brw->sf.vp_bo); + bo_reference(&brw->curbe.curbe_bo, NULL); + bo_reference(&brw->vs.prog_bo, NULL); + bo_reference(&brw->vs.state_bo, NULL); + bo_reference(&brw->vs.bind_bo, NULL); + bo_reference(&brw->gs.prog_bo, NULL); + bo_reference(&brw->gs.state_bo, NULL); + bo_reference(&brw->clip.prog_bo, NULL); + bo_reference(&brw->clip.state_bo, NULL); + bo_reference(&brw->clip.vp_bo, NULL); + 
bo_reference(&brw->sf.prog_bo, NULL); + bo_reference(&brw->sf.state_bo, NULL); + bo_reference(&brw->sf.vp_bo, NULL); + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) - brw->sws->bo_unreference(brw->wm.sdc_bo[i]); - brw->sws->bo_unreference(brw->wm.bind_bo); + bo_reference(&brw->wm.sdc_bo[i], NULL); + + bo_reference(&brw->wm.bind_bo, NULL); + for (i = 0; i < BRW_WM_MAX_SURF; i++) - brw->sws->bo_unreference(brw->wm.surf_bo[i]); - brw->sws->bo_unreference(brw->wm.sampler_bo); - brw->sws->bo_unreference(brw->wm.prog_bo); - brw->sws->bo_unreference(brw->wm.state_bo); - brw->sws->bo_unreference(brw->cc.prog_bo); - brw->sws->bo_unreference(brw->cc.state_bo); - brw->sws->bo_unreference(brw->cc.vp_bo); + bo_reference(&brw->wm.surf_bo[i], NULL); + + bo_reference(&brw->wm.sampler_bo, NULL); + bo_reference(&brw->wm.prog_bo, NULL); + bo_reference(&brw->wm.state_bo, NULL); + bo_reference(&brw->cc.prog_bo, NULL); + bo_reference(&brw->cc.state_bo, NULL); + bo_reference(&brw->cc.vp_bo, NULL); } diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 09d34615c7..580251d2f1 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -744,7 +744,7 @@ struct brw_context * brw_queryobj.c */ void brw_init_query(struct brw_context *brw); -void brw_prepare_query_begin(struct brw_context *brw); +enum pipe_error brw_prepare_query_begin(struct brw_context *brw); void brw_emit_query_begin(struct brw_context *brw); void brw_emit_query_end(struct brw_context *brw); diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 1e2e232204..ca7774a7cc 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -160,10 +160,11 @@ static GLfloat fixed_plane[6][4] = { * cache mechanism, but maybe would benefit from a comparison against * the current uploaded set of constants. 
*/ -static int prepare_curbe_buffer(struct brw_context *brw) +static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) { const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); + enum pipe_error ret; GLfloat *buf; GLuint i; @@ -267,17 +268,20 @@ static int prepare_curbe_buffer(struct brw_context *brw) (brw->curbe.need_new_bo || brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)) { - brw->sws->bo_unreference(brw->curbe.curbe_bo); - brw->curbe.curbe_bo = NULL; + bo_reference(&brw->curbe.curbe_bo, NULL); } if (brw->curbe.curbe_bo == NULL) { /* Allocate a single page for CURBE entries for this batchbuffer. * They're generally around 64b. */ - brw->curbe.curbe_bo = brw->sws->bo_alloc(brw->sws, - BRW_BUFFER_TYPE_CURBE, - 4096, 1 << 6); + ret = brw->sws->bo_alloc(brw->sws, + BRW_BUFFER_TYPE_CURBE, + 4096, 1 << 6, + &brw->curbe.curbe_bo); + if (ret) + return ret; + brw->curbe.curbe_next_offset = 0; } @@ -313,7 +317,7 @@ static int prepare_curbe_buffer(struct brw_context *brw) return 0; } -static int emit_curbe_buffer(struct brw_context *brw) +static enum pipe_error emit_curbe_buffer(struct brw_context *brw) { GLuint sz = brw->curbe.total_size; diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 6d6b1c7c5c..88cb31ad54 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -280,6 +280,5 @@ void brw_draw_cleanup( struct brw_context *brw ) u_upload_destroy( brw->vb.upload_vertex ); u_upload_destroy( brw->vb.upload_index ); - brw->sws->bo_unreference(brw->ib.bo); - brw->ib.bo = NULL; + bo_reference(&brw->ib.bo, NULL); } diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index 4fa7d549eb..188605a0c1 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -251,9 +251,8 @@ static int brw_prepare_vertices(struct brw_context *brw) 
brw->vb.vb[i].vertex_count = (vb->stride == 0 ? 1 : (bo->size - offset) / vb->stride); - brw->sws->bo_unreference(brw->vb.vb[i].bo); - brw->vb.vb[i].bo = bo; - brw->sws->bo_reference(brw->vb.vb[i].bo); + + bo_reference( &brw->vb.vb[i].bo, bo ); /* Don't need to retain this reference. We have a reference on * the underlying winsys buffer: @@ -417,6 +416,7 @@ const struct brw_tracked_state brw_vertices = { static int brw_prepare_indices(struct brw_context *brw) { struct pipe_buffer *index_buffer = brw->curr.index_buffer; + struct pipe_buffer *upload_buf = NULL; struct brw_winsys_buffer *bo = NULL; GLuint offset; GLuint index_size; @@ -438,7 +438,6 @@ static int brw_prepare_indices(struct brw_context *brw) /* Turn userbuffer into a proper hardware buffer? */ if (brw_buffer_is_user_buffer(index_buffer)) { - struct pipe_buffer *upload_buf; ret = u_upload_buffer( brw->vb.upload_index, 0, @@ -450,8 +449,6 @@ static int brw_prepare_indices(struct brw_context *brw) return ret; bo = brw_buffer(upload_buf)->bo; - brw->sws->bo_reference(bo); - pipe_buffer_reference( &upload_buf, NULL ); /* XXX: annotate the userbuffer with the upload information so * that successive calls don't get re-uploaded. 
@@ -459,8 +456,6 @@ static int brw_prepare_indices(struct brw_context *brw) } else { bo = brw_buffer(index_buffer)->bo; - brw->sws->bo_reference(bo); - ib_size = bo->size; offset = 0; } @@ -486,15 +481,12 @@ static int brw_prepare_indices(struct brw_context *brw) if (brw->ib.bo != bo || brw->ib.size != ib_size) { - brw->sws->bo_unreference(brw->ib.bo); - brw->ib.bo = bo; + bo_reference(&brw->ib.bo, bo); brw->ib.size = ib_size; brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; } - else { - brw->sws->bo_unreference(bo); - } + pipe_buffer_reference( &upload_buf, NULL ); brw_add_validated_bo(brw, brw->ib.bo); return 0; } diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c index de43b14512..a8fcb5f97e 100644 --- a/src/gallium/drivers/i965/brw_eu.c +++ b/src/gallium/drivers/i965/brw_eu.c @@ -118,16 +118,23 @@ void brw_init_compile( struct brw_context *brw, struct brw_compile *p ) } -const GLuint *brw_get_program( struct brw_compile *p, - GLuint *sz ) +enum pipe_error brw_get_program( struct brw_compile *p, + const GLuint **data, + GLuint *sz ) { GLuint i; for (i = 0; i < 8; i++) brw_NOP(p); + /* Is the generated program malformed for some reason? 
+ */ + if (p->error) + return PIPE_ERROR_BAD_INPUT; + *sz = p->nr_insn * sizeof(struct brw_instruction); - return (const GLuint *)p->store; + *data = (const GLuint *)p->store; + return PIPE_OK; } diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h index 7bddc3859c..565f4ef1c5 100644 --- a/src/gallium/drivers/i965/brw_eu.h +++ b/src/gallium/drivers/i965/brw_eu.h @@ -34,6 +34,7 @@ #define BRW_EU_H #include "util/u_debug.h" +#include "pipe/p_error.h" #include "brw_structs.h" #include "brw_defines.h" @@ -132,6 +133,8 @@ struct brw_compile { struct brw_eu_label *first_label; /**< linked list of labels */ struct brw_eu_call *first_call; /**< linked list of CALs */ + + boolean error; }; @@ -772,7 +775,10 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc ); void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ); void brw_init_compile( struct brw_context *, struct brw_compile *p ); -const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz ); + +enum pipe_error brw_get_program( struct brw_compile *p, + const GLuint **program, + GLuint *sz ); /* Helpers for regular instructions: diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index 693d8bfdf8..ce77be24f6 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -40,10 +40,12 @@ -static void compile_gs_prog( struct brw_context *brw, - struct brw_gs_prog_key *key ) +static enum pipe_error compile_gs_prog( struct brw_context *brw, + struct brw_gs_prog_key *key, + struct brw_winsys_buffer **bo_out ) { struct brw_gs_compile c; + enum pipe_error ret; const GLuint *program; GLuint program_size; @@ -57,9 +59,9 @@ static void compile_gs_prog( struct brw_context *brw, c.nr_attrs = c.key.nr_attrs; if (BRW_IS_IGDNG(brw)) - c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ + c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? 
*/ else - c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ c.nr_bytes = c.nr_regs * REG_SIZE; @@ -93,40 +95,47 @@ static void compile_gs_prog( struct brw_context *brw, if (key->hint_gs_always) brw_gs_lines( &c ); else { - return; + return PIPE_OK; } break; case PIPE_PRIM_TRIANGLES: if (key->hint_gs_always) brw_gs_tris( &c ); else { - return; + return PIPE_OK; } break; case PIPE_PRIM_POINTS: if (key->hint_gs_always) brw_gs_points( &c ); else { - return; + return PIPE_OK; } - break; + break; default: - return; + assert(0); + return PIPE_ERROR_BAD_INPUT; } /* get the program */ - program = brw_get_program(&c.func, &program_size); + ret = brw_get_program(&c.func, &program, &program_size); + if (ret) + return ret; /* Upload */ - brw->sws->bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->gs.prog_data ); + ret = brw_upload_cache( &brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->gs.prog_data, + bo_out ); + if (ret) + return ret; + + return PIPE_OK; } static const unsigned gs_prim[PIPE_PRIM_MAX] = { @@ -166,6 +175,8 @@ static void populate_key( struct brw_context *brw, static int prepare_gs_prog(struct brw_context *brw) { struct brw_gs_prog_key key; + enum pipe_error ret; + /* Populate the key: */ populate_key(brw, &key); @@ -175,17 +186,21 @@ static int prepare_gs_prog(struct brw_context *brw) brw->gs.prog_active = key.need_gs_prog; } - if (brw->gs.prog_active) { - brw->sws->bo_unreference(brw->gs.prog_bo); - brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, - &key, sizeof(key), - NULL, 0, - &brw->gs.prog_data); - if (brw->gs.prog_bo == NULL) - compile_gs_prog( brw, &key ); - } + if (!brw->gs.prog_active) + return PIPE_OK; + + if 
(brw_search_cache(&brw->cache, BRW_GS_PROG, + &key, sizeof(key), + NULL, 0, + &brw->gs.prog_data, + &brw->gs.prog_bo)) + return PIPE_OK; + + ret = compile_gs_prog( brw, &key, &brw->gs.prog_bo ); + if (ret) + return ret; - return 0; + return PIPE_OK; } diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index f27f886a65..18a66da538 100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -69,11 +69,13 @@ gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key) key->urb_size = brw->urb.vsize; } -static struct brw_winsys_buffer * -gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) +static enum pipe_error +gs_unit_create_from_key(struct brw_context *brw, + struct brw_gs_unit_key *key, + struct brw_winsys_buffer **bo_out) { struct brw_gs_unit_state gs; - struct brw_winsys_buffer *bo; + enum pipe_error ret; memset(&gs, 0, sizeof(gs)); @@ -104,40 +106,46 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key) if (BRW_DEBUG & DEBUG_STATS) gs.thread4.stats_enable = 1; - bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT, - key, sizeof(*key), - &brw->gs.prog_bo, 1, - &gs, sizeof(gs), - NULL, NULL); + ret = brw_upload_cache(&brw->cache, BRW_GS_UNIT, + key, sizeof(*key), + &brw->gs.prog_bo, 1, + &gs, sizeof(gs), + NULL, NULL, + bo_out); + if (ret) + return ret; if (key->prog_active) { /* Emit GS program relocation */ - brw->sws->bo_emit_reloc(bo, + brw->sws->bo_emit_reloc(*bo_out, BRW_USAGE_STATE, gs.thread0.grf_reg_count << 1, offsetof(struct brw_gs_unit_state, thread0), brw->gs.prog_bo); } - return bo; + return PIPE_OK; } -static int prepare_gs_unit(struct brw_context *brw) +static enum pipe_error prepare_gs_unit(struct brw_context *brw) { struct brw_gs_unit_key key; + enum pipe_error ret; gs_unit_populate_key(brw, &key); - brw->sws->bo_unreference(brw->gs.state_bo); - brw->gs.state_bo = brw_search_cache(&brw->cache, 
BRW_GS_UNIT, - &key, sizeof(key), - &brw->gs.prog_bo, 1, - NULL); - if (brw->gs.state_bo == NULL) { - brw->gs.state_bo = gs_unit_create_from_key(brw, &key); - } + if (brw_search_cache(&brw->cache, BRW_GS_UNIT, + &key, sizeof(key), + &brw->gs.prog_bo, 1, + NULL, + &brw->gs.state_bo)) + return PIPE_OK; + + ret = gs_unit_create_from_key(brw, &key, &brw->gs.state_bo); + if (ret) + return ret; - return 0; + return PIPE_OK; } const struct brw_tracked_state brw_gs_unit = { diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c index 3370ebd262..6a01173787 100644 --- a/src/gallium/drivers/i965/brw_pipe_query.c +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -72,8 +72,7 @@ brw_query_get_result(struct pipe_context *pipe, } brw->sws->bo_unmap(query->bo); - brw->sws->bo_unreference(query->bo); - query->bo = NULL; + bo_reference(&query->bo, NULL); } *result = query->result; @@ -100,10 +99,9 @@ brw_query_create(struct pipe_context *pipe, unsigned type ) static void brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q) { - struct brw_context *brw = brw_context(pipe); struct brw_query_object *query = (struct brw_query_object *)q; - brw->sws->bo_unreference(query->bo); + bo_reference(&query->bo, NULL); FREE(query); } @@ -114,9 +112,8 @@ brw_query_begin(struct pipe_context *pipe, struct pipe_query *q) struct brw_query_object *query = (struct brw_query_object *)q; /* Reset our driver's tracking of query state. 
*/ - brw->sws->bo_unreference(query->bo); + bo_reference(&query->bo, NULL); query->result = 0; - query->bo = NULL; query->first_index = -1; query->last_index = -1; @@ -139,8 +136,7 @@ brw_query_end(struct pipe_context *pipe, struct pipe_query *q) brw_emit_query_end(brw); brw_context_flush( brw ); - brw->sws->bo_unreference(brw->query.bo); - brw->query.bo = NULL; + bo_reference(&brw->query.bo, NULL); } remove_from_list(query); @@ -153,24 +149,30 @@ brw_query_end(struct pipe_context *pipe, struct pipe_query *q) */ /** Called to set up the query BO and account for its aperture space */ -void +enum pipe_error brw_prepare_query_begin(struct brw_context *brw) { + enum pipe_error ret; + /* Skip if we're not doing any queries. */ if (is_empty_list(&brw->query.active_head)) - return; + return PIPE_OK; /* Get a new query BO if we're going to need it. */ if (brw->query.bo == NULL || brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) { - brw->sws->bo_unreference(brw->query.bo); - brw->query.bo = NULL; - brw->query.bo = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_QUERY, 4096, 1); + ret = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_QUERY, 4096, 1, + &brw->query.bo); + if (ret) + return ret; + brw->query.index = 0; } brw_add_validated_bo(brw, brw->query.bo); + + return PIPE_OK; } /** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. 
*/ @@ -213,8 +215,7 @@ brw_emit_query_begin(struct brw_context *brw) FALSE, &tmp ); - brw->sws->bo_reference(brw->query.bo); - query->bo = brw->query.bo; + bo_reference( &query->bo, brw->query.bo ); query->first_index = brw->query.index; } query->last_index = brw->query.index; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 2833f2bce0..662c43c3e5 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -146,10 +146,9 @@ fail: static void brw_delete_fs_state( struct pipe_context *pipe, void *prog ) { - struct brw_context *brw = brw_context(pipe); struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog; - brw->sws->bo_unreference(fs->const_buffer); + bo_reference(&fs->const_buffer, NULL); FREE( (void *)fs->tokens ); FREE( fs ); } diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index 97e9a23688..73bba5b088 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -56,7 +56,7 @@ brw_pipe_vertex_cleanup( struct brw_context *brw ) */ #if 0 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - brw->sws->bo_unreference(brw->vb.inputs[i].bo); + bo_reference(&brw->vb.inputs[i].bo, NULL); brw->vb.inputs[i].bo = NULL; } #endif diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c index ba54740225..7ae386ffb3 100644 --- a/src/gallium/drivers/i965/brw_screen_buffers.c +++ b/src/gallium/drivers/i965/brw_screen_buffers.c @@ -43,15 +43,11 @@ brw_buffer_unmap( struct pipe_screen *screen, static void brw_buffer_destroy( struct pipe_buffer *buffer ) { - struct brw_screen *bscreen = brw_screen( buffer->screen ); - struct brw_winsys_screen *sws = bscreen->sws; struct brw_buffer *buf = brw_buffer( buffer ); assert(!p_atomic_read(&buffer->reference.count)); - if (buf->bo) - sws->bo_unreference(buf->bo); - + 
bo_reference(&buf->bo, NULL); FREE(buf); } @@ -66,6 +62,7 @@ brw_buffer_create(struct pipe_screen *screen, struct brw_winsys_screen *sws = bscreen->sws; struct brw_buffer *buf; unsigned buffer_type; + enum pipe_error ret; buf = CALLOC_STRUCT(brw_buffer); if (!buf) @@ -101,10 +98,11 @@ brw_buffer_create(struct pipe_screen *screen, break; } - buf->bo = sws->bo_alloc( sws, - buffer_type, - size, - alignment ); + ret = sws->bo_alloc( sws, buffer_type, + size, alignment, + &buf->bo ); + if (ret != PIPE_OK) + return NULL; return &buf->base; } diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c index 1c408e9f2e..21a7382873 100644 --- a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -150,9 +150,7 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, surface->pitch = tex->pitch; surface->tiling = tex->tiling; - surface->bo = tex->bo; - brw_screen->sws->bo_reference(surface->bo); - + bo_reference( &surface->bo, tex->bo ); pipe_texture_reference( &surface->base.texture, &tex->base ); surface->ss.ss0.surface_format = tex->ss.ss0.surface_format; @@ -244,11 +242,10 @@ static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen, static void brw_tex_surface_destroy( struct pipe_surface *surf ) { struct brw_surface *surface = brw_surface(surf); - struct brw_screen *screen = brw_screen(surf->texture->screen); /* Unreference texture, shared buffer: */ - screen->sws->bo_unreference(surface->bo); + bo_reference(&surface->bo, NULL); pipe_texture_reference( &surface->base.texture, NULL ); diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index ba6dc7dfde..355abf0b89 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -187,6 +187,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, struct 
brw_screen *bscreen = brw_screen(screen); struct brw_texture *tex; enum brw_buffer_type buffer_type; + enum pipe_error ret; tex = CALLOC_STRUCT(brw_texture); if (tex == NULL) @@ -235,10 +236,13 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, buffer_type = BRW_BUFFER_TYPE_TEXTURE; } - tex->bo = bscreen->sws->bo_alloc( bscreen->sws, - buffer_type, - tex->pitch * tex->total_height * tex->cpp, - 64 ); + ret = bscreen->sws->bo_alloc( bscreen->sws, + buffer_type, + tex->pitch * tex->total_height * tex->cpp, + 64, + &tex->bo ); + if (ret) + goto fail; tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); @@ -289,7 +293,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, return &tex->base; fail: - bscreen->sws->bo_unreference(tex->bo); + bo_reference(&tex->bo, NULL); FREE(tex); return NULL; } @@ -306,7 +310,8 @@ static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen, static void brw_texture_destroy(struct pipe_texture *pt) { - //bscreen->sws->bo_unreference(tex->bo); + struct brw_texture *tex = brw_texture(pt); + bo_reference(&tex->bo, NULL); FREE(pt); } diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 013d839e37..24d1015bbd 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -40,9 +40,11 @@ #include "brw_sf.h" #include "brw_state.h" -static void compile_sf_prog( struct brw_context *brw, - struct brw_sf_prog_key *key ) +static enum pipe_error compile_sf_prog( struct brw_context *brw, + struct brw_sf_prog_key *key, + struct brw_winsys_buffer **bo_out ) { + enum pipe_error ret; struct brw_sf_compile c; const GLuint *program; GLuint program_size; @@ -87,28 +89,35 @@ static void compile_sf_prog( struct brw_context *brw, break; default: assert(0); - return; + return PIPE_ERROR_BAD_INPUT; } /* get the program */ - program = 
brw_get_program(&c.func, &program_size); + ret = brw_get_program(&c.func, &program, &program_size); + if (ret) + return ret; /* Upload */ - brw->sws->bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->sf.prog_data ); + ret = brw_upload_cache( &brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->sf.prog_data, + bo_out); + if (ret) + return ret; + + return PIPE_OK; } /* Calculate interpolants for triangle and line rasterization. */ -static int upload_sf_prog(struct brw_context *brw) +static enum pipe_error upload_sf_prog(struct brw_context *brw) { + enum pipe_error ret; struct brw_sf_prog_key key; memset(&key, 0, sizeof(key)); @@ -161,15 +170,18 @@ static int upload_sf_prog(struct brw_context *brw) PIPE_WINDING_CCW); } - brw->sws->bo_unreference(brw->sf.prog_bo); - brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, - &key, sizeof(key), - NULL, 0, - &brw->sf.prog_data); - if (brw->sf.prog_bo == NULL) - compile_sf_prog( brw, &key ); + if (brw_search_cache(&brw->cache, BRW_SF_PROG, + &key, sizeof(key), + NULL, 0, + &brw->sf.prog_data, + &brw->sf.prog_bo)) + return PIPE_OK; - return 0; + ret = compile_sf_prog( brw, &key, &brw->sf.prog_bo ); + if (ret) + return ret; + + return PIPE_OK; } diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index 31343ff245..f030f26c19 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -39,11 +39,12 @@ #include "brw_debug.h" #include "brw_pipe_rast.h" -static int upload_sf_vp(struct brw_context *brw) +static enum pipe_error upload_sf_vp(struct brw_context *brw) { const struct pipe_viewport_state *vp = &brw->curr.vp; const struct pipe_scissor_state *scissor = &brw->curr.scissor; struct brw_sf_viewport sfv; + enum pipe_error ret; memset(&sfv, 0, 
sizeof(sfv)); @@ -61,10 +62,12 @@ static int upload_sf_vp(struct brw_context *brw) sfv.scissor.ymin = scissor->miny; sfv.scissor.ymax = scissor->maxy; /* -1 ?? */ - brw->sws->bo_unreference(brw->sf.vp_bo); - brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); + ret = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0, + &brw->sf.vp_bo ); + if (ret) + return ret; - return 0; + return PIPE_OK; } const struct brw_tracked_state brw_sf_vp = { @@ -128,12 +131,13 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) rast->point_size_max); } -static struct brw_winsys_buffer * +static enum pipe_error sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, - struct brw_winsys_buffer **reloc_bufs) + struct brw_winsys_buffer **reloc_bufs, + struct brw_winsys_buffer **bo_out) { struct brw_sf_unit_state sf; - struct brw_winsys_buffer *bo; + enum pipe_error ret; int chipset_max_threads; memset(&sf, 0, sizeof(sf)); @@ -273,51 +277,65 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.sf6.dest_org_hbias = 0x0; } - bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, - key, sizeof(*key), - reloc_bufs, 2, - &sf, sizeof(sf), - NULL, NULL); + ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT, + key, sizeof(*key), + reloc_bufs, 2, + &sf, sizeof(sf), + NULL, NULL, + bo_out); + if (ret) + return ret; /* STATE_PREFETCH command description describes this state as being * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. 
*/ /* Emit SF program relocation */ - brw->sws->bo_emit_reloc(bo, - BRW_USAGE_STATE, - sf.thread0.grf_reg_count << 1, - offsetof(struct brw_sf_unit_state, thread0), - brw->sf.prog_bo); + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_STATE, + sf.thread0.grf_reg_count << 1, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); + if (ret) + return ret; - /* Emit SF viewport relocation */ - brw->sws->bo_emit_reloc(bo, - BRW_USAGE_STATE, - sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), - offsetof(struct brw_sf_unit_state, sf5), - brw->sf.vp_bo); - return bo; + /* Emit SF viewport relocation */ + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_STATE, + sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); + if (ret) + return ret; + + return PIPE_OK; } -static int upload_sf_unit( struct brw_context *brw ) +static enum pipe_error upload_sf_unit( struct brw_context *brw ) { struct brw_sf_unit_key key; struct brw_winsys_buffer *reloc_bufs[2]; + enum pipe_error ret; sf_unit_populate_key(brw, &key); reloc_bufs[0] = brw->sf.prog_bo; reloc_bufs[1] = brw->sf.vp_bo; - brw->sws->bo_unreference(brw->sf.state_bo); - brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT, - &key, sizeof(key), - reloc_bufs, 2, - NULL); - if (brw->sf.state_bo == NULL) { - brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs); - } - return 0; + if (brw_search_cache(&brw->cache, BRW_SF_UNIT, + &key, sizeof(key), + reloc_bufs, 2, + NULL, + &brw->sf.state_bo)) + return PIPE_OK; + + + ret = sf_unit_create_from_key(brw, &key, reloc_bufs, + &brw->sf.state_bo); + if (ret) + return ret; + + return PIPE_OK; } const struct brw_tracked_state brw_sf_unit = { diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index 94d2cb6f82..e219a1d870 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -44,8 +44,8 @@ 
brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo) assert(brw->state.validated_bo_count < Elements(brw->state.validated_bos)); if (bo != NULL) { - brw->sws->bo_reference(bo); - brw->state.validated_bos[brw->state.validated_bo_count++] = bo; + bo_reference( &brw->state.validated_bos[brw->state.validated_bo_count++], + bo ); } } @@ -106,37 +106,42 @@ void brw_destroy_state(struct brw_context *brw); /*********************************************************************** * brw_state_cache.c */ -struct brw_winsys_buffer *brw_cache_data(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs); - -struct brw_winsys_buffer *brw_cache_data_sz(struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *data, - GLuint data_size, - struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs); - -struct brw_winsys_buffer *brw_upload_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_sz, - struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs, - const void *data, - GLuint data_sz, - const void *aux, - void *aux_return ); - -struct brw_winsys_buffer *brw_search_cache( struct brw_cache *cache, - enum brw_cache_id cache_id, - const void *key, - GLuint key_size, - struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs, - void *aux_return); +enum pipe_error brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + struct brw_winsys_buffer **reloc_bufs, + GLuint nr_reloc_bufs, + struct brw_winsys_buffer **bo_out ); + +enum pipe_error brw_cache_data_sz(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + struct brw_winsys_buffer **reloc_bufs, + GLuint nr_reloc_bufs, + struct brw_winsys_buffer **bo_out); + +enum pipe_error brw_upload_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + 
struct brw_winsys_buffer **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz, + const void *aux, + void *aux_return , + struct brw_winsys_buffer **bo_out); + +boolean brw_search_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + struct brw_winsys_buffer **reloc_bufs, + GLuint nr_reloc_bufs, + void *aux_return, + struct brw_winsys_buffer **bo_out); + void brw_state_cache_check_size( struct brw_context *brw ); void brw_init_caches( struct brw_context *brw ); diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index cbd1f02d77..f8369d31ec 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -109,9 +109,8 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, if (bo == cache->last_bo[cache_id]) return; /* no change */ - cache->sws->bo_unreference(cache->last_bo[cache_id]); - cache->last_bo[cache_id] = bo; - cache->sws->bo_reference(cache->last_bo[cache_id]); + bo_reference( &cache->last_bo[cache_id], bo ); + cache->brw->state.dirty.cache |= 1 << cache_id; } @@ -174,14 +173,15 @@ rehash(struct brw_cache *cache) /** * Returns the buffer object matching cache_id and key, or NULL. 
*/ -struct brw_winsys_buffer * +boolean brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs, - void *aux_return) + void *aux_return, + struct brw_winsys_buffer **bo_out) { struct brw_cache_item *item; GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); @@ -189,20 +189,20 @@ brw_search_cache(struct brw_cache *cache, item = search_cache(cache, cache_id, hash, key, key_size, reloc_bufs, nr_reloc_bufs); - if (item == NULL) - return NULL; - - if (aux_return) - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - - update_cache_last(cache, cache_id, item->bo); - - cache->sws->bo_reference(item->bo); - return item->bo; + if (item) { + if (aux_return) + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + + update_cache_last(cache, cache_id, item->bo); + bo_reference(bo_out, item->bo); + return TRUE; + } + + return FALSE; } -struct brw_winsys_buffer * +enum pipe_error brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, @@ -212,14 +212,15 @@ brw_upload_cache( struct brw_cache *cache, const void *data, GLuint data_size, const void *aux, - void *aux_return ) + void *aux_return, + struct brw_winsys_buffer **bo_out) { struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); GLuint relocs_size = nr_reloc_bufs * sizeof(struct brw_winsys_buffer *); GLuint aux_size = cache->aux_size[cache_id]; + enum pipe_error ret; void *tmp; - struct brw_winsys_buffer *bo; int i; /* Create the buffer object to contain the data. For now, use a @@ -227,9 +228,12 @@ brw_upload_cache( struct brw_cache *cache, * may want to take advantage of hardware distinctions between * these various entities. 
*/ - bo = cache->sws->bo_alloc(cache->sws, - cache->buffer_type, - data_size, 1 << 6); + ret = cache->sws->bo_alloc(cache->sws, + cache->buffer_type, + data_size, 1 << 6, + bo_out); + if (ret) + return ret; /* Set up the memory containing the key, aux_data, and reloc_bufs */ @@ -240,7 +244,7 @@ brw_upload_cache( struct brw_cache *cache, memcpy((char *)tmp + key_size + aux_size, reloc_bufs, relocs_size); for (i = 0; i < nr_reloc_bufs; i++) { if (reloc_bufs[i] != NULL) - cache->sws->bo_reference(reloc_bufs[i]); + p_atomic_inc(&reloc_bufs[i]->reference.count); } item->cache_id = cache_id; @@ -249,9 +253,7 @@ brw_upload_cache( struct brw_cache *cache, item->key_size = key_size; item->reloc_bufs = (struct brw_winsys_buffer **)((char *)tmp + key_size + aux_size); item->nr_reloc_bufs = nr_reloc_bufs; - - item->bo = bo; - cache->sws->bo_reference(bo); + bo_reference( &item->bo, *bo_out ); item->data_size = data_size; if (cache->n_items > cache->size * 1.5) @@ -273,28 +275,28 @@ brw_upload_cache( struct brw_cache *cache, data_size, cache_id); /* Copy data to the buffer */ - cache->sws->bo_subdata(bo, + cache->sws->bo_subdata(item->bo, cache_id, 0, data_size, data); - update_cache_last(cache, cache_id, bo); + update_cache_last(cache, cache_id, item->bo); - return bo; + return PIPE_OK; } /** * This doesn't really work with aux data. 
Use search/upload instead */ -struct brw_winsys_buffer * +enum pipe_error brw_cache_data_sz(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, GLuint data_size, struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs) + GLuint nr_reloc_bufs, + struct brw_winsys_buffer **bo_out) { - struct brw_winsys_buffer *bo; struct brw_cache_item *item; GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs); @@ -302,17 +304,17 @@ brw_cache_data_sz(struct brw_cache *cache, reloc_bufs, nr_reloc_bufs); if (item) { update_cache_last(cache, cache_id, item->bo); - cache->sws->bo_reference(item->bo); - return item->bo; - } - bo = brw_upload_cache(cache, cache_id, - data, data_size, - reloc_bufs, nr_reloc_bufs, - data, data_size, - NULL, NULL); + bo_reference(bo_out, item->bo); + return PIPE_OK; + } - return bo; + return brw_upload_cache(cache, cache_id, + data, data_size, + reloc_bufs, nr_reloc_bufs, + data, data_size, + NULL, NULL, + bo_out); } @@ -323,15 +325,16 @@ brw_cache_data_sz(struct brw_cache *cache, * better to use, as the potentially changing offsets in the data-used-as-key * will result in excessive cache misses. 
*/ -struct brw_winsys_buffer * +enum pipe_error brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs) + GLuint nr_reloc_bufs, + struct brw_winsys_buffer **bo_out) { return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id], - reloc_bufs, nr_reloc_bufs); + reloc_bufs, nr_reloc_bufs, bo_out); } @@ -506,11 +509,13 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) int j; next = c->next; + for (j = 0; j < c->nr_reloc_bufs; j++) - brw->sws->bo_unreference(c->reloc_bufs[j]); - brw->sws->bo_unreference(c->bo); - free((void *)c->key); - free(c); + bo_reference(&c->reloc_bufs[j], NULL); + + bo_reference(&c->bo, NULL); + FREE((void *)c->key); + FREE(c); } cache->items[i] = NULL; } @@ -551,10 +556,12 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo) *prev = c->next; for (j = 0; j < c->nr_reloc_bufs; j++) - cache->sws->bo_unreference(c->reloc_bufs[j]); - cache->sws->bo_unreference(c->bo); - free((void *)c->key); - free(c); + bo_reference(&c->reloc_bufs[j], NULL); + + bo_reference(&c->bo, NULL); + + FREE((void *)c->key); + FREE(c); cache->n_items--; } else { prev = &c->next; @@ -590,10 +597,10 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache) brw_clear_cache(brw, cache); for (i = 0; i < BRW_MAX_CACHE; i++) { - brw->sws->bo_unreference(cache->last_bo[i]); - free(cache->name[i]); + bo_reference(&cache->last_bo[i], NULL); + FREE(cache->name[i]); } - free(cache->items); + FREE(cache->items); cache->items = NULL; cache->size = 0; } diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index a71af4d2b9..fdcdd59129 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -140,8 +140,7 @@ brw_clear_validated_bos(struct brw_context *brw) /* Clear the last round of validated bos */ for (i = 0; i < 
brw->state.validated_bo_count; i++) { - brw->sws->bo_unreference(brw->state.validated_bos[i]); - brw->state.validated_bos[i] = NULL; + bo_reference(&brw->state.validated_bos[i], NULL); } brw->state.validated_bo_count = 0; } diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 26a28114d9..966940ceac 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -39,10 +39,12 @@ -static void do_vs_prog( struct brw_context *brw, - struct brw_vertex_shader *vp, - struct brw_vs_prog_key *key ) +static enum pipe_error do_vs_prog( struct brw_context *brw, + struct brw_vertex_shader *vp, + struct brw_vs_prog_key *key, + struct brw_winsys_buffer **bo_out) { + enum pipe_error ret; GLuint program_size; const GLuint *program; struct brw_vs_compile c; @@ -66,22 +68,29 @@ static void do_vs_prog( struct brw_context *brw, /* get the program */ - program = brw_get_program(&c.func, &program_size); - - brw->sws->bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG, - &c.key, sizeof(c.key), - NULL, 0, - program, program_size, - &c.prog_data, - &brw->vs.prog_data ); + ret = brw_get_program(&c.func, &program, &program_size); + if (ret) + return ret; + + ret = brw_upload_cache( &brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->vs.prog_data, + bo_out); + if (ret) + return ret; + + return PIPE_OK; } -static int brw_upload_vs_prog(struct brw_context *brw) +static enum pipe_error brw_upload_vs_prog(struct brw_context *brw) { struct brw_vs_prog_key key; struct brw_vertex_shader *vp = brw->curr.vertex_shader; + enum pipe_error ret; memset(&key, 0, sizeof(key)); @@ -95,15 +104,18 @@ static int brw_upload_vs_prog(struct brw_context *brw) /* Make an early check for the key. 
*/ - brw->sws->bo_unreference(brw->vs.prog_bo); - brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG, - &key, sizeof(key), - NULL, 0, - &brw->vs.prog_data); - if (brw->vs.prog_bo == NULL) - do_vs_prog(brw, vp, &key); - - return 0; + if (brw_search_cache(&brw->cache, BRW_VS_PROG, + &key, sizeof(key), + NULL, 0, + &brw->vs.prog_data, + &brw->vs.prog_bo)) + return PIPE_OK; + + ret = do_vs_prog(brw, vp, &key, &brw->vs.prog_bo); + if (ret) + return ret; + + return PIPE_OK; } diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index 26d5d005fa..22a4d7f01b 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -78,11 +78,13 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) } } -static struct brw_winsys_buffer * -vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) +static enum pipe_error +vs_unit_create_from_key(struct brw_context *brw, + struct brw_vs_unit_key *key, + struct brw_winsys_buffer **bo_out) { + enum pipe_error ret; struct brw_vs_unit_state vs; - struct brw_winsys_buffer *bo; int chipset_max_threads; memset(&vs, 0, sizeof(vs)); @@ -141,38 +143,46 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) */ vs.vs6.vs_enable = 1; - bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT, - key, sizeof(*key), - &brw->vs.prog_bo, 1, - &vs, sizeof(vs), - NULL, NULL); + ret = brw_upload_cache(&brw->cache, BRW_VS_UNIT, + key, sizeof(*key), + &brw->vs.prog_bo, 1, + &vs, sizeof(vs), + NULL, NULL, + bo_out); + if (ret) + return ret; /* Emit VS program relocation */ - brw->sws->bo_emit_reloc(bo, - BRW_USAGE_STATE, - vs.thread0.grf_reg_count << 1, - offsetof(struct brw_vs_unit_state, thread0), - brw->vs.prog_bo); - - return bo; + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_STATE, + vs.thread0.grf_reg_count << 1, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); + if (ret) + return ret; 
+ + return PIPE_OK; } static int prepare_vs_unit(struct brw_context *brw) { struct brw_vs_unit_key key; + enum pipe_error ret; vs_unit_populate_key(brw, &key); - brw->sws->bo_unreference(brw->vs.state_bo); - brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT, - &key, sizeof(key), - &brw->vs.prog_bo, 1, - NULL); - if (brw->vs.state_bo == NULL) { - brw->vs.state_bo = vs_unit_create_from_key(brw, &key); - } + if (brw_search_cache(&brw->cache, BRW_VS_UNIT, + &key, sizeof(key), + &brw->vs.prog_bo, 1, + NULL, + &brw->vs.state_bo)) + return PIPE_OK; + + ret = vs_unit_create_from_key(brw, &key, &brw->vs.state_bo); + if (ret) + return ret; - return 0; + return PIPE_OK; } const struct brw_tracked_state brw_vs_unit = { diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c index 32fb9b2a8b..b12df0ec03 100644 --- a/src/gallium/drivers/i965/brw_vs_surface_state.c +++ b/src/gallium/drivers/i965/brw_vs_surface_state.c @@ -83,22 +83,23 @@ brw_update_vs_constant_surface( struct brw_context *brw, { struct brw_surface_key key; struct pipe_buffer *cb = brw->curr.vs_constants; + enum pipe_error ret; assert(surf == 0); /* If we're in this state update atom, we need to update VS constants, so * free the old buffer and create a new one for the new contents. */ - brw->sws->bo_unreference(vp->const_buffer); - vp->const_buffer = brw_vs_update_constant_buffer(brw); + ret = brw_vs_update_constant_buffer(brw, &vp->const_buffer); + if (ret) + return ret; /* If there's no constant buffer, then no surface BO is needed to point at * it. 
*/ - if (vp->const_buffer == 0) { - drm_intel_bo_unreference(brw->vs.surf_bo[surf]); - brw->vs.surf_bo[surf] = NULL; - return; + if (vp->const_buffer == NULL) { + bo_reference(brw->vs.surf_bo[surf], NULL); + return PIPE_OK; } memset(&key, 0, sizeof(key)); @@ -118,15 +119,20 @@ brw_update_vs_constant_surface( struct brw_context *brw, key.width, key.height, key.depth, key.cpp, key.pitch); */ - drm_intel_bo_unreference(brw->vs.surf_bo[surf]); - brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->vs.surf_bo[surf] == NULL) { - brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); - } + if (brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL, + &brw->vs.surf_bo[surf])) + return PIPE_OK; + + ret = brw_create_constant_surface(brw, &key + &brw->vs.surf_bo[surf]); + if (ret) + return ret; + + return PIPE_OK; } #endif @@ -134,18 +140,20 @@ brw_update_vs_constant_surface( struct brw_context *brw, /** * Constructs the binding table for the VS surface state. 
*/ -static struct brw_winsys_buffer * -brw_vs_get_binding_table(struct brw_context *brw) +static enum pipe_error +brw_vs_get_binding_table(struct brw_context *brw, + struct brw_winsys_buffer **bo_out) { #if 0 - struct brw_winsys_buffer *bind_bo; - - bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, BRW_VS_MAX_SURF, - NULL); - - if (bind_bo == NULL) { + if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + NULL, + bo_out)) + { + return PIPE_OK; + } + else { GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); uint32_t *data = malloc(data_size); int i; @@ -156,11 +164,14 @@ brw_vs_get_binding_table(struct brw_context *brw) else data[i] = 0; - bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, BRW_VS_MAX_SURF, - data, data_size, - NULL, NULL); + ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, BRW_VS_MAX_SURF, + data, data_size, + NULL, NULL, + bo_out); + if (ret) + return ret; /* Emit binding table relocations to surface state */ for (i = 0; i < BRW_VS_MAX_SURF; i++) { @@ -168,18 +179,19 @@ brw_vs_get_binding_table(struct brw_context *brw) /* The presumed offsets were set in the data values for * brw_upload_cache. */ - drm_intel_bo_emit_reloc(bind_bo, i * 4, - brw->vs.surf_bo[i], 0, - BRW_USAGE_STATE); + ret = sws->bo_emit_reloc(*bo_out, i * 4, + brw->vs.surf_bo[i], 0, + BRW_USAGE_STATE); + if (ret) + return ret; } } - free(data); + FREE(data); + return PIPE_OK; } - - return bind_bo; #else - return NULL; + return PIPE_OK; #endif } @@ -190,8 +202,10 @@ brw_vs_get_binding_table(struct brw_context *brw) * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and * CACHE_NEW_SURF_BIND for the binding table upload. 
*/ -static int prepare_vs_surfaces(struct brw_context *brw ) +static enum pipe_error prepare_vs_surfaces(struct brw_context *brw ) { + enum pipe_error ret; + #if 0 int i; int nr_surfaces = 0; @@ -215,11 +229,12 @@ static int prepare_vs_surfaces(struct brw_context *brw ) * just slightly increases our working set size. */ if (brw->vs.nr_surfaces != 0) { - brw->sws->bo_unreference(brw->vs.bind_bo); - brw->vs.bind_bo = brw_vs_get_binding_table(brw); + ret = brw_vs_get_binding_table(brw, &brw->vs.bind_bo); + if (ret) + return ret; } - return 0; + return PIPE_OK; } const struct brw_tracked_state brw_vs_surfaces = { diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index d941fbcebe..f61c541ad1 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -28,6 +28,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_error.h" +#include "pipe/p_refcnt.h" struct brw_winsys; struct pipe_fence_handle; @@ -36,10 +37,13 @@ struct pipe_fence_handle; */ #define BRW_BATCH_SIZE (32*1024) +struct brw_winsys_screen; /* Need a tiny bit of information inside the abstract buffer struct: */ struct brw_winsys_buffer { + struct pipe_reference reference; + struct brw_winsys_screen *sws; unsigned *offset; unsigned size; }; @@ -105,6 +109,10 @@ enum brw_buffer_data_type { BRW_DATA_MAX }; + + + + struct brw_winsys_screen { @@ -116,33 +124,33 @@ struct brw_winsys_screen { /** * Create a buffer. 
*/ - struct brw_winsys_buffer *(*bo_alloc)( struct brw_winsys_screen *sws, - enum brw_buffer_type type, - unsigned size, - unsigned alignment ); + enum pipe_error (*bo_alloc)( struct brw_winsys_screen *sws, + enum brw_buffer_type type, + unsigned size, + unsigned alignment, + struct brw_winsys_buffer **bo_out ); - /* Reference and unreference buffers: + /* Destroy a buffer when our refcount goes to zero: */ - void (*bo_reference)( struct brw_winsys_buffer *buffer ); - void (*bo_unreference)( struct brw_winsys_buffer *buffer ); + void (*bo_destroy)( struct brw_winsys_buffer *buffer ); /* delta -- added to b2->offset, and written into buffer * offset -- location above value is written to within buffer */ - int (*bo_emit_reloc)( struct brw_winsys_buffer *buffer, - enum brw_buffer_usage usage, - unsigned delta, - unsigned offset, - struct brw_winsys_buffer *b2); + enum pipe_error (*bo_emit_reloc)( struct brw_winsys_buffer *buffer, + enum brw_buffer_usage usage, + unsigned delta, + unsigned offset, + struct brw_winsys_buffer *b2); - int (*bo_exec)( struct brw_winsys_buffer *buffer, - unsigned bytes_used ); + enum pipe_error (*bo_exec)( struct brw_winsys_buffer *buffer, + unsigned bytes_used ); - int (*bo_subdata)(struct brw_winsys_buffer *buffer, - enum brw_buffer_data_type data_type, - size_t offset, - size_t size, - const void *data); + enum pipe_error (*bo_subdata)(struct brw_winsys_buffer *buffer, + enum brw_buffer_data_type data_type, + size_t offset, + size_t size, + const void *data); boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer); boolean (*bo_references)(struct brw_winsys_buffer *a, @@ -175,6 +183,16 @@ struct brw_winsys_screen { }; +static INLINE void +bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf) +{ + struct brw_winsys_buffer *old_buf = *ptr; + + if (pipe_reference((struct pipe_reference **)ptr, &buf->reference)) + old_buf->sws->bo_destroy(old_buf); +} + + /** * Create brw pipe_screen. 
*/ diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 815ae8c51a..93f90bf329 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -137,30 +137,26 @@ brw_wm_linear_shader_emit(struct brw_context *brw, struct brw_wm_compile *c) * Depending on the instructions used (i.e. flow control instructions) * we'll use one of two code generators. */ -static int do_wm_prog( struct brw_context *brw, - struct brw_fragment_shader *fp, - struct brw_wm_prog_key *key) +static enum pipe_error do_wm_prog( struct brw_context *brw, + struct brw_fragment_shader *fp, + struct brw_wm_prog_key *key, + struct brw_winsys_buffer **bo_out) { + enum pipe_error ret; struct brw_wm_compile *c; const GLuint *program; GLuint program_size; - c = brw->wm.compile_data; - if (c == NULL) { - brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data)); - c = brw->wm.compile_data; - if (c == NULL) { - /* Ouch - big out of memory problem. Can't continue - * without triggering a segfault, no way to signal, - * so just return. 
- */ + if (brw->wm.compile_data == NULL) { + brw->wm.compile_data = MALLOC(sizeof(*brw->wm.compile_data)); + if (!brw->wm.compile_data) return PIPE_ERROR_OUT_OF_MEMORY; - } - } else { - memset(c, 0, sizeof(*brw->wm.compile_data)); } - memcpy(&c->key, key, sizeof(*key)); + c = brw->wm.compile_data; + memset(c, 0, sizeof *c); + + c->key = *key; c->fp = fp; c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/ @@ -190,17 +186,21 @@ static int do_wm_prog( struct brw_context *brw, /* get the program */ - program = brw_get_program(&c->func, &program_size); - - brw->sws->bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG, - &c->key, sizeof(c->key), - NULL, 0, - program, program_size, - &c->prog_data, - &brw->wm.prog_data ); - - return 0; + ret = brw_get_program(&c->func, &program, &program_size); + if (ret) + return ret; + + ret = brw_upload_cache( &brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + NULL, 0, + program, program_size, + &c->prog_data, + &brw->wm.prog_data, + bo_out ); + if (ret) + return ret; + + return PIPE_OK; } @@ -267,24 +267,28 @@ static void brw_wm_populate_key( struct brw_context *brw, } -static int brw_prepare_wm_prog(struct brw_context *brw) +static enum pipe_error brw_prepare_wm_prog(struct brw_context *brw) { struct brw_wm_prog_key key; struct brw_fragment_shader *fs = brw->curr.fragment_shader; + enum pipe_error ret; brw_wm_populate_key(brw, &key); /* Make an early check for the key. 
*/ - brw->sws->bo_unreference(brw->wm.prog_bo); - brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, - &key, sizeof(key), - NULL, 0, - &brw->wm.prog_data); - if (brw->wm.prog_bo == NULL) - return do_wm_prog(brw, fs, &key); - - return 0; + if (brw_search_cache(&brw->cache, BRW_WM_PROG, + &key, sizeof(key), + NULL, 0, + &brw->wm.prog_data, + &brw->wm.prog_bo)) + return PIPE_OK; + + ret = do_wm_prog(brw, fs, &key, &brw->wm.prog_bo); + if (ret) + return ret; + + return PIPE_OK; } diff --git a/src/gallium/drivers/i965/brw_wm_constant_buffer.c b/src/gallium/drivers/i965/brw_wm_constant_buffer.c index 50ecef29a4..14568265dd 100644 --- a/src/gallium/drivers/i965/brw_wm_constant_buffer.c +++ b/src/gallium/drivers/i965/brw_wm_constant_buffer.c @@ -6,12 +6,14 @@ * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ -struct brw_winsys_buffer * +enum pipe_error brw_create_constant_surface( struct brw_context *brw, - struct brw_surface_key *key ) + struct brw_surface_key *key, + struct brw_winsys_buffer **bo_out ) { const GLint w = key->width - 1; struct brw_winsys_buffer *bo; + enum pipe_error ret; memset(&surf, 0, sizeof(surf)); @@ -28,22 +30,27 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ - bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, - key, sizeof(*key), - &key->bo, key->bo ? 1 : 0, - &surf, sizeof(surf), - NULL, NULL); + ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 
1 : 0, + &surf, sizeof(surf), + NULL, NULL, + &bo_out); + if (ret) + return ret; if (key->bo) { /* Emit relocation to surface contents */ - brw->sws->bo_emit_reloc(bo, - BRW_USAGE_SAMPLER, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_SAMPLER, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + if (ret) + return ret; } - return bo; + return PIPE_OK; } @@ -52,7 +59,7 @@ brw_create_constant_surface( struct brw_context *brw, * Update the surface state for a WM constant buffer. * The constant buffer will be (re)allocated here if needed. */ -static void +static enum pipe_error brw_update_wm_constant_surface( struct brw_context *brw, GLuint surf) { @@ -60,20 +67,21 @@ brw_update_wm_constant_surface( struct brw_context *brw, struct brw_fragment_shader *fp = brw->curr.fragment_shader; struct pipe_buffer *cbuf = brw->curr.fragment_constants; int pitch = cbuf->size / (4 * sizeof(float)); + enum pipe_error ret; /* If we're in this state update atom, we need to update WM constants, so * free the old buffer and create a new one for the new contents. */ - brw->sws->bo_unreference(fp->const_buffer); - fp->const_buffer = brw_wm_update_constant_buffer(brw); + ret = brw_wm_update_constant_buffer(brw, &fp->const_buffer); + if (ret) + return ret; /* If there's no constant buffer, then no surface BO is needed to point at * it. 
*/ if (cbuf == NULL) { - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = NULL; - return; + bo_reference(&brw->wm.surf_bo[surf], NULL); + return PIPE_OK; } memset(&key, 0, sizeof(key)); @@ -97,16 +105,20 @@ brw_update_wm_constant_surface( struct brw_context *brw, key.width, key.height, key.depth, key.cpp, key.pitch); */ - brw->sws->bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, 1, - NULL); - if (brw->wm.surf_bo[surf] == NULL) { - brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key); - } + if (brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, 1, + NULL, + &brw->wm.surf_bo[surf])) + return PIPE_OK; + + ret = brw_create_constant_surface(brw, &key, &brw->wm.surf_bo[surf]); + if (ret) + return ret; + brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + return PIPE_OK; } /** @@ -117,28 +129,33 @@ brw_update_wm_constant_surface( struct brw_context *brw, * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for * inclusion in the binding table. */ -static void prepare_wm_constant_surface(struct brw_context *brw ) +static enum pipe_error prepare_wm_constant_surface(struct brw_context *brw ) { struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; - drm_intel_bo_unreference(fp->const_buffer); - fp->const_buffer = brw_wm_update_constant_buffer(brw); + ret = brw_wm_update_constant_buffer(brw, + &fp->const_buffer); + if (ret) + return ret; /* If there's no constant buffer, then no surface BO is needed to point at * it. 
*/ if (fp->const_buffer == 0) { if (brw->wm.surf_bo[surf] != NULL) { - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = NULL; + bo_reference(&brw->wm.surf_bo[surf], NULL); brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } - return; + return PIPE_OK; } - brw_update_wm_constant_surface(ctx, surf); + ret = brw_update_wm_constant_surface(ctx, surf); + if (ret) + return ret; + + return PIPE_OK } const struct brw_tracked_state brw_wm_constant_surface = { diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c index 2fddb4ad89..2861aa979f 100644 --- a/src/gallium/drivers/i965/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -43,16 +43,22 @@ -static struct brw_winsys_buffer * +static enum pipe_error upload_default_color( struct brw_context *brw, - const GLfloat *color ) + const GLfloat *color, + struct brw_winsys_buffer **bo_out ) { struct brw_sampler_default_color sdc; + enum pipe_error ret; COPY_4V(sdc.color, color); - return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc, - NULL, 0 ); + ret = brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc, + NULL, 0, bo_out ); + if (ret) + return ret; + + return PIPE_OK; } @@ -111,9 +117,10 @@ brw_wm_sampler_populate_key(struct brw_context *brw, } -static void +static enum pipe_error brw_wm_sampler_update_default_colors(struct brw_context *brw) { + enum pipe_error ret; int nr = MIN2(brw->curr.num_textures, brw->curr.num_samplers); int i; @@ -121,8 +128,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw) for (i = 0; i < nr; i++) { const struct brw_texture *tex = brw_texture(brw->curr.texture[i]); const struct brw_sampler *sampler = brw->curr.sampler[i]; - - brw->sws->bo_unreference(brw->wm.sdc_bo[i]); + const float *bc; if (pf_is_depth_or_stencil(tex->base.format)) { float bordercolor[4] = { @@ -131,15 +137,25 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw) 
sampler->border_color[0], sampler->border_color[0] }; - /* GL specs that border color for depth textures is taken from the - * R channel, while the hardware uses A. Spam R into all the - * channels for safety. - */ - brw->wm.sdc_bo[i] = upload_default_color(brw, bordercolor); - } else { - brw->wm.sdc_bo[i] = upload_default_color(brw, sampler->border_color); + + bc = bordercolor; + } + else { + bc = sampler->border_color; } + + /* GL specs that border color for depth textures is taken from the + * R channel, while the hardware uses A. Spam R into all the + * channels for safety. + */ + ret = upload_default_color(brw, + bc, + &brw->wm.sdc_bo[i]); + if (ret) + return ret; } + + return PIPE_OK; } @@ -149,6 +165,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw) static int upload_wm_samplers( struct brw_context *brw ) { struct wm_sampler_key key; + enum pipe_error ret; int i; brw_wm_sampler_update_default_colors(brw); @@ -159,35 +176,40 @@ static int upload_wm_samplers( struct brw_context *brw ) brw->state.dirty.cache |= CACHE_NEW_SAMPLER; } - brw->sws->bo_unreference(brw->wm.sampler_bo); - brw->wm.sampler_bo = NULL; - if (brw->wm.sampler_count == 0) - return 0; + if (brw->wm.sampler_count == 0) { + bo_reference(&brw->wm.sampler_bo, NULL); + return PIPE_OK; + } - brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER, - &key, sizeof(key), - brw->wm.sdc_bo, key.sampler_count, - NULL); + if (brw_search_cache(&brw->cache, BRW_SAMPLER, + &key, sizeof(key), + brw->wm.sdc_bo, key.sampler_count, + NULL, + &brw->wm.sampler_bo)) + return PIPE_OK; /* If we didnt find it in the cache, compute the state and put it in the * cache. 
*/ - if (brw->wm.sampler_bo == NULL) { - brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER, - &key, sizeof(key), - brw->wm.sdc_bo, key.sampler_count, - &key.sampler, sizeof(key.sampler), - NULL, NULL); - - /* Emit SDC relocations */ - for (i = 0; i < key.sampler_count; i++) { - brw->sws->bo_emit_reloc(brw->wm.sampler_bo, - BRW_USAGE_SAMPLER, - 0, - i * sizeof(struct brw_sampler_state) + - offsetof(struct brw_sampler_state, ss2), - brw->wm.sdc_bo[i]); - } + ret = brw_upload_cache(&brw->cache, BRW_SAMPLER, + &key, sizeof(key), + brw->wm.sdc_bo, key.sampler_count, + &key.sampler, sizeof(key.sampler), + NULL, NULL, + &brw->wm.sampler_bo); + if (ret) + return ret; + + /* Emit SDC relocations */ + for (i = 0; i < key.sampler_count; i++) { + ret = brw->sws->bo_emit_reloc(brw->wm.sampler_bo, + BRW_USAGE_SAMPLER, + 0, + i * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + brw->wm.sdc_bo[i]); + if (ret) + return ret; } return 0; diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index ccbb647bcd..86dc10540d 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -138,12 +138,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) /** * Setup wm hardware state. 
See page 225 of Volume 2 */ -static struct brw_winsys_buffer * +static enum pipe_error wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, - struct brw_winsys_buffer **reloc_bufs) + struct brw_winsys_buffer **reloc_bufs, + struct brw_winsys_buffer **bo_out) { struct brw_wm_unit_state wm; - struct brw_winsys_buffer *bo; + enum pipe_error ret; memset(&wm, 0, sizeof(wm)); @@ -222,45 +223,56 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, if (BRW_DEBUG & DEBUG_STATS || key->stats_wm) wm.wm4.stats_enable = 1; - bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, - key, sizeof(*key), - reloc_bufs, 3, - &wm, sizeof(wm), - NULL, NULL); + ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT, + key, sizeof(*key), + reloc_bufs, 3, + &wm, sizeof(wm), + NULL, NULL, + bo_out); + if (ret) + return ret; /* Emit WM program relocation */ - brw->sws->bo_emit_reloc(bo, - BRW_USAGE_STATE, - wm.thread0.grf_reg_count << 1, - offsetof(struct brw_wm_unit_state, thread0), - brw->wm.prog_bo); + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_STATE, + wm.thread0.grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); + if (ret) + return ret; /* Emit scratch space relocation */ if (key->total_scratch != 0) { - brw->sws->bo_emit_reloc(bo, - BRW_USAGE_SCRATCH, - wm.thread2.per_thread_scratch_space, - offsetof(struct brw_wm_unit_state, thread2), - brw->wm.scratch_bo); + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_SCRATCH, + wm.thread2.per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_bo); + if (ret) + return ret; } /* Emit sampler state relocation */ if (key->sampler_count != 0) { - brw->sws->bo_emit_reloc(bo, - BRW_USAGE_STATE, - wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), - offsetof(struct brw_wm_unit_state, wm4), - brw->wm.sampler_bo); + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_STATE, + wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), + 
offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); + if (ret) + return ret; } - return bo; + return PIPE_OK; } -static int upload_wm_unit( struct brw_context *brw ) +static enum pipe_error upload_wm_unit( struct brw_context *brw ) { struct brw_wm_unit_key key; struct brw_winsys_buffer *reloc_bufs[3]; + enum pipe_error ret; + wm_unit_populate_key(brw, &key); /* Allocate the necessary scratch space if we haven't already. Don't @@ -271,15 +283,19 @@ static int upload_wm_unit( struct brw_context *brw ) if (key.total_scratch) { GLuint total = key.total_scratch * key.max_threads; - if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { - brw->sws->bo_unreference(brw->wm.scratch_bo); - brw->wm.scratch_bo = NULL; - } + /* Do we need a new buffer: + */ + if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) + bo_reference(&brw->wm.scratch_bo, NULL); + if (brw->wm.scratch_bo == NULL) { - brw->wm.scratch_bo = brw->sws->bo_alloc(brw->sws, - BRW_BUFFER_TYPE_SHADER_SCRATCH, - total, - 4096); + ret = brw->sws->bo_alloc(brw->sws, + BRW_BUFFER_TYPE_SHADER_SCRATCH, + total, + 4096, + &brw->wm.scratch_bo); + if (ret) + return ret; } } @@ -287,16 +303,19 @@ static int upload_wm_unit( struct brw_context *brw ) reloc_bufs[1] = brw->wm.scratch_bo; reloc_bufs[2] = brw->wm.sampler_bo; - brw->sws->bo_unreference(brw->wm.state_bo); - brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT, - &key, sizeof(key), - reloc_bufs, 3, - NULL); - if (brw->wm.state_bo == NULL) { - brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); - } + if (brw_search_cache(&brw->cache, BRW_WM_UNIT, + &key, sizeof(key), + reloc_bufs, 3, + NULL, + &brw->wm.state_bo)) + return PIPE_OK; + + ret = wm_unit_create_from_key(brw, &key, reloc_bufs, + &brw->wm.state_bo); + if (ret) + return ret; - return 0; + return PIPE_OK; } const struct brw_tracked_state brw_wm_unit = { diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c 
index b055dde20c..e5d0329967 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -40,31 +40,40 @@ -static void +static enum pipe_error brw_update_texture_surface( struct brw_context *brw, struct brw_texture *tex, - GLuint surf ) + struct brw_winsys_buffer **bo_out) { - brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &tex->ss, sizeof tex->ss, - &tex->bo, 1, - NULL); - - if (brw->wm.surf_bo[surf] == NULL) { - brw->wm.surf_bo[surf] = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, - &tex->ss, sizeof tex->ss, - &tex->bo, 1, - &tex->ss, sizeof tex->ss, - NULL, NULL); + enum pipe_error ret; + + if (brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &tex->ss, sizeof tex->ss, + &tex->bo, 1, + NULL, + bo_out)) + return PIPE_OK; + + ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, + &tex->ss, sizeof tex->ss, + &tex->bo, 1, + &tex->ss, sizeof tex->ss, + NULL, NULL, + bo_out); + if (ret) + return ret; - /* Emit relocation to surface contents */ - brw->sws->bo_emit_reloc(brw->wm.surf_bo[surf], - BRW_USAGE_SAMPLER, - 0, - offsetof(struct brw_surface_state, ss1), - tex->bo); - } + /* Emit relocation to surface contents */ + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_SAMPLER, + 0, + offsetof(struct brw_surface_state, ss1), + tex->bo); + if (ret) + return ret; + + return PIPE_OK; } @@ -79,13 +88,14 @@ brw_update_texture_surface( struct brw_context *brw, * While it is only used for the front/back buffer currently, it should be * usable for further buffers when doing ARB_draw_buffer support. 
*/ -static void -brw_update_renderbuffer_surface(struct brw_context *brw, - struct brw_surface *surface, - unsigned int unit) +static enum pipe_error +brw_update_render_surface(struct brw_context *brw, + struct brw_surface *surface, + struct brw_winsys_buffer **bo_out) { struct brw_surf_ss0 blend_ss0 = brw->curr.blend->ss0; struct brw_surface_state ss; + enum pipe_error ret; /* Surfaces are potentially shared between contexts, so can't * scribble the in-place ss0 value in the surface. @@ -98,30 +108,35 @@ brw_update_renderbuffer_surface(struct brw_context *brw, ss.ss0.writedisable_red = blend_ss0.writedisable_red; ss.ss0.writedisable_alpha = blend_ss0.writedisable_alpha; - brw->sws->bo_unreference(brw->wm.surf_bo[unit]); - brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &ss, sizeof(ss), - &surface->bo, 1, - NULL); - - if (brw->wm.surf_bo[unit] == NULL) { - - brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache, - BRW_SS_SURFACE, - &ss, sizeof ss, - &surface->bo, 1, - &ss, sizeof ss, - NULL, NULL); + if (brw_search_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &ss, sizeof(ss), + &surface->bo, 1, + NULL, + bo_out)) + return PIPE_OK; + + ret = brw_upload_cache(&brw->surface_cache, + BRW_SS_SURFACE, + &ss, sizeof ss, + &surface->bo, 1, + &ss, sizeof ss, + NULL, NULL, + bo_out); + if (ret) + return ret; /* XXX: we will only be rendering to this surface: */ - brw->sws->bo_emit_reloc(brw->wm.surf_bo[unit], - BRW_USAGE_RENDER_TARGET, - ss.ss1.base_addr - surface->bo->offset[0], /* XXX */ - offsetof(struct brw_surface_state, ss1), - surface->bo); - } + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_RENDER_TARGET, + ss.ss1.base_addr - surface->bo->offset[0], /* XXX */ + offsetof(struct brw_surface_state, ss1), + surface->bo); + if (ret) + return ret; + + return PIPE_OK; } @@ -129,60 +144,60 @@ brw_update_renderbuffer_surface(struct brw_context *brw, * Constructs the binding table for the WM surface state, which maps unit * 
numbers to surface state objects. */ -static struct brw_winsys_buffer * -brw_wm_get_binding_table(struct brw_context *brw) +static enum pipe_error +brw_wm_get_binding_table(struct brw_context *brw, + struct brw_winsys_buffer **bo_out ) { - struct brw_winsys_buffer *bind_bo; + enum pipe_error ret; + uint32_t data[BRW_WM_MAX_SURF]; + GLuint data_size = brw->wm.nr_surfaces * sizeof data[0]; + int i; assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); + assert(brw->wm.nr_surfaces > 0); /* Note there is no key for this search beyond the values in the * relocation array: */ - bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->wm.surf_bo, brw->wm.nr_surfaces, - NULL); - - if (bind_bo == NULL) { - uint32_t data[BRW_WM_MAX_SURF]; - GLuint data_size = brw->wm.nr_surfaces * sizeof data[0]; - int i; - - for (i = 0; i < brw->wm.nr_surfaces; i++) - data[i] = brw->wm.surf_bo[i]->offset[0]; - - bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->wm.surf_bo, brw->wm.nr_surfaces, - data, data_size, - NULL, NULL); - - /* Emit binding table relocations to surface state */ - for (i = 0; i < brw->wm.nr_surfaces; i++) { - brw->sws->bo_emit_reloc(bind_bo, - BRW_USAGE_STATE, - 0, - i * sizeof(GLuint), - brw->wm.surf_bo[i]); - } + if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->wm.surf_bo, + brw->wm.nr_surfaces, + NULL, + bo_out)) + return PIPE_OK; + + for (i = 0; i < brw->wm.nr_surfaces; i++) + data[i] = brw->wm.surf_bo[i]->offset[0]; + + ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->wm.surf_bo, brw->wm.nr_surfaces, + data, data_size, + NULL, NULL, + bo_out); + if (ret) + return ret; + + /* Emit binding table relocations to surface state */ + for (i = 0; i < brw->wm.nr_surfaces; i++) { + ret = brw->sws->bo_emit_reloc(*bo_out, + BRW_USAGE_STATE, + 0, + i * sizeof(GLuint), + brw->wm.surf_bo[i]); + if (ret) + return ret; } - return bind_bo; + return PIPE_OK; } -static 
int prepare_wm_surfaces(struct brw_context *brw ) +static enum pipe_error prepare_wm_surfaces(struct brw_context *brw ) { - GLuint i; + enum pipe_error ret; int nr_surfaces = 0; - - /* Unreference old buffers - */ - for (i = 0; i < brw->wm.nr_surfaces; i++) { - brw->sws->bo_unreference(brw->wm.surf_bo[i]); - brw->wm.surf_bo[i] = NULL; - } - + GLuint i; /* PIPE_NEW_COLOR_BUFFERS | PIPE_NEW_BLEND * @@ -192,38 +207,51 @@ static int prepare_wm_surfaces(struct brw_context *brw ) * XXX: no color buffer case */ for (i = 0; i < brw->curr.fb.nr_cbufs; i++) { - brw_update_renderbuffer_surface(brw, - brw_surface(brw->curr.fb.cbufs[i]), - nr_surfaces++); + ret = brw_update_render_surface(brw, + brw_surface(brw->curr.fb.cbufs[i]), + &brw->wm.surf_bo[nr_surfaces++]); + if (ret) + return ret; } /* PIPE_NEW_TEXTURE */ for (i = 0; i < brw->curr.num_textures; i++) { - brw_update_texture_surface(brw, - brw_texture(brw->curr.texture[i]), - nr_surfaces++); + ret = brw_update_texture_surface(brw, + brw_texture(brw->curr.texture[i]), + &brw->wm.surf_bo[nr_surfaces++]); + if (ret) + return ret; } /* PIPE_NEW_FRAGMENT_CONSTANTS */ #if 0 if (brw->curr.fragment_constants) { - brw_update_fragment_constant_surface(brw, - brw->curr.fragment_constants, - nr_surfaces++); + ret = brw_update_fragment_constant_surface(brw, + brw->curr.fragment_constants, + &brw->wm.surf_bo[nr_surfaces++]); + if (ret) + return ret; } #endif if (brw->wm.nr_surfaces != nr_surfaces) { + + /* Unreference any left-over old buffers + */ + for (i = nr_surfaces; i < brw->wm.nr_surfaces; i++) + bo_reference(&brw->wm.surf_bo[i], NULL); + brw->wm.nr_surfaces = nr_surfaces; brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; } - brw->sws->bo_unreference(brw->wm.bind_bo); - brw->wm.bind_bo = brw_wm_get_binding_table(brw); + ret = brw_wm_get_binding_table(brw, &brw->wm.bind_bo); + if (ret) + return ret; - return 0; + return PIPE_OK; } const struct brw_tracked_state brw_wm_surfaces = { diff --git 
a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c index b1edca818a..fc465d7c14 100644 --- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -134,11 +134,12 @@ const char *data_types[BRW_DATA_MAX] = }; -static struct brw_winsys_buffer * +static enum pipe_error xlib_brw_bo_alloc( struct brw_winsys_screen *sws, - enum brw_buffer_type type, - unsigned size, - unsigned alignment ) + enum brw_buffer_type type, + unsigned size, + unsigned alignment, + struct brw_winsys_buffer **bo_out ) { struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws); struct xlib_brw_buffer *buf; @@ -148,12 +149,13 @@ xlib_brw_bo_alloc( struct brw_winsys_screen *sws, buf = CALLOC_STRUCT(xlib_brw_buffer); if (!buf) - return NULL; + return PIPE_ERROR_OUT_OF_MEMORY; + + pipe_reference_init(&buf->base.reference, 1); buf->offset = align(xbw->offset, alignment); buf->type = type; buf->virtual = MALLOC(size); - buf->cheesy_refcount = 1; buf->base.offset = &buf->offset; /* hmm, cheesy */ buf->base.size = size; @@ -161,36 +163,25 @@ xlib_brw_bo_alloc( struct brw_winsys_screen *sws, if (xbw->offset > MAX_VRAM) goto err; - return &buf->base; + /* XXX: possibly rentrant call to bo_destroy: + */ + bo_reference(bo_out, &buf->base); + return PIPE_OK; err: assert(0); + FREE(buf->virtual); FREE(buf); - return NULL; -} - -static void -xlib_brw_bo_reference( struct brw_winsys_buffer *buffer ) -{ - struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); - - buf->cheesy_refcount++; + return PIPE_ERROR_OUT_OF_MEMORY; } static void -xlib_brw_bo_unreference( struct brw_winsys_buffer *buffer ) +xlib_brw_bo_destroy( struct brw_winsys_buffer *buffer ) { struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); - /* As a special favor in this call only, buffer is allowed to be - * NULL: - */ - if (buffer == NULL) - return; - - if (--buf->cheesy_refcount == 0) { - FREE(buffer); - } + FREE(buf->virtual); + FREE(buf); } static int @@ -378,8 
+369,7 @@ xlib_create_brw_winsys_screen( void ) ws->base.destroy = xlib_brw_winsys_destroy; ws->base.bo_alloc = xlib_brw_bo_alloc; - ws->base.bo_reference = xlib_brw_bo_reference; - ws->base.bo_unreference = xlib_brw_bo_unreference; + ws->base.bo_destroy = xlib_brw_bo_destroy; ws->base.bo_emit_reloc = xlib_brw_bo_emit_reloc; ws->base.bo_exec = xlib_brw_bo_exec; ws->base.bo_subdata = xlib_brw_bo_subdata; -- cgit v1.2.3 From 203adb8ea68da0fbb2e4643e36e273f31c29980f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 15:15:04 +0000 Subject: i965g: remove old dumping code --- src/gallium/drivers/i965/Makefile | 1 - src/gallium/drivers/i965/SConscript | 1 - src/gallium/drivers/i965/brw_state_dump.c | 230 ------------------------------ 3 files changed, 232 deletions(-) delete mode 100644 src/gallium/drivers/i965/brw_state_dump.c (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 6c0d3541d7..f0a5bc7ee5 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -41,7 +41,6 @@ C_SOURCES = \ brw_sf_state.c \ brw_state_batch.c \ brw_state_cache.c \ - brw_state_dump.c \ brw_state_upload.c \ brw_structs_dump.c \ brw_swtnl.c \ diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript index d38ad6fe7e..9c2faaf4b4 100644 --- a/src/gallium/drivers/i965/SConscript +++ b/src/gallium/drivers/i965/SConscript @@ -50,7 +50,6 @@ i965 = env.ConvenienceLibrary( 'brw_state_batch.c', 'brw_state_cache.c', # 'brw_state_debug.c', - 'brw_state_dump.c', 'brw_state_upload.c', 'brw_swtnl.c', 'brw_urb.c', diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c deleted file mode 100644 index 388331ee62..0000000000 --- a/src/gallium/drivers/i965/brw_state_dump.c +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright © 2007 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person 
obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Eric Anholt - * - */ - -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "brw_winsys.h" - -/** - * Prints out a header, the contents, and the message associated with - * the hardware state data given. - * - * \param name Name of the state object - * \param data Pointer to the base of the state object - * \param hw_offset Hardware offset of the base of the state data. - * \param index Index of the DWORD being output. - */ -static void -state_out(const char *name, void *data, uint32_t hw_offset, int index, - char *fmt, ...) 
-{ - va_list va; - - debug_printf("%8s: 0x%08x: 0x%08x: ", - name, hw_offset + index * 4, ((uint32_t *)data)[index]); - va_start(va, fmt); - debug_vprintf(fmt, va); - va_end(va); -} - -/** Generic, undecoded state buffer debug printout */ -static void -state_struct_out(struct brw_winsys_screen *sws, - const char *name, - struct brw_winsys_buffer *buffer, - unsigned int state_size) -{ - int i; - void *data; - - if (buffer == NULL) - return; - - data = sws->bo_map(buffer, BRW_DATA_OTHER, GL_FALSE); - for (i = 0; i < state_size / 4; i++) { - state_out(name, data, buffer->offset[0], i, - "dword %d\n", i); - } - sws->bo_unmap(buffer); -} - -static const char * -get_965_surfacetype(unsigned int surfacetype) -{ - switch (surfacetype) { - case 0: return "1D"; - case 1: return "2D"; - case 2: return "3D"; - case 3: return "CUBE"; - case 4: return "BUFFER"; - case 7: return "NULL"; - default: return "unknown"; - } -} - -static const char * -get_965_surface_format(unsigned int surface_format) -{ - switch (surface_format) { - case 0x000: return "r32g32b32a32_float"; - case 0x0c1: return "b8g8r8a8_unorm"; - case 0x100: return "b5g6r5_unorm"; - case 0x102: return "b5g5r5a1_unorm"; - case 0x104: return "b4g4r4a4_unorm"; - default: return "unknown"; - } -} - -static void dump_wm_surface_state(struct brw_context *brw) -{ - int i; - - for (i = 0; i < brw->wm.nr_surfaces; i++) { - struct brw_winsys_buffer *surf_bo = brw->wm.surf_bo[i]; - unsigned int surfoff; - struct brw_surface_state *surf; - char name[20]; - - if (surf_bo == NULL) { - debug_printf(" WM SS%d: NULL\n", i); - continue; - } - surf = (struct brw_surface_state *)brw->sws->bo_map(surf_bo, - BRW_DATA_OTHER, - GL_FALSE); - surfoff = surf_bo->offset[0]; - - sprintf(name, "WM SS%d", i); - state_out(name, surf, surfoff, 0, "%s %s\n", - get_965_surfacetype(surf->ss0.surface_type), - get_965_surface_format(surf->ss0.surface_format)); - state_out(name, surf, surfoff, 1, "offset\n"); - state_out(name, surf, surfoff, 2, "%dx%d 
size, %d mips\n", - surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); - state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n", - surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not "); - state_out(name, surf, surfoff, 4, "mip base %d\n", - surf->ss4.min_lod); - state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", - surf->ss5.x_offset, surf->ss5.y_offset); - - brw->sws->bo_unmap(surf_bo); - } -} - -static void dump_sf_viewport_state(struct brw_context *brw) -{ - const char *name = "SF VP"; - struct brw_sf_viewport *vp; - uint32_t vp_off; - - if (brw->sf.vp_bo == NULL) - return; - - vp = (struct brw_sf_viewport *)brw->sws->bo_map(brw->sf.vp_bo, - BRW_DATA_OTHER, - GL_FALSE); - vp_off = brw->sf.vp_bo->offset[0]; - - state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); - state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); - state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); - state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); - state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); - state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); - - state_out(name, vp, vp_off, 6, "top left = %d,%d\n", - vp->scissor.xmin, vp->scissor.ymin); - state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", - vp->scissor.xmax, vp->scissor.ymax); - - brw->sws->bo_unmap(brw->sf.vp_bo); -} - -static void brw_debug_prog(struct brw_winsys_screen *sws, - const char *name, - struct brw_winsys_buffer *prog) -{ - unsigned int i; - uint32_t *data; - - if (prog == NULL) - return; - - data = (uint32_t *)sws->bo_map(prog, - BRW_DATA_OTHER, - GL_FALSE); - - for (i = 0; i < prog->size / 4 / 4; i++) { - debug_printf("%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", - name, (unsigned int)prog->offset + i * 4 * 4, - data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); - /* Stop at the end of the program. It'd be nice to keep track of the actual - * intended program size instead of guessing like this. 
- */ - if (data[i * 4 + 0] == 0 && - data[i * 4 + 1] == 0 && - data[i * 4 + 2] == 0 && - data[i * 4 + 3] == 0) - break; - } - - sws->bo_unmap(prog); -} - - -/** - * Print additional debug information associated with the batchbuffer - * when DEBUG_BATCH is set. - * - * For 965, this means mapping the state buffers that would have been referenced - * by the batchbuffer and dumping them. - * - * The buffer offsets printed rely on the buffer containing the last offset - * it was validated at. - */ -void brw_debug_batch(struct brw_context *brw) -{ - struct brw_winsys_screen *sws = brw->sws; - - state_struct_out(sws, "WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); - dump_wm_surface_state(brw); - - state_struct_out(sws, "VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state)); - brw_debug_prog(sws, "VS prog", brw->vs.prog_bo); - - state_struct_out(sws, "GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state)); - brw_debug_prog(sws, "GS prog", brw->gs.prog_bo); - - state_struct_out(sws, "SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state)); - dump_sf_viewport_state(brw); - brw_debug_prog(sws, "SF prog", brw->sf.prog_bo); - - state_struct_out(sws, "WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state)); - brw_debug_prog(sws, "WM prog", brw->wm.prog_bo); -} -- cgit v1.2.3 From aa9773d056a8799050304f75c1bf4c1f470e7e53 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 15:34:18 +0000 Subject: i965g: disassemble more than one instruction at a time --- src/gallium/drivers/i965/brw_context.h | 4 +++- src/gallium/drivers/i965/brw_disasm.c | 28 +++++++++++++++++++++++----- src/gallium/drivers/i965/brw_vs_emit.c | 4 +--- src/gallium/drivers/i965/brw_wm_emit.c | 6 +----- src/gallium/drivers/i965/brw_wm_glsl.c | 4 +--- src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 22 ++++++++-------------- 6 files changed, 37 insertions(+), 31 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h 
b/src/gallium/drivers/i965/brw_context.h index 580251d2f1..e0c1c57ed7 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -794,7 +794,9 @@ int brw_upload_urb_fence(struct brw_context *brw); int brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ -int brw_disasm (FILE *file, struct brw_instruction *inst); +int brw_disasm (FILE *file, + const struct brw_instruction *inst, + unsigned count); /*====================================================================== * Inline conversion functions. These are better-typed than the diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 29fe848005..df0c7b9a2b 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -455,7 +455,7 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr) return err; } -static int dest (FILE *file, struct brw_instruction *inst) +static int dest (FILE *file, const struct brw_instruction *inst) { int err = 0; @@ -621,7 +621,7 @@ static int src_da16 (FILE *file, } -static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { +static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) { switch (type) { case BRW_REGISTER_TYPE_UD: format (file, "0x%08xUD", inst->bits3.ud); @@ -650,7 +650,7 @@ static int imm (FILE *file, GLuint type, struct brw_instruction *inst) { return 0; } -static int src0 (FILE *file, struct brw_instruction *inst) +static int src0 (FILE *file, const struct brw_instruction *inst) { if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE) return imm (file, inst->bits1.da1.src0_reg_type, @@ -710,7 +710,7 @@ static int src0 (FILE *file, struct brw_instruction *inst) } } -static int src1 (FILE *file, struct brw_instruction *inst) +static int src1 (FILE *file, const struct brw_instruction *inst) { if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE) return imm (file, inst->bits1.da1.src1_reg_type, 
@@ -770,7 +770,7 @@ static int src1 (FILE *file, struct brw_instruction *inst) } } -int brw_disasm (FILE *file, struct brw_instruction *inst) +static int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) { int err = 0; int space = 0; @@ -900,3 +900,21 @@ int brw_disasm (FILE *file, struct brw_instruction *inst) newline (file); return err; } + + +int brw_disasm (FILE *file, + const struct brw_instruction *inst, + unsigned count) +{ + int i, err; + + for (i = 0; i < count; i++) { + err = brw_disasm_insn(stderr, &inst[i]); + if (err) + return err; + } + + fprintf(file, "\n"); + return 0; +} + diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 95e2b8e2cb..d86e2104d8 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -1627,8 +1627,6 @@ void brw_vs_emit(struct brw_vs_compile *c) int i; debug_printf("vs-native:\n"); - for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); - debug_printf("\n"); + brw_disasm(stderr, p->store, p->nr_insn); } } diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index a705d8b344..1c38f80cda 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -1512,11 +1512,7 @@ void brw_wm_emit( struct brw_wm_compile *c ) } if (BRW_DEBUG & DEBUG_WM) { - int i; - debug_printf("wm-native:\n"); - for (i = 0; i < p->nr_insn; i++) - brw_disasm(stderr, &p->store[i]); - debug_printf("\n"); + brw_disasm(stderr, p->store, p->nr_insn); } } diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index a06b0a446e..284f819bf8 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -2003,9 +2003,7 @@ static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_ if (BRW_DEBUG & DEBUG_WM) { debug_printf("wm-native:\n"); - for (i = 0; i < p->nr_insn; i++) - 
brw_disasm(stderr, &p->store[i]); - debug_printf("\n"); + brw_disasm(stderr, p->store, p->nr_insn); } } diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c index 54cf56c811..d129067ba3 100644 --- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -47,7 +47,9 @@ #define MAX_VRAM (128*1024*1024) -extern int brw_disasm (FILE *file, struct brw_instruction *inst); +extern int brw_disasm (FILE *file, + const struct brw_instruction *inst, + unsigned count ); struct xlib_brw_buffer { @@ -236,7 +238,11 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer, brw_dump_cc_unit_state( data ); break; case BRW_DATA_GS_WM_PROG: - brw_disasm( stderr, data ); /* disassem */ + case BRW_DATA_GS_SF_PROG: + case BRW_DATA_GS_VS_PROG: + case BRW_DATA_GS_GS_PROG: + case BRW_DATA_GS_CLIP_PROG: + brw_disasm( stderr, data, size / sizeof(struct brw_instruction) ); break; case BRW_DATA_GS_SAMPLER_DEFAULT_COLOR: brw_dump_sampler_default_color( data ); @@ -247,9 +253,6 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer, case BRW_DATA_GS_WM_UNIT: brw_dump_wm_unit_state( data ); break; - case BRW_DATA_GS_SF_PROG: - brw_disasm( stderr, data ); /* disassem */ - break; case BRW_DATA_GS_SF_VP: brw_dump_sf_viewport( data ); break; @@ -259,24 +262,15 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer, case BRW_DATA_GS_VS_UNIT: brw_dump_vs_unit_state( data ); break; - case BRW_DATA_GS_VS_PROG: - brw_disasm( stderr, data ); /* disassem */ - break; case BRW_DATA_GS_GS_UNIT: brw_dump_gs_unit_state( data ); break; - case BRW_DATA_GS_GS_PROG: - brw_disasm( stderr, data ); /* disassem */ - break; case BRW_DATA_GS_CLIP_VP: brw_dump_clipper_viewport( data ); break; case BRW_DATA_GS_CLIP_UNIT: brw_dump_clip_unit_state( data ); break; - case BRW_DATA_GS_CLIP_PROG: - brw_disasm( stderr, data ); /* disassem */ - break; case BRW_DATA_SS_SURFACE: brw_dump_surface_state( data ); break; -- cgit v1.2.3 From 
674c390aaf9d797dbedda1285d4fdacb3d334a67 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 17:41:35 +0000 Subject: i965g: add const qualifiers --- src/gallium/drivers/i965/intel_decode.c | 40 ++++++++++++++++----------------- src/gallium/drivers/i965/intel_decode.h | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c index 1fb1b66cc8..3166958bad 100644 --- a/src/gallium/drivers/i965/intel_decode.c +++ b/src/gallium/drivers/i965/intel_decode.c @@ -70,7 +70,7 @@ int_as_float(uint32_t intval) } static void -instr_out(uint32_t *data, uint32_t hw_offset, unsigned int index, +instr_out(const uint32_t *data, uint32_t hw_offset, unsigned int index, char *fmt, ...) { va_list va; @@ -84,7 +84,7 @@ instr_out(uint32_t *data, uint32_t hw_offset, unsigned int index, static int -decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { unsigned int opcode; @@ -149,7 +149,7 @@ decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures) } static int -decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { unsigned int opcode, len; char *format = NULL; @@ -306,7 +306,7 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures) } static int -decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { switch ((data[0] & 0x00f80000) >> 19) { case 0x11: @@ -333,7 +333,7 @@ decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures) /** Sets the string dstname to describe the destination of the PS instruction */ static void -i915_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask) 
+i915_get_instruction_dst(const uint32_t *data, int i, char *dstname, int do_mask) { uint32_t a0 = data[i]; int dst_nr = (a0 >> 14) & 0xf; @@ -466,7 +466,7 @@ i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name) } static void -i915_get_instruction_src0(uint32_t *data, int i, char *srcname) +i915_get_instruction_src0(const uint32_t *data, int i, char *srcname) { uint32_t a0 = data[i]; uint32_t a1 = data[i + 1]; @@ -484,7 +484,7 @@ i915_get_instruction_src0(uint32_t *data, int i, char *srcname) } static void -i915_get_instruction_src1(uint32_t *data, int i, char *srcname) +i915_get_instruction_src1(const uint32_t *data, int i, char *srcname) { uint32_t a1 = data[i + 1]; uint32_t a2 = data[i + 2]; @@ -502,7 +502,7 @@ i915_get_instruction_src1(uint32_t *data, int i, char *srcname) } static void -i915_get_instruction_src2(uint32_t *data, int i, char *srcname) +i915_get_instruction_src2(const uint32_t *data, int i, char *srcname) { uint32_t a2 = data[i + 2]; int src_nr = (a2 >> 16) & 0x1f; @@ -559,7 +559,7 @@ i915_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name) } static void -i915_decode_alu1(uint32_t *data, uint32_t hw_offset, +i915_decode_alu1(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix, char *op_name) { char dst[100], src0[100]; @@ -574,7 +574,7 @@ i915_decode_alu1(uint32_t *data, uint32_t hw_offset, } static void -i915_decode_alu2(uint32_t *data, uint32_t hw_offset, +i915_decode_alu2(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix, char *op_name) { char dst[100], src0[100], src1[100]; @@ -590,7 +590,7 @@ i915_decode_alu2(uint32_t *data, uint32_t hw_offset, } static void -i915_decode_alu3(uint32_t *data, uint32_t hw_offset, +i915_decode_alu3(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix, char *op_name) { char dst[100], src0[100], src1[100], src2[100]; @@ -607,7 +607,7 @@ i915_decode_alu3(uint32_t *data, uint32_t hw_offset, } static void 
-i915_decode_tex(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix, +i915_decode_tex(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix, char *tex_name) { uint32_t t0 = data[i]; @@ -629,7 +629,7 @@ i915_decode_tex(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix, } static void -i915_decode_dcl(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix) +i915_decode_dcl(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix) { uint32_t d0 = data[i]; char *sampletype; @@ -710,7 +710,7 @@ i915_decode_dcl(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix) } static void -i915_decode_instruction(uint32_t *data, uint32_t hw_offset, +i915_decode_instruction(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix) { switch ((data[i] >> 24) & 0x1f) { @@ -800,7 +800,7 @@ i915_decode_instruction(uint32_t *data, uint32_t hw_offset, } static int -decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830) +decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830) { unsigned int len, i, c, opcode, word, map, sampler, instr; char *format; @@ -1073,7 +1073,7 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i } static int -decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset, +decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { char immediate = (data[0] & (1 << 23)) == 0; @@ -1260,7 +1260,7 @@ decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset, } static int -decode_3d(uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { unsigned int opcode; @@ -1406,7 +1406,7 @@ get_965_prim_type(uint32_t data) } static int -decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int 
*failures) { unsigned int opcode, len; int i; @@ -1667,7 +1667,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures) } static int -decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, int *failures) +decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failures) { unsigned int opcode; @@ -1741,7 +1741,7 @@ decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, int *failures) * \param hw_offset hardware address for the buffer */ int -intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid) +intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid) { int index = 0; int failures = 0; diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h index c50644a46b..7683097b86 100644 --- a/src/gallium/drivers/i965/intel_decode.h +++ b/src/gallium/drivers/i965/intel_decode.h @@ -25,5 +25,5 @@ * */ -int intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid); +int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid); void intel_decode_context_reset(void); -- cgit v1.2.3 From 205871c76ad2e655a9180900359d8f9ac690a912 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 17:42:13 +0000 Subject: i965g: use Elements in loops over arrays --- src/gallium/drivers/i965/brw_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index 2cee7a7a3c..8e1421e738 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -85,12 +85,12 @@ static void brw_destroy_context( struct pipe_context *pipe ) bo_reference(&brw->sf.state_bo, NULL); bo_reference(&brw->sf.vp_bo, NULL); - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) + for (i = 0; i < Elements(brw->wm.sdc_bo); i++) bo_reference(&brw->wm.sdc_bo[i], NULL); 
bo_reference(&brw->wm.bind_bo, NULL); - for (i = 0; i < BRW_WM_MAX_SURF; i++) + for (i = 0; i < Elements(brw->wm.surf_bo); i++) bo_reference(&brw->wm.surf_bo[i], NULL); bo_reference(&brw->wm.sampler_bo, NULL); -- cgit v1.2.3 From a70e6178d4841f490ff318b6017a1ddacfadf752 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 17:42:38 +0000 Subject: i965g: correct size of surf_bo array --- src/gallium/drivers/i965/brw_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index e0c1c57ed7..f853255261 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -705,7 +705,7 @@ struct brw_context /** Binding table of pointers to surf_bo entries */ struct brw_winsys_buffer *bind_bo; - struct brw_winsys_buffer *surf_bo[PIPE_MAX_COLOR_BUFS]; + struct brw_winsys_buffer *surf_bo[BRW_WM_MAX_SURF]; struct brw_winsys_buffer *prog_bo; struct brw_winsys_buffer *state_bo; -- cgit v1.2.3 From b229ee342f2cef5396a251525d5b499760280933 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 17:43:57 +0000 Subject: brw: push more dumping into the winsys --- src/gallium/drivers/i965/brw_batchbuffer.c | 22 +----- src/gallium/drivers/i965/brw_vs_emit.c | 2 - src/gallium/drivers/i965/brw_winsys.h | 8 ++ src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 111 ++++++++++++++++++++------- 4 files changed, 93 insertions(+), 50 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index e5f73bd6a3..76a7d2d2af 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -54,7 +54,7 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch) batch->map = batch->malloc_buffer; else batch->map = batch->sws->bo_map(batch->buf, - BRW_DATA_OTHER, + 
BRW_DATA_BATCH_BUFFER, GL_TRUE); batch->size = BRW_BATCH_SIZE; @@ -136,7 +136,7 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, if (batch->use_malloc_buffer) { batch->sws->bo_subdata(batch->buf, - BRW_DATA_OTHER, + BRW_DATA_BATCH_BUFFER, 0, used, batch->map ); batch->map = NULL; @@ -150,19 +150,6 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, batch->sws->bo_exec(batch->buf, used ); - if (1 /*BRW_DEBUG & DEBUG_BATCH*/) { - void *ptr = batch->sws->bo_map(batch->buf, - BRW_DATA_OTHER, - GL_FALSE); - - intel_decode(ptr, - used / 4, - batch->buf->offset[0], - batch->chipset.pci_id); - - batch->sws->bo_unmap(batch->buf); - } - if (BRW_DEBUG & DEBUG_SYNC) { /* Abuse map/unmap to achieve wait-for-fence. * @@ -170,10 +157,7 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, * interface. */ debug_printf("waiting for idle\n"); - batch->sws->bo_map(batch->buf, - BRW_DATA_OTHER, - GL_TRUE); - batch->sws->bo_unmap(batch->buf); + batch->sws->bo_wait_idle(batch->buf); } /* Reset the buffer: diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index d86e2104d8..3217777acb 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -1624,8 +1624,6 @@ void brw_vs_emit(struct brw_vs_compile *c) post_vs_emit(c, end_inst, last_inst); if (BRW_DEBUG & DEBUG_VS) { - int i; - debug_printf("vs-native:\n"); brw_disasm(stderr, p->store, p->nr_insn); } diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index f61c541ad1..e041b0acaf 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -105,6 +105,8 @@ enum brw_buffer_data_type { BRW_DATA_GS_CLIP_PROG, BRW_DATA_SS_SURFACE, BRW_DATA_SS_SURF_BIND, + BRW_DATA_CONSTANT_BUFFER, + BRW_DATA_BATCH_BUFFER, BRW_DATA_OTHER, BRW_DATA_MAX }; @@ -176,6 +178,12 @@ struct brw_winsys_screen { void (*bo_unmap)(struct brw_winsys_buffer *buffer); /*@}*/ + + /* Wait for buffer to 
go idle. Similar to map+unmap, but doesn't + * mark buffer contents as dirty. + */ + void (*bo_wait_idle)(struct brw_winsys_buffer *buffer); + /** * Destroy the winsys. */ diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c index d129067ba3..5aec332761 100644 --- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -51,14 +51,19 @@ extern int brw_disasm (FILE *file, const struct brw_instruction *inst, unsigned count ); +extern int intel_decode(const uint32_t *data, + int count, + uint32_t hw_offset, + uint32_t devid); + struct xlib_brw_buffer { struct brw_winsys_buffer base; + char *virtual; unsigned offset; unsigned type; - char *virtual; - unsigned cheesy_refcount; int map_count; + boolean modified; }; @@ -68,7 +73,10 @@ struct xlib_brw_buffer struct xlib_brw_winsys { struct brw_winsys_screen base; - unsigned offset; + struct brw_chipset chipset; + + unsigned size; + unsigned used; }; static struct xlib_brw_winsys * @@ -157,14 +165,15 @@ xlib_brw_bo_alloc( struct brw_winsys_screen *sws, pipe_reference_init(&buf->base.reference, 1); - buf->offset = align(xbw->offset, alignment); + buf->offset = align(xbw->used, alignment); buf->type = type; buf->virtual = MALLOC(size); buf->base.offset = &buf->offset; /* hmm, cheesy */ buf->base.size = size; + buf->base.sws = sws; - xbw->offset = align(xbw->offset, alignment) + size; - if (xbw->offset > MAX_VRAM) + xbw->used = align(xbw->used, alignment) + size; + if (xbw->used > MAX_VRAM) goto err; /* XXX: possibly rentrant call to bo_destroy: @@ -184,7 +193,6 @@ xlib_brw_bo_destroy( struct brw_winsys_buffer *buffer ) { struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); - FREE(buf->virtual); FREE(buf); } @@ -217,19 +225,11 @@ xlib_brw_bo_exec( struct brw_winsys_buffer *buffer, return 0; } -static int -xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer, - enum brw_buffer_data_type data_type, - size_t offset, - size_t size, - 
const void *data) +static void dump_data( struct xlib_brw_winsys *xbw, + enum brw_buffer_data_type data_type, + const void *data, + size_t size ) { - struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); - - debug_printf("%s buf %p off %d sz %d data %p %s\n", - __FUNCTION__, - (void *)buffer, offset, size, data, data_types[data_type]); - switch (data_type) { case BRW_DATA_GS_CC_VP: brw_dump_cc_viewport( data ); @@ -278,12 +278,39 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer, break; case BRW_DATA_OTHER: break; + case BRW_DATA_BATCH_BUFFER: + intel_decode(data, size / 4, 0, xbw->chipset.pci_id); + break; + case BRW_DATA_CONSTANT_BUFFER: + break; default: assert(0); break; } +} + + +static int +xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer, + enum brw_buffer_data_type data_type, + size_t offset, + size_t size, + const void *data) +{ + struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); + struct xlib_brw_winsys *xbw = xlib_brw_winsys(buffer->sws); + + debug_printf("%s buf %p off %d sz %d %s\n", + __FUNCTION__, + (void *)buffer, offset, size, data_types[data_type]); + + if (1) + dump_data( xbw, data_type, data, size ); + assert(buf->base.size >= offset + size); memcpy(buf->virtual + offset, data, size); + + return 0; } @@ -324,7 +351,7 @@ xlib_brw_check_aperture_space( struct brw_winsys_screen *iws, static void * xlib_brw_bo_map(struct brw_winsys_buffer *buffer, enum brw_buffer_data_type data_type, - boolean write) + boolean write) { struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); @@ -332,6 +359,9 @@ xlib_brw_bo_map(struct brw_winsys_buffer *buffer, write ? "read/write" : "read", write ? data_types[data_type] : ""); + if (write) + buf->modified = 1; + buf->map_count++; return buf->virtual; } @@ -345,14 +375,30 @@ xlib_brw_bo_unmap(struct brw_winsys_buffer *buffer) --buf->map_count; assert(buf->map_count >= 0); + + if (buf->map_count == 0 && + buf->modified) { + + buf->modified = 0; + + /* Consider dumping new buffer contents here. 
+ */ + } +} + + +static void +xlib_brw_bo_wait_idle( struct brw_winsys_buffer *buffer ) +{ } static void -xlib_brw_winsys_destroy( struct brw_winsys_screen *screen ) +xlib_brw_winsys_destroy( struct brw_winsys_screen *sws ) { - /* XXX: free all buffers */ - FREE(screen); + struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws); + + FREE(xbw); } static struct brw_winsys_screen * @@ -364,6 +410,8 @@ xlib_create_brw_winsys_screen( void ) if (!ws) return NULL; + ws->used = 0; + ws->base.destroy = xlib_brw_winsys_destroy; ws->base.bo_alloc = xlib_brw_bo_alloc; ws->base.bo_destroy = xlib_brw_bo_destroy; @@ -375,6 +423,7 @@ xlib_create_brw_winsys_screen( void ) ws->base.check_aperture_space = xlib_brw_check_aperture_space; ws->base.bo_map = xlib_brw_bo_map; ws->base.bo_unmap = xlib_brw_bo_unmap; + ws->base.bo_wait_idle = xlib_brw_bo_wait_idle; return &ws->base; } @@ -388,12 +437,14 @@ static void xlib_i965_display_surface(struct xmesa_buffer *xm_buffer, struct pipe_surface *surf) { - /* struct brw_texture *texture = brw_texture(surf->texture); */ - - debug_printf("%s tex %p, sz %dx%d\n", __FUNCTION__, - (void *)surf->texture, - surf->texture->width[0], - surf->texture->height[0]); + struct brw_surface *surface = brw_surface(surf); + struct xlib_brw_buffer *bo = xlib_brw_buffer(surface->bo); + + debug_printf("%s offset %x+%x sz %dx%d\n", __FUNCTION__, + bo->offset, + surface->draw_offset, + surf->width, + surf->height); } static void @@ -419,6 +470,8 @@ xlib_create_i965_screen( void ) if (screen == NULL) goto fail; + xlib_brw_winsys(winsys)->chipset = brw_screen(screen)->chipset; + screen->flush_frontbuffer = xlib_i965_flush_frontbuffer; return screen; -- cgit v1.2.3 From 8f0e51be4784273baef692583940964bc04b78ef Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 19:57:59 +0000 Subject: i965g: correct sense of writedisable flags --- src/gallium/drivers/i965/brw_pipe_blend.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 
'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c index f6da9254ef..872151222d 100644 --- a/src/gallium/drivers/i965/brw_pipe_blend.c +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -146,10 +146,10 @@ static void *brw_create_blend_state( struct pipe_context *pipe, /* Per-surface color mask -- just follow global state: */ - blend->ss0.writedisable_red = (templ->colormask & PIPE_MASK_R) ? 1 : 0; - blend->ss0.writedisable_green = (templ->colormask & PIPE_MASK_G) ? 1 : 0; - blend->ss0.writedisable_blue = (templ->colormask & PIPE_MASK_B) ? 1 : 0; - blend->ss0.writedisable_alpha = (templ->colormask & PIPE_MASK_A) ? 1 : 0; + blend->ss0.writedisable_red = (templ->colormask & PIPE_MASK_R) ? 0 : 1; + blend->ss0.writedisable_green = (templ->colormask & PIPE_MASK_G) ? 0 : 1; + blend->ss0.writedisable_blue = (templ->colormask & PIPE_MASK_B) ? 0 : 1; + blend->ss0.writedisable_alpha = (templ->colormask & PIPE_MASK_A) ? 0 : 1; return (void *)blend; } -- cgit v1.2.3 From 658da189b62c4086c08950f3da5767e628235b55 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 19:58:02 +0000 Subject: i965g: remove duplicate viewport state in brw_context --- src/gallium/drivers/i965/brw_cc.c | 2 +- src/gallium/drivers/i965/brw_context.h | 5 +---- src/gallium/drivers/i965/brw_sf_state.c | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index 8e25fe8585..ba16fc4f6b 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -66,7 +66,7 @@ static enum pipe_error prepare_cc_vp( struct brw_context *brw ) memset(&ccv, 0, sizeof(ccv)); /* PIPE_NEW_VIEWPORT */ - calc_sane_viewport( &brw->curr.vp, &svp ); + calc_sane_viewport( &brw->curr.viewport, &svp ); ccv.min_depth = svp.near; ccv.max_depth = svp.far; diff --git a/src/gallium/drivers/i965/brw_context.h 
b/src/gallium/drivers/i965/brw_context.h index f853255261..177fe2172d 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -514,18 +514,15 @@ struct brw_context unsigned num_vertex_buffers; struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; struct pipe_framebuffer_state fb; - struct pipe_viewport_state vp; struct pipe_clip_state ucp; struct pipe_buffer *vertex_constants; struct pipe_buffer *fragment_constants; - struct pipe_viewport_state viewport; struct brw_blend_constant_color bcc; struct brw_polygon_stipple bps; - - /** * Index buffer for this draw_prims call. * diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index f030f26c19..bd8fc65b9e 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -41,7 +41,7 @@ static enum pipe_error upload_sf_vp(struct brw_context *brw) { - const struct pipe_viewport_state *vp = &brw->curr.vp; + const struct pipe_viewport_state *vp = &brw->curr.viewport; const struct pipe_scissor_state *scissor = &brw->curr.scissor; struct brw_sf_viewport sfv; enum pipe_error ret; -- cgit v1.2.3 From 963728665aa0d48d4fdbba4276084528f221ee39 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 20:34:27 +0000 Subject: i965g: make the winsys responsible for all buffer->offset handling The winsys now inserts the presumed offset into referring buffers from inside of bo_emit_reloc(). Remove the many locally coded places where this was happening in the driver and eliminate the worry of getting it wrong. No longer need to expose offset values to the driver at all, so no need to worry about what to do in the driver when they change. Just use zero values wherever we had offsets previously -- the relocations will fix it all up for us. 
--- src/gallium/drivers/i965/brw_batchbuffer.c | 11 +++++------ src/gallium/drivers/i965/brw_cc.c | 2 +- src/gallium/drivers/i965/brw_clip_state.c | 2 +- src/gallium/drivers/i965/brw_gs_state.c | 4 ++-- src/gallium/drivers/i965/brw_screen_texture.c | 8 +++++--- src/gallium/drivers/i965/brw_sf_state.c | 6 ++++-- src/gallium/drivers/i965/brw_vs_state.c | 2 +- src/gallium/drivers/i965/brw_winsys.h | 1 - src/gallium/drivers/i965/brw_wm_sampler_state.c | 2 +- src/gallium/drivers/i965/brw_wm_state.c | 13 ++++--------- src/gallium/drivers/i965/brw_wm_surface_state.c | 7 +++++-- src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 1 - 12 files changed, 29 insertions(+), 30 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index 76a7d2d2af..a55be6faab 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -115,7 +115,7 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, file, line, used); if (ALWAYS_EMIT_MI_FLUSH) { - *(GLuint *) (batch->ptr) = ((MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE); + *(GLuint *) (batch->ptr) = MI_FLUSH | BRW_FLUSH_STATE_CACHE; batch->ptr += 4; used = batch->ptr - batch->map; } @@ -192,12 +192,11 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch, if (ret != 0) return ret; - /* - * Using the old buffer offset, write in what the right data would be, in case - * the buffer doesn't move and we can short-circuit the relocation processing - * in the kernel + /* bo_emit_reloc was responsible for writing a zero into the + * batchbuffer if necessary. Just need to update our pointer. 
*/ - brw_batchbuffer_emit_dword (batch, buffer->offset[0] + delta); + batch->ptr += 4; + return 0; } diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index ba16fc4f6b..78d83929e0 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -142,7 +142,7 @@ cc_unit_create_from_key(struct brw_context *brw, cc.cc3 = key->cc3; /* CACHE_NEW_CC_VP */ - cc.cc4.cc_viewport_state_offset = *(brw->cc.vp_bo->offset) >> 5; /* reloc */ + cc.cc4.cc_viewport_state_offset = 0; cc.cc5 = key->cc5; cc.cc6 = key->cc6; diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index d4e3c43c61..157e6edf19 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -84,7 +84,7 @@ clip_unit_create_from_key(struct brw_context *brw, clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; /* reloc */ - clip.thread0.kernel_start_pointer = *(brw->clip.prog_bo->offset) >> 6; + clip.thread0.kernel_start_pointer = 0; clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; clip.thread1.single_program_flow = 1; diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index 18a66da538..36a99fd0e9 100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -80,8 +80,8 @@ gs_unit_create_from_key(struct brw_context *brw, memset(&gs, 0, sizeof(gs)); gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; - if (key->prog_active) /* reloc */ - gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset[0] >> 6; + /* reloc */ + gs.thread0.kernel_start_pointer = 0; gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; gs.thread1.single_program_flow = 1; diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 355abf0b89..8e684aa076 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c 
+++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -211,8 +211,10 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, /* && bscreen->use_texture_tiling */ /* && bscreen->kernel_exec_fencing */) { - if (bscreen->chipset.is_965 && - pf_is_depth_or_stencil(templ->format)) + if (1) + tex->tiling = BRW_TILING_NONE; + else if (bscreen->chipset.is_965 && + pf_is_depth_or_stencil(templ->format)) tex->tiling = BRW_TILING_Y; else tex->tiling = BRW_TILING_X; @@ -256,7 +258,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, /* XXX: what happens when tex->bo->offset changes??? */ - tex->ss.ss1.base_addr = tex->bo->offset[0]; /* reloc */ + tex->ss.ss1.base_addr = 0; /* reloc */ tex->ss.ss2.mip_count = tex->base.last_level; tex->ss.ss2.width = tex->base.width[0] - 1; tex->ss.ss2.height = tex->base.height[0] - 1; diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index bd8fc65b9e..689483b4bc 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -142,7 +142,8 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, memset(&sf, 0, sizeof(sf)); sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; - sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset[0] >> 6; /* reloc */ + /* reloc */ + sf.thread0.kernel_start_pointer = 0; sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -175,7 +176,8 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ - sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset[0] >> 5; /* reloc */ + /* reloc */ + sf.sf5.sf_viewport_state_offset = 0; sf.sf5.viewport_transform = 1; diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index 22a4d7f01b..a5b30eba47 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ 
b/src/gallium/drivers/i965/brw_vs_state.c @@ -89,7 +89,7 @@ vs_unit_create_from_key(struct brw_context *brw, memset(&vs, 0, sizeof(vs)); - vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset[0] >> 6; /* reloc */ + vs.thread0.kernel_start_pointer = 0; /* reloc */ vs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; /* Choosing multiple program flow means that we may get 2-vertex threads, diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index e041b0acaf..f4a1e9d8ed 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -44,7 +44,6 @@ struct brw_winsys_screen; struct brw_winsys_buffer { struct pipe_reference reference; struct brw_winsys_screen *sws; - unsigned *offset; unsigned size; }; diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c index 2861aa979f..174836b39d 100644 --- a/src/gallium/drivers/i965/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -87,7 +87,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw, entry->ss0 = sampler->ss0; entry->ss1 = sampler->ss1; - entry->ss2.default_color_pointer = brw->wm.sdc_bo[i]->offset[0] >> 5; /* reloc */ + entry->ss2.default_color_pointer = 0; /* reloc */ entry->ss3 = sampler->ss3; /* Cube-maps on 965 and later must use the same wrap mode for all 3 diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index 86dc10540d..56789ce7a4 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -149,7 +149,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, memset(&wm, 0, sizeof(wm)); wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; - wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset[0] >> 6; /* reloc */ + wm.thread0.kernel_start_pointer = 
0; /* reloc */ wm.thread1.depth_coef_urb_read_offset = 1; wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -159,8 +159,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.thread1.binding_table_entry_count = key->nr_surfaces; if (key->total_scratch != 0) { - wm.thread2.scratch_space_base_pointer = - brw->wm.scratch_bo->offset[0] >> 10; /* reloc */ + wm.thread2.scratch_space_base_pointer = 0; /* reloc */ wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; } else { wm.thread2.scratch_space_base_pointer = 0; @@ -178,12 +177,8 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, else wm.wm4.sampler_count = (key->sampler_count + 1) / 4; - if (brw->wm.sampler_bo != NULL) { - /* reloc */ - wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset[0] >> 5; - } else { - wm.wm4.sampler_state_pointer = 0; - } + /* reloc */ + wm.wm4.sampler_state_pointer = 0; wm.wm5.program_uses_depth = key->uses_depth; wm.wm5.program_computes_depth = key->computes_depth; diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index e5d0329967..ed365b03b9 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -130,7 +130,7 @@ brw_update_render_surface(struct brw_context *brw, */ ret = brw->sws->bo_emit_reloc(*bo_out, BRW_USAGE_RENDER_TARGET, - ss.ss1.base_addr - surface->bo->offset[0], /* XXX */ + 0, offsetof(struct brw_surface_state, ss1), surface->bo); if (ret) @@ -167,8 +167,11 @@ brw_wm_get_binding_table(struct brw_context *brw, bo_out)) return PIPE_OK; + /* Upload zero data, will all be overwitten with relocation + * offsets: + */ for (i = 0; i < brw->wm.nr_surfaces; i++) - data[i] = brw->wm.surf_bo[i]->offset[0]; + data[i] = 0; ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c 
b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c index 5aec332761..f46d9961c6 100644 --- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -168,7 +168,6 @@ xlib_brw_bo_alloc( struct brw_winsys_screen *sws, buf->offset = align(xbw->used, alignment); buf->type = type; buf->virtual = MALLOC(size); - buf->base.offset = &buf->offset; /* hmm, cheesy */ buf->base.size = size; buf->base.sws = sws; -- cgit v1.2.3 From 3763457892c2d0c654c0eca7585e4d3a863f7714 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 21:09:51 +0000 Subject: i965g: propogate map-buffer-range semantics down to winsys --- src/gallium/drivers/i965/brw_batchbuffer.c | 45 ++++++++--------------- src/gallium/drivers/i965/brw_batchbuffer.h | 9 ----- src/gallium/drivers/i965/brw_pipe_query.c | 2 +- src/gallium/drivers/i965/brw_screen_buffers.c | 51 ++++++++++++++++++++++++++- src/gallium/drivers/i965/brw_winsys.h | 18 +++++++++- src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 19 ++++++++-- 6 files changed, 100 insertions(+), 44 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index a55be6faab..d725e8b27e 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -35,7 +35,6 @@ #include "brw_structs.h" #include "intel_decode.h" -#define USE_MALLOC_BUFFER 1 #define ALWAYS_EMIT_MI_FLUSH 1 enum pipe_error @@ -50,14 +49,18 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch) if (ret) return ret; - if (batch->malloc_buffer) - batch->map = batch->malloc_buffer; - else - batch->map = batch->sws->bo_map(batch->buf, - BRW_DATA_BATCH_BUFFER, - GL_TRUE); - batch->size = BRW_BATCH_SIZE; + + /* With map_range semantics, the winsys can decide whether to + * inject a malloc'ed bounce buffer instead of mapping directly. 
+ */ + batch->map = batch->sws->bo_map(batch->buf, + BRW_DATA_BATCH_BUFFER, + 0, batch->size, + GL_TRUE, + GL_TRUE, + GL_TRUE); + batch->ptr = batch->map; return PIPE_OK; } @@ -68,11 +71,6 @@ brw_batchbuffer_alloc(struct brw_winsys_screen *sws, { struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer); - batch->use_malloc_buffer = USE_MALLOC_BUFFER; - if (batch->use_malloc_buffer) { - batch->malloc_buffer = MALLOC(BRW_BATCH_SIZE); - } - batch->sws = sws; batch->chipset = chipset; brw_batchbuffer_reset(batch); @@ -83,11 +81,7 @@ brw_batchbuffer_alloc(struct brw_winsys_screen *sws, void brw_batchbuffer_free(struct brw_batchbuffer *batch) { - if (batch->malloc_buffer) { - FREE(batch->malloc_buffer); - batch->map = NULL; - } - else if (batch->map) { + if (batch->map) { batch->sws->bo_unmap(batch->buf); batch->map = NULL; } @@ -134,18 +128,9 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, batch->ptr += 4; used = batch->ptr - batch->map; - if (batch->use_malloc_buffer) { - batch->sws->bo_subdata(batch->buf, - BRW_DATA_BATCH_BUFFER, - 0, used, - batch->map ); - batch->map = NULL; - } - else { - batch->sws->bo_unmap(batch->buf); - batch->map = NULL; - } - + batch->sws->bo_flush_range(batch->buf, 0, used); + batch->sws->bo_unmap(batch->buf); + batch->map = NULL; batch->ptr = NULL; batch->sws->bo_exec(batch->buf, used ); diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h index 288a9d2755..7473f5bea4 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.h +++ b/src/gallium/drivers/i965/brw_batchbuffer.h @@ -28,15 +28,6 @@ struct brw_batchbuffer { struct brw_winsys_buffer *buf; struct brw_chipset chipset; - /* Main-memory copy of the batch-buffer, built up incrementally & - * then copied as one to the true buffer. - * - * XXX: is this still necessary? - * XXX: if so, can this be hidden inside the GEM-specific winsys code? 
- */ - boolean use_malloc_buffer; - uint8_t *malloc_buffer; - /** * Values exported to speed up the writing the batchbuffer, * instead of having to go trough a accesor function for diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c index 6a01173787..2eb862635c 100644 --- a/src/gallium/drivers/i965/brw_pipe_query.c +++ b/src/gallium/drivers/i965/brw_pipe_query.c @@ -63,7 +63,7 @@ brw_query_get_result(struct pipe_context *pipe, if (brw->sws->bo_is_busy(query->bo) && !wait) return FALSE; - map = brw->sws->bo_map(query->bo, BRW_DATA_OTHER, GL_FALSE); + map = bo_map_read(brw->sws, query->bo); if (map == NULL) return FALSE; diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c index 7ae386ffb3..d8141a3f5b 100644 --- a/src/gallium/drivers/i965/brw_screen_buffers.c +++ b/src/gallium/drivers/i965/brw_screen_buffers.c @@ -11,6 +11,29 @@ +static void * +brw_buffer_map_range( struct pipe_screen *screen, + struct pipe_buffer *buffer, + unsigned offset, + unsigned length, + unsigned usage ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->user_buffer) + return buf->user_buffer; + + return sws->bo_map( buf->bo, + BRW_DATA_OTHER, + offset, + length, + (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE, + (usage & PIPE_BUFFER_USAGE_DISCARD) ? TRUE : FALSE, + (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT) ? TRUE : FALSE); +} + static void * brw_buffer_map( struct pipe_screen *screen, struct pipe_buffer *buffer, @@ -25,9 +48,33 @@ brw_buffer_map( struct pipe_screen *screen, return sws->bo_map( buf->bo, BRW_DATA_OTHER, - (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE ); + 0, + buf->base.size, + (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? 
TRUE : FALSE, + FALSE, + FALSE); } + +static void +brw_buffer_flush_mapped_range( struct pipe_screen *screen, + struct pipe_buffer *buffer, + unsigned offset, + unsigned length ) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_winsys_screen *sws = bscreen->sws; + struct brw_buffer *buf = brw_buffer( buffer ); + + if (buf->user_buffer) + return; + + sws->bo_flush_range( buf->bo, + offset, + length ); +} + + static void brw_buffer_unmap( struct pipe_screen *screen, struct pipe_buffer *buffer ) @@ -148,6 +195,8 @@ void brw_screen_buffer_init(struct brw_screen *brw_screen) brw_screen->base.buffer_create = brw_buffer_create; brw_screen->base.user_buffer_create = brw_user_buffer_create; brw_screen->base.buffer_map = brw_buffer_map; + brw_screen->base.buffer_map_range = brw_buffer_map_range; + brw_screen->base.buffer_flush_mapped_range = brw_buffer_flush_mapped_range; brw_screen->base.buffer_unmap = brw_buffer_unmap; brw_screen->base.buffer_destroy = brw_buffer_destroy; } diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index f4a1e9d8ed..e72b928b06 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -169,7 +169,15 @@ struct brw_winsys_screen { */ void *(*bo_map)(struct brw_winsys_buffer *buffer, enum brw_buffer_data_type data_type, - boolean write); + unsigned offset, + unsigned length, + boolean write, + boolean discard, + boolean flush_explicit ); + + void (*bo_flush_range)( struct brw_winsys_buffer *buffer, + unsigned offset, + unsigned length ); /** * Unmap a buffer. 
@@ -189,6 +197,14 @@ struct brw_winsys_screen { void (*destroy)(struct brw_winsys_screen *iws); }; +static INLINE void * +bo_map_read( struct brw_winsys_screen *sws, struct brw_winsys_buffer *buf ) +{ + return sws->bo_map( buf, + BRW_DATA_OTHER, + 0, buf->size, + FALSE, FALSE, FALSE ); +} static INLINE void bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf) diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c index f46d9961c6..ab5df56bc0 100644 --- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -350,7 +350,11 @@ xlib_brw_check_aperture_space( struct brw_winsys_screen *iws, static void * xlib_brw_bo_map(struct brw_winsys_buffer *buffer, enum brw_buffer_data_type data_type, - boolean write) + unsigned offset, + unsigned length, + boolean write, + boolean discard, + boolean explicit) { struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); @@ -365,6 +369,15 @@ xlib_brw_bo_map(struct brw_winsys_buffer *buffer, return buf->virtual; } + +static void +xlib_brw_bo_flush_range( struct brw_winsys_buffer *buffer, + unsigned offset, + unsigned length ) +{ +} + + static void xlib_brw_bo_unmap(struct brw_winsys_buffer *buffer) { @@ -380,7 +393,8 @@ xlib_brw_bo_unmap(struct brw_winsys_buffer *buffer) buf->modified = 0; - /* Consider dumping new buffer contents here. + /* Consider dumping new buffer contents here, using the + * flush-range info to minimize verbosity. 
*/ } } @@ -421,6 +435,7 @@ xlib_create_brw_winsys_screen( void ) ws->base.bo_references = xlib_brw_bo_references; ws->base.check_aperture_space = xlib_brw_check_aperture_space; ws->base.bo_map = xlib_brw_bo_map; + ws->base.bo_flush_range = xlib_brw_bo_flush_range; ws->base.bo_unmap = xlib_brw_bo_unmap; ws->base.bo_wait_idle = xlib_brw_bo_wait_idle; -- cgit v1.2.3 From 4c196ed7a8e06933d11b96ac520afa39252fc5c7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Nov 2009 22:43:36 +0000 Subject: i965g: pass relocation information in an array with bo_subdata Makes it easier to dump as we get all of the information about the upload in a single hit. Opens the window to simplification in the driver if these relocation arrays can be maintained statically rather than being recreated whenever we check for a new upload. Still needs some cleanup to avoid ugliness introduced with the delta values. --- src/gallium/drivers/i965/brw_cc.c | 27 ++++---- src/gallium/drivers/i965/brw_clip_state.c | 35 ++++++---- src/gallium/drivers/i965/brw_context.h | 4 +- src/gallium/drivers/i965/brw_curbe.c | 3 +- src/gallium/drivers/i965/brw_gs_state.c | 36 ++++++---- src/gallium/drivers/i965/brw_sf_state.c | 73 ++++++++++---------- src/gallium/drivers/i965/brw_state.h | 16 ++--- src/gallium/drivers/i965/brw_state_cache.c | 81 ++++++++++++----------- src/gallium/drivers/i965/brw_vs_state.c | 28 ++++---- src/gallium/drivers/i965/brw_vs_surface_state.c | 69 +++++++------------ src/gallium/drivers/i965/brw_winsys.h | 28 +++++++- src/gallium/drivers/i965/brw_wm_constant_buffer.c | 25 +++---- src/gallium/drivers/i965/brw_wm_sampler_state.c | 27 ++++---- src/gallium/drivers/i965/brw_wm_state.c | 61 ++++++++--------- src/gallium/drivers/i965/brw_wm_surface_state.c | 70 +++++++++----------- src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 31 +++++++-- 16 files changed, 327 insertions(+), 287 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_cc.c 
b/src/gallium/drivers/i965/brw_cc.c index 78d83929e0..94e2c99c3e 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -129,6 +129,7 @@ cc_unit_populate_key(const struct brw_context *brw, static enum pipe_error cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key, + struct brw_winsys_reloc *reloc, struct brw_winsys_buffer **bo_out) { struct brw_cc_unit_state cc; @@ -141,50 +142,48 @@ cc_unit_create_from_key(struct brw_context *brw, cc.cc2 = key->cc2; cc.cc3 = key->cc3; - /* CACHE_NEW_CC_VP */ cc.cc4.cc_viewport_state_offset = 0; cc.cc5 = key->cc5; cc.cc6 = key->cc6; cc.cc7 = key->cc7; - + ret = brw_upload_cache(&brw->cache, BRW_CC_UNIT, key, sizeof(*key), - &brw->cc.vp_bo, 1, + reloc, Elements(reloc), &cc, sizeof(cc), NULL, NULL, bo_out); if (ret) return ret; - - /* Emit CC viewport relocation */ - ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_STATE, - 0, - offsetof(struct brw_cc_unit_state, cc4), - brw->cc.vp_bo); - if (ret) - return ret; - return PIPE_OK; } static int prepare_cc_unit( struct brw_context *brw ) { struct brw_cc_unit_key key; + struct brw_winsys_reloc reloc[1]; enum pipe_error ret; cc_unit_populate_key(brw, &key); + /* CACHE_NEW_CC_VP */ + make_reloc(&reloc[0], + BRW_USAGE_STATE, + 0, + offsetof(struct brw_cc_unit_state, cc4), + brw->cc.vp_bo); + if (brw_search_cache(&brw->cache, BRW_CC_UNIT, &key, sizeof(key), - &brw->cc.vp_bo, 1, + reloc, 1, NULL, &brw->cc.state_bo)) return PIPE_OK; ret = cc_unit_create_from_key(brw, &key, + reloc, &brw->cc.state_bo); if (ret) return ret; diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 157e6edf19..3f2b9701e6 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -75,6 +75,7 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) static enum pipe_error clip_unit_create_from_key(struct brw_context *brw, struct 
brw_clip_unit_key *key, + struct brw_winsys_reloc *reloc, struct brw_winsys_buffer **bo_out) { struct brw_clip_unit_state clip; @@ -82,7 +83,6 @@ clip_unit_create_from_key(struct brw_context *brw, memset(&clip, 0, sizeof(clip)); - clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; /* reloc */ clip.thread0.kernel_start_pointer = 0; @@ -144,36 +144,44 @@ clip_unit_create_from_key(struct brw_context *brw, ret = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT, key, sizeof(*key), - &brw->clip.prog_bo, 1, + reloc, 1, &clip, sizeof(clip), NULL, NULL, bo_out); if (ret) return ret; - /* Emit clip program relocation */ - assert(brw->clip.prog_bo); - ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_STATE, - clip.thread0.grf_reg_count << 1, - offsetof(struct brw_clip_unit_state, thread0), - brw->clip.prog_bo); - if (ret) - return ret; - return PIPE_OK; } static int upload_clip_unit( struct brw_context *brw ) { struct brw_clip_unit_key key; + struct brw_winsys_reloc reloc[1]; + unsigned grf_reg_count; enum pipe_error ret; clip_unit_populate_key(brw, &key); + grf_reg_count = align(key.total_grf, 16) / 16 - 1; + + /* clip program relocation + * + * XXX: these reloc structs are long lived and only need to be + * updated when the bound BO changes. Hopefully the stuff mixed in + * in the delta's is non-orthogonal. 
+ */ + assert(brw->clip.prog_bo); + make_reloc(&reloc[0], + BRW_USAGE_STATE, + grf_reg_count << 1, + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_bo); + + if (brw_search_cache(&brw->cache, BRW_CLIP_UNIT, &key, sizeof(key), - &brw->clip.prog_bo, 1, + reloc, 1, NULL, &brw->clip.state_bo)) return PIPE_OK; @@ -181,6 +189,7 @@ static int upload_clip_unit( struct brw_context *brw ) /* Create new: */ ret = clip_unit_create_from_key(brw, &key, + reloc, &brw->clip.state_bo); if (ret) return ret; diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 177fe2172d..67fad0d9a5 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -383,8 +383,8 @@ struct brw_cache_item { GLuint hash; GLuint key_size; /* for variable-sized keys */ const void *key; - struct brw_winsys_buffer **reloc_bufs; - GLuint nr_reloc_bufs; + struct brw_winsys_reloc *relocs; + GLuint nr_relocs; struct brw_winsys_buffer *bo; GLuint data_size; diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index ca7774a7cc..0a5cfcc7cf 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -295,7 +295,8 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) brw->curbe.curbe_offset, BRW_DATA_OTHER, bufsz, - buf); + buf, + NULL, 0); } brw_add_validated_bo(brw, brw->curbe.curbe_bo); diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index 36a99fd0e9..1b0de17aec 100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -72,15 +72,18 @@ gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key) static enum pipe_error gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key, + struct brw_winsys_reloc *reloc, + unsigned nr_reloc, struct brw_winsys_buffer **bo_out) { struct brw_gs_unit_state gs; enum pipe_error 
ret; + memset(&gs, 0, sizeof(gs)); + /* maybe-reloc: populate the background */ gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; - /* reloc */ gs.thread0.kernel_start_pointer = 0; gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -108,22 +111,13 @@ gs_unit_create_from_key(struct brw_context *brw, ret = brw_upload_cache(&brw->cache, BRW_GS_UNIT, key, sizeof(*key), - &brw->gs.prog_bo, 1, + reloc, nr_reloc, &gs, sizeof(gs), NULL, NULL, bo_out); if (ret) return ret; - if (key->prog_active) { - /* Emit GS program relocation */ - brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_STATE, - gs.thread0.grf_reg_count << 1, - offsetof(struct brw_gs_unit_state, thread0), - brw->gs.prog_bo); - } - return PIPE_OK; } @@ -131,17 +125,33 @@ static enum pipe_error prepare_gs_unit(struct brw_context *brw) { struct brw_gs_unit_key key; enum pipe_error ret; + struct brw_winsys_reloc reloc[1]; + unsigned nr_reloc = 0; + unsigned grf_reg_count; gs_unit_populate_key(brw, &key); + grf_reg_count = (align(key.total_grf, 16) / 16 - 1); + + /* GS program relocation */ + if (key.prog_active) { + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_STATE, + grf_reg_count << 1, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); + } + if (brw_search_cache(&brw->cache, BRW_GS_UNIT, &key, sizeof(key), - &brw->gs.prog_bo, 1, + reloc, nr_reloc, NULL, &brw->gs.state_bo)) return PIPE_OK; - ret = gs_unit_create_from_key(brw, &key, &brw->gs.state_bo); + ret = gs_unit_create_from_key(brw, &key, + reloc, nr_reloc, + &brw->gs.state_bo); if (ret) return ret; diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index 689483b4bc..a911482149 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -132,8 +132,9 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) } static enum pipe_error -sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, - 
struct brw_winsys_buffer **reloc_bufs, +sf_unit_create_from_key(struct brw_context *brw, + struct brw_sf_unit_key *key, + struct brw_winsys_reloc *reloc, struct brw_winsys_buffer **bo_out) { struct brw_sf_unit_state sf; @@ -141,7 +142,8 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, int chipset_max_threads; memset(&sf, 0, sizeof(sf)); - sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; + + sf.thread0.grf_reg_count = 0; /* reloc */ sf.thread0.kernel_start_pointer = 0; @@ -177,18 +179,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* CACHE_NEW_SF_VP */ /* reloc */ - sf.sf5.sf_viewport_state_offset = 0; - - sf.sf5.viewport_transform = 1; if (key->scissor) sf.sf6.scissor = 1; - if (key->front_face == PIPE_WINDING_CCW) - sf.sf5.front_winding = BRW_FRONTWINDING_CCW; - else - sf.sf5.front_winding = BRW_FRONTWINDING_CW; - switch (key->cull_mode) { case PIPE_WINDING_CCW: case PIPE_WINDING_CW: @@ -281,34 +275,13 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT, key, sizeof(*key), - reloc_bufs, 2, + reloc, 2, &sf, sizeof(sf), NULL, NULL, bo_out); if (ret) return ret; - /* STATE_PREFETCH command description describes this state as being - * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. 
- */ - /* Emit SF program relocation */ - ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_STATE, - sf.thread0.grf_reg_count << 1, - offsetof(struct brw_sf_unit_state, thread0), - brw->sf.prog_bo); - if (ret) - return ret; - - - /* Emit SF viewport relocation */ - ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_STATE, - sf.sf5.front_winding | (sf.sf5.viewport_transform << 1), - offsetof(struct brw_sf_unit_state, sf5), - brw->sf.vp_bo); - if (ret) - return ret; return PIPE_OK; } @@ -316,23 +289,47 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, static enum pipe_error upload_sf_unit( struct brw_context *brw ) { struct brw_sf_unit_key key; - struct brw_winsys_buffer *reloc_bufs[2]; + struct brw_winsys_reloc reloc[2]; + unsigned total_grf; + unsigned viewport_transform; + unsigned front_winding; enum pipe_error ret; sf_unit_populate_key(brw, &key); + + /* XXX: cut this crap and pre calculate the key: + */ + total_grf = (align(key.total_grf, 16) / 16 - 1); + viewport_transform = 1; + front_winding = (key.front_face == PIPE_WINDING_CCW ? 
+ BRW_FRONTWINDING_CCW : + BRW_FRONTWINDING_CW); + + /* Emit SF program relocation */ + make_reloc(&reloc[0], + BRW_USAGE_STATE, + total_grf << 1, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); + + /* Emit SF viewport relocation */ + make_reloc(&reloc[1], + BRW_USAGE_STATE, + front_winding | (viewport_transform << 1), + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); - reloc_bufs[0] = brw->sf.prog_bo; - reloc_bufs[1] = brw->sf.vp_bo; if (brw_search_cache(&brw->cache, BRW_SF_UNIT, &key, sizeof(key), - reloc_bufs, 2, + reloc, 2, NULL, &brw->sf.state_bo)) return PIPE_OK; - ret = sf_unit_create_from_key(brw, &key, reloc_bufs, + ret = sf_unit_create_from_key(brw, &key, + reloc, &brw->sf.state_bo); if (ret) return ret; diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index e219a1d870..97710abec3 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -109,24 +109,24 @@ void brw_destroy_state(struct brw_context *brw); enum pipe_error brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, - struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, struct brw_winsys_buffer **bo_out ); enum pipe_error brw_cache_data_sz(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, GLuint data_size, - struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, struct brw_winsys_buffer **bo_out); enum pipe_error brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_sz, - struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, const void *data, GLuint data_sz, const void *aux, @@ -137,8 +137,8 @@ boolean brw_search_cache( struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - 
struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, void *aux_return, struct brw_winsys_buffer **bo_out); diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c index f8369d31ec..16b643ceb2 100644 --- a/src/gallium/drivers/i965/brw_state_cache.c +++ b/src/gallium/drivers/i965/brw_state_cache.c @@ -47,7 +47,7 @@ * a safe point (unlock) we throw out all of the cache data and let it * regenerate for the next rendering operation. * - * The reloc_buf pointers need to be included as key data, otherwise the + * The reloc structs need to be included as key data, otherwise the * non-unique values stuffed in the offset in key data through * brw_cache_data() may result in successful probe for state buffers * even when the buffer being referenced doesn't match. The result would be @@ -73,7 +73,7 @@ static GLuint hash_key(const void *key, GLuint key_size, - struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs) + struct brw_winsys_reloc *relocs, GLuint nr_relocs) { GLuint *ikey = (GLuint *)key; GLuint hash = 0, i; @@ -88,8 +88,8 @@ hash_key(const void *key, GLuint key_size, } /* Include the BO pointers as key data as well */ - ikey = (GLuint *)reloc_bufs; - key_size = nr_reloc_bufs * sizeof(struct brw_winsys_buffer *); + ikey = (GLuint *)relocs; + key_size = nr_relocs * sizeof(struct brw_winsys_reloc); for (i = 0; i < key_size/4; i++) { hash ^= ikey[i]; hash = (hash << 5) | (hash >> 27); @@ -118,7 +118,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, static struct brw_cache_item * search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, GLuint hash, const void *key, GLuint key_size, - struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs) + struct brw_winsys_reloc *relocs, GLuint nr_relocs) { struct brw_cache_item *c; @@ -137,9 +137,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, c->hash == hash && 
c->key_size == key_size && memcmp(c->key, key, key_size) == 0 && - c->nr_reloc_bufs == nr_reloc_bufs && - memcmp(c->reloc_bufs, reloc_bufs, - nr_reloc_bufs * sizeof(struct brw_winsys_buffer *)) == 0) + c->nr_relocs == nr_relocs && + memcmp(c->relocs, relocs, nr_relocs * sizeof *relocs) == 0) return c; } @@ -178,16 +177,16 @@ brw_search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, void *aux_return, struct brw_winsys_buffer **bo_out) { struct brw_cache_item *item; - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); + GLuint hash = hash_key(key, key_size, relocs, nr_relocs); item = search_cache(cache, cache_id, hash, key, key_size, - reloc_bufs, nr_reloc_bufs); + relocs, nr_relocs); if (item) { if (aux_return) @@ -207,8 +206,8 @@ brw_upload_cache( struct brw_cache *cache, enum brw_cache_id cache_id, const void *key, GLuint key_size, - struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, const void *data, GLuint data_size, const void *aux, @@ -216,8 +215,8 @@ brw_upload_cache( struct brw_cache *cache, struct brw_winsys_buffer **bo_out) { struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); - GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs); - GLuint relocs_size = nr_reloc_bufs * sizeof(struct brw_winsys_buffer *); + GLuint hash = hash_key(key, key_size, relocs, nr_relocs); + GLuint relocs_size = nr_relocs * sizeof relocs[0]; GLuint aux_size = cache->aux_size[cache_id]; enum pipe_error ret; void *tmp; @@ -236,23 +235,22 @@ brw_upload_cache( struct brw_cache *cache, return ret; - /* Set up the memory containing the key, aux_data, and reloc_bufs */ + /* Set up the memory containing the key, aux_data, and relocs */ tmp = MALLOC(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); memcpy((char 
*)tmp + key_size, aux, cache->aux_size[cache_id]); - memcpy((char *)tmp + key_size + aux_size, reloc_bufs, relocs_size); - for (i = 0; i < nr_reloc_bufs; i++) { - if (reloc_bufs[i] != NULL) - p_atomic_inc(&reloc_bufs[i]->reference.count); + memcpy((char *)tmp + key_size + aux_size, relocs, relocs_size); + for (i = 0; i < nr_relocs; i++) { + p_atomic_inc(&relocs[i].bo->reference.count); } item->cache_id = cache_id; item->key = tmp; item->hash = hash; item->key_size = key_size; - item->reloc_bufs = (struct brw_winsys_buffer **)((char *)tmp + key_size + aux_size); - item->nr_reloc_bufs = nr_reloc_bufs; + item->relocs = (struct brw_winsys_reloc *)((char *)tmp + key_size + aux_size); + item->nr_relocs = nr_relocs; bo_reference( &item->bo, *bo_out ); item->data_size = data_size; @@ -275,9 +273,12 @@ brw_upload_cache( struct brw_cache *cache, data_size, cache_id); /* Copy data to the buffer */ - cache->sws->bo_subdata(item->bo, - cache_id, - 0, data_size, data); + ret = cache->sws->bo_subdata(item->bo, + cache_id, + 0, data_size, data, + relocs, nr_relocs); + if (ret) + return ret; update_cache_last(cache, cache_id, item->bo); @@ -293,15 +294,15 @@ brw_cache_data_sz(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, GLuint data_size, - struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, struct brw_winsys_buffer **bo_out) { struct brw_cache_item *item; - GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs); + GLuint hash = hash_key(data, data_size, relocs, nr_relocs); item = search_cache(cache, cache_id, hash, data, data_size, - reloc_bufs, nr_reloc_bufs); + relocs, nr_relocs); if (item) { update_cache_last(cache, cache_id, item->bo); @@ -311,7 +312,7 @@ brw_cache_data_sz(struct brw_cache *cache, return brw_upload_cache(cache, cache_id, data, data_size, - reloc_bufs, nr_reloc_bufs, + relocs, nr_relocs, data, data_size, NULL, NULL, bo_out); @@ -321,20 +322,22 @@ 
brw_cache_data_sz(struct brw_cache *cache, /** * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. * - * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be + * If nr_relocs is nonzero, brw_search_cache()/brw_upload_cache() would be * better to use, as the potentially changing offsets in the data-used-as-key * will result in excessive cache misses. + * + * XXX: above is no longer true -- can we remove some code? */ enum pipe_error brw_cache_data(struct brw_cache *cache, enum brw_cache_id cache_id, const void *data, - struct brw_winsys_buffer **reloc_bufs, - GLuint nr_reloc_bufs, + struct brw_winsys_reloc *relocs, + GLuint nr_relocs, struct brw_winsys_buffer **bo_out) { return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id], - reloc_bufs, nr_reloc_bufs, bo_out); + relocs, nr_relocs, bo_out); } @@ -510,8 +513,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) next = c->next; - for (j = 0; j < c->nr_reloc_bufs; j++) - bo_reference(&c->reloc_bufs[j], NULL); + for (j = 0; j < c->nr_relocs; j++) + bo_reference(&c->relocs[j].bo, NULL); bo_reference(&c->bo, NULL); FREE((void *)c->key); @@ -555,8 +558,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo) *prev = c->next; - for (j = 0; j < c->nr_reloc_bufs; j++) - bo_reference(&c->reloc_bufs[j], NULL); + for (j = 0; j < c->nr_relocs; j++) + bo_reference(&c->relocs[j].bo, NULL); bo_reference(&c->bo, NULL); diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index a5b30eba47..0b44f39f4d 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -81,6 +81,7 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) static enum pipe_error vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key, + struct brw_winsys_reloc *reloc, struct brw_winsys_buffer **bo_out) { enum pipe_error ret; @@ 
-145,22 +146,13 @@ vs_unit_create_from_key(struct brw_context *brw, ret = brw_upload_cache(&brw->cache, BRW_VS_UNIT, key, sizeof(*key), - &brw->vs.prog_bo, 1, + reloc, Elements(reloc), &vs, sizeof(vs), NULL, NULL, bo_out); if (ret) return ret; - /* Emit VS program relocation */ - ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_STATE, - vs.thread0.grf_reg_count << 1, - offsetof(struct brw_vs_unit_state, thread0), - brw->vs.prog_bo); - if (ret) - return ret; - return PIPE_OK; } @@ -168,17 +160,29 @@ static int prepare_vs_unit(struct brw_context *brw) { struct brw_vs_unit_key key; enum pipe_error ret; + struct brw_winsys_reloc reloc[1]; + unsigned grf_reg_count; vs_unit_populate_key(brw, &key); + grf_reg_count = (align(key.total_grf, 16) / 16 - 1); + + /* Emit VS program relocation */ + make_reloc(&reloc[0], + BRW_USAGE_STATE, + grf_reg_count << 1, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); + + if (brw_search_cache(&brw->cache, BRW_VS_UNIT, &key, sizeof(key), - &brw->vs.prog_bo, 1, + reloc, 1, NULL, &brw->vs.state_bo)) return PIPE_OK; - ret = vs_unit_create_from_key(brw, &key, &brw->vs.state_bo); + ret = vs_unit_create_from_key(brw, &key, reloc, &brw->vs.state_bo); if (ret) return ret; diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c index b12df0ec03..aaf2a44f61 100644 --- a/src/gallium/drivers/i965/brw_vs_surface_state.c +++ b/src/gallium/drivers/i965/brw_vs_surface_state.c @@ -65,7 +65,8 @@ brw_vs_update_constant_buffer(struct brw_context *brw) size, 64); /* _NEW_PROGRAM_CONSTANTS */ - dri_bo_subdata(const_buffer, 0, size, params->ParameterValues); + brw->sws->bo_subdata(const_buffer, 0, size, params->ParameterValues, + NULL, 0); return const_buffer; } @@ -145,51 +146,31 @@ brw_vs_get_binding_table(struct brw_context *brw, struct brw_winsys_buffer **bo_out) { #if 0 - if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, BRW_VS_MAX_SURF, - 
NULL, - bo_out)) - { - return PIPE_OK; - } - else { - GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint); - uint32_t *data = malloc(data_size); - int i; - - for (i = 0; i < BRW_VS_MAX_SURF; i++) - if (brw->vs.surf_bo[i]) - data[i] = brw->vs.surf_bo[i]->offset; - else - data[i] = 0; - - ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, - NULL, 0, - brw->vs.surf_bo, BRW_VS_MAX_SURF, - data, data_size, - NULL, NULL, - bo_out); - if (ret) - return ret; - - /* Emit binding table relocations to surface state */ - for (i = 0; i < BRW_VS_MAX_SURF; i++) { - if (brw->vs.surf_bo[i] != NULL) { - /* The presumed offsets were set in the data values for - * brw_upload_cache. - */ - ret = sws->bo_emit_reloc(*bo_out, i * 4, - brw->vs.surf_bo[i], 0, - BRW_USAGE_STATE); - if (ret) - return ret; - } - } + static GLuint data[BRW_VS_MAX_SURF]; /* always zero */ + struct brw_winsys_reloc reloc[BRW_VS_MAX_SURF]; + int i; - FREE(data); - return PIPE_OK; + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + make_reloc(&reloc[i], + BRW_USAGE_STATE, + 0, + i * 4, + brw->vs.surf_bo[i]); } + + ret = brw_cache_data( &brw->surface_cache, + BRW_SS_SURF_BIND, + NULL, 0, + reloc, Elements(reloc), + data, sizeof data, + NULL, NULL, + bo_out); + if (ret) + return ret; + + FREE(data); + return PIPE_OK; #else return PIPE_OK; #endif diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index e72b928b06..2da660a1e6 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -111,6 +111,30 @@ enum brw_buffer_data_type { }; +/* Relocations to be applied with subdata in a call to sws->bo_subdata, below. 
+ * + * Effectively this encodes: + * + * (unsigned *)(subdata + offset) = bo->offset + delta + */ +struct brw_winsys_reloc { + enum brw_buffer_usage usage; /* debug only */ + unsigned delta; + unsigned offset; + struct brw_winsys_buffer *bo; +}; + +static INLINE void make_reloc( struct brw_winsys_reloc *reloc, + enum brw_buffer_usage usage, + unsigned delta, + unsigned offset, + struct brw_winsys_buffer *bo) +{ + reloc->usage = usage; + reloc->delta = delta; + reloc->offset = offset; + reloc->bo = bo; /* Note - note taking a reference yet */ +} @@ -151,7 +175,9 @@ struct brw_winsys_screen { enum brw_buffer_data_type data_type, size_t offset, size_t size, - const void *data); + const void *data, + const struct brw_winsys_reloc *reloc, + unsigned nr_reloc ); boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer); boolean (*bo_references)(struct brw_winsys_buffer *a, diff --git a/src/gallium/drivers/i965/brw_wm_constant_buffer.c b/src/gallium/drivers/i965/brw_wm_constant_buffer.c index 14568265dd..6434c6acf7 100644 --- a/src/gallium/drivers/i965/brw_wm_constant_buffer.c +++ b/src/gallium/drivers/i965/brw_wm_constant_buffer.c @@ -13,16 +13,24 @@ brw_create_constant_surface( struct brw_context *brw, { const GLint w = key->width - 1; struct brw_winsys_buffer *bo; + struct brw_winsys_reloc reloc[1]; enum pipe_error ret; + /* Emit relocation to surface contents */ + make_reloc(&reloc[0], + BRW_USAGE_SAMPLER, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + + memset(&surf, 0, sizeof(surf)); surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = BRW_SURFACE_BUFFER; surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - assert(key->bo); - surf.ss1.base_addr = key->bo->offset; /* reloc */ + surf.ss1.base_addr = 0; /* reloc */ surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ @@ -32,24 +40,13 @@ brw_create_constant_surface( struct brw_context 
*brw, ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, key, sizeof(*key), - &key->bo, key->bo ? 1 : 0, + reloc, Elements(reloc), &surf, sizeof(surf), NULL, NULL, &bo_out); if (ret) return ret; - if (key->bo) { - /* Emit relocation to surface contents */ - ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_SAMPLER, - 0, - offsetof(struct brw_surface_state, ss1), - key->bo); - if (ret) - return ret; - } - return PIPE_OK; } diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c index 174836b39d..4e99ac703a 100644 --- a/src/gallium/drivers/i965/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c @@ -165,6 +165,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw) static int upload_wm_samplers( struct brw_context *brw ) { struct wm_sampler_key key; + struct brw_winsys_reloc reloc[BRW_MAX_TEX_UNIT]; enum pipe_error ret; int i; @@ -181,9 +182,20 @@ static int upload_wm_samplers( struct brw_context *brw ) return PIPE_OK; } + /* Emit SDC relocations */ + for (i = 0; i < key.sampler_count; i++) { + make_reloc( &reloc[i], + BRW_USAGE_SAMPLER, + 0, + i * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + brw->wm.sdc_bo[i]); + } + + if (brw_search_cache(&brw->cache, BRW_SAMPLER, &key, sizeof(key), - brw->wm.sdc_bo, key.sampler_count, + reloc, key.sampler_count, NULL, &brw->wm.sampler_bo)) return PIPE_OK; @@ -193,24 +205,13 @@ static int upload_wm_samplers( struct brw_context *brw ) */ ret = brw_upload_cache(&brw->cache, BRW_SAMPLER, &key, sizeof(key), - brw->wm.sdc_bo, key.sampler_count, + reloc, key.sampler_count, &key.sampler, sizeof(key.sampler), NULL, NULL, &brw->wm.sampler_bo); if (ret) return ret; - /* Emit SDC relocations */ - for (i = 0; i < key.sampler_count; i++) { - ret = brw->sws->bo_emit_reloc(brw->wm.sampler_bo, - BRW_USAGE_SAMPLER, - 0, - i * sizeof(struct brw_sampler_state) + - offsetof(struct brw_sampler_state, ss2), - 
brw->wm.sdc_bo[i]); - if (ret) - return ret; - } return 0; } diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index 56789ce7a4..d8e88237ce 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -144,8 +144,36 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, struct brw_winsys_buffer **bo_out) { struct brw_wm_unit_state wm; + struct brw_winsys_reloc reloc[3]; + unsigned nr_reloc = 0; enum pipe_error ret; + /* Emit WM program relocation */ + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_STATE, + wm.thread0.grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); + + /* Emit scratch space relocation */ + if (key->total_scratch != 0) { + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_SCRATCH, + wm.thread2.per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_bo); + } + + /* Emit sampler state relocation */ + if (key->sampler_count != 0) { + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_STATE, + wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); + } + + memset(&wm, 0, sizeof(wm)); wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; @@ -220,44 +248,13 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT, key, sizeof(*key), - reloc_bufs, 3, + reloc, nr_reloc, &wm, sizeof(wm), NULL, NULL, bo_out); if (ret) return ret; - /* Emit WM program relocation */ - ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_STATE, - wm.thread0.grf_reg_count << 1, - offsetof(struct brw_wm_unit_state, thread0), - brw->wm.prog_bo); - if (ret) - return ret; - - /* Emit scratch space relocation */ - if (key->total_scratch != 0) { - ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_SCRATCH, - wm.thread2.per_thread_scratch_space, - offsetof(struct brw_wm_unit_state, 
thread2), - brw->wm.scratch_bo); - if (ret) - return ret; - } - - /* Emit sampler state relocation */ - if (key->sampler_count != 0) { - ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_STATE, - wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), - offsetof(struct brw_wm_unit_state, wm4), - brw->wm.sampler_bo); - if (ret) - return ret; - } - return PIPE_OK; } diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index ed365b03b9..f882331433 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -45,33 +45,32 @@ brw_update_texture_surface( struct brw_context *brw, struct brw_texture *tex, struct brw_winsys_buffer **bo_out) { + struct brw_winsys_reloc reloc[1]; enum pipe_error ret; + /* Emit relocation to surface contents */ + make_reloc(&reloc[0], + BRW_USAGE_SAMPLER, + 0, + offsetof(struct brw_surface_state, ss1), + tex->bo); + if (brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &tex->ss, sizeof tex->ss, - &tex->bo, 1, + reloc, Elements(reloc), NULL, bo_out)) return PIPE_OK; ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, &tex->ss, sizeof tex->ss, - &tex->bo, 1, + reloc, Elements(reloc), &tex->ss, sizeof tex->ss, NULL, NULL, bo_out); if (ret) return ret; - - /* Emit relocation to surface contents */ - ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_SAMPLER, - 0, - offsetof(struct brw_surface_state, ss1), - tex->bo); - if (ret) - return ret; return PIPE_OK; } @@ -95,8 +94,17 @@ brw_update_render_surface(struct brw_context *brw, { struct brw_surf_ss0 blend_ss0 = brw->curr.blend->ss0; struct brw_surface_state ss; + struct brw_winsys_reloc reloc[1]; enum pipe_error ret; + /* XXX: we will only be rendering to this surface: + */ + make_reloc(&reloc[0], + BRW_USAGE_RENDER_TARGET, + 0, + offsetof(struct brw_surface_state, ss1), + surface->bo); + /* Surfaces are potentially shared between contexts, so can't * scribble the in-place ss0 
value in the surface. */ @@ -111,7 +119,7 @@ brw_update_render_surface(struct brw_context *brw, if (brw_search_cache(&brw->surface_cache, BRW_SS_SURFACE, &ss, sizeof(ss), - &surface->bo, 1, + reloc, Elements(reloc), NULL, bo_out)) return PIPE_OK; @@ -119,23 +127,13 @@ brw_update_render_surface(struct brw_context *brw, ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE, &ss, sizeof ss, - &surface->bo, 1, + reloc, Elements(reloc), &ss, sizeof ss, NULL, NULL, bo_out); if (ret) return ret; - /* XXX: we will only be rendering to this surface: - */ - ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_RENDER_TARGET, - 0, - offsetof(struct brw_surface_state, ss1), - surface->bo); - if (ret) - return ret; - return PIPE_OK; } @@ -149,6 +147,7 @@ brw_wm_get_binding_table(struct brw_context *brw, struct brw_winsys_buffer **bo_out ) { enum pipe_error ret; + struct brw_winsys_reloc reloc[BRW_WM_MAX_SURF]; uint32_t data[BRW_WM_MAX_SURF]; GLuint data_size = brw->wm.nr_surfaces * sizeof data[0]; int i; @@ -156,13 +155,21 @@ brw_wm_get_binding_table(struct brw_context *brw, assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); assert(brw->wm.nr_surfaces > 0); + /* Emit binding table relocations to surface state */ + for (i = 0; i < brw->wm.nr_surfaces; i++) { + make_reloc(&reloc[i], + BRW_USAGE_STATE, + 0, + i * sizeof(GLuint), + brw->wm.surf_bo[i]); + } + /* Note there is no key for this search beyond the values in the * relocation array: */ if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, - brw->wm.surf_bo, - brw->wm.nr_surfaces, + reloc, brw->wm.nr_surfaces, NULL, bo_out)) return PIPE_OK; @@ -175,24 +182,13 @@ brw_wm_get_binding_table(struct brw_context *brw, ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, - brw->wm.surf_bo, brw->wm.nr_surfaces, + reloc, brw->wm.nr_surfaces, data, data_size, NULL, NULL, bo_out); if (ret) return ret; - /* Emit binding table relocations to surface state */ - for (i = 0; i < brw->wm.nr_surfaces; i++) { - 
ret = brw->sws->bo_emit_reloc(*bo_out, - BRW_USAGE_STATE, - 0, - i * sizeof(GLuint), - brw->wm.surf_bo[i]); - if (ret) - return ret; - } - return PIPE_OK; } diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c index ab5df56bc0..ce6d85976d 100644 --- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -47,6 +47,10 @@ #define MAX_VRAM (128*1024*1024) +#define MAX_DUMPS 128 + + + extern int brw_disasm (FILE *file, const struct brw_instruction *inst, unsigned count ); @@ -294,21 +298,36 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer, enum brw_buffer_data_type data_type, size_t offset, size_t size, - const void *data) + const void *data, + const struct brw_winsys_reloc *reloc, + unsigned nr_relocs) { struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer); struct xlib_brw_winsys *xbw = xlib_brw_winsys(buffer->sws); + unsigned i; - debug_printf("%s buf %p off %d sz %d %s\n", + debug_printf("%s buf %p off %d sz %d %s relocs: %d\n", __FUNCTION__, - (void *)buffer, offset, size, data_types[data_type]); - - if (1) - dump_data( xbw, data_type, data, size ); + (void *)buffer, offset, size, + data_types[data_type], + nr_relocs); assert(buf->base.size >= offset + size); memcpy(buf->virtual + offset, data, size); + /* Apply the relocations: + */ + for (i = 0; i < nr_relocs; i++) { + debug_printf("\treloc[%d] usage %s off %d value %x+%x\n", + i, usages[reloc[i].usage], reloc[i].offset, + xlib_brw_buffer(reloc[i].bo)->offset, reloc[i].delta); + + *(unsigned *)(buf->virtual + offset + reloc[i].offset) = + xlib_brw_buffer(reloc[i].bo)->offset + reloc[i].delta; + } + + if (1) + dump_data( xbw, data_type, buf->virtual + offset, size ); return 0; } -- cgit v1.2.3 From a49ccf0fd25575c4e40398e5d22f3931e80921f8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 07:47:07 +0000 Subject: i965g: restore code to populate the relocation background I'm emitting this in two 
places now, to the data presented for upload and also in the delta field of the reloc struct. Probably want to remove the delta field and just pull the background from the key. --- src/gallium/drivers/i965/brw_clip_state.c | 1 + src/gallium/drivers/i965/brw_gs_state.c | 2 +- src/gallium/drivers/i965/brw_sf_state.c | 11 +++++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 3f2b9701e6..467364e884 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -83,6 +83,7 @@ clip_unit_create_from_key(struct brw_context *brw, memset(&clip, 0, sizeof(clip)); + clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; /* reloc */ clip.thread0.kernel_start_pointer = 0; diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c index 1b0de17aec..b64ec286ce 100644 --- a/src/gallium/drivers/i965/brw_gs_state.c +++ b/src/gallium/drivers/i965/brw_gs_state.c @@ -82,7 +82,7 @@ gs_unit_create_from_key(struct brw_context *brw, memset(&gs, 0, sizeof(gs)); - /* maybe-reloc: populate the background */ + /* reloc */ gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; gs.thread0.kernel_start_pointer = 0; diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index a911482149..e412669844 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -142,8 +142,7 @@ sf_unit_create_from_key(struct brw_context *brw, int chipset_max_threads; memset(&sf, 0, sizeof(sf)); - - sf.thread0.grf_reg_count = 0; + sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; /* reloc */ sf.thread0.kernel_start_pointer = 0; @@ -179,10 +178,18 @@ sf_unit_create_from_key(struct brw_context *brw, /* CACHE_NEW_SF_VP */ /* reloc */ + sf.sf5.sf_viewport_state_offset = 0; + + 
sf.sf5.viewport_transform = 1; if (key->scissor) sf.sf6.scissor = 1; + if (key->front_face == PIPE_WINDING_CCW) + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + else + sf.sf5.front_winding = BRW_FRONTWINDING_CW; + switch (key->cull_mode) { case PIPE_WINDING_CCW: case PIPE_WINDING_CW: -- cgit v1.2.3 From b9bb41321a9add139cd1dbddcf48e6c81c9d019d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 08:27:43 +0000 Subject: i965g: scissor off by one --- src/gallium/drivers/i965/brw_sf_state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index e412669844..955478e624 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -58,9 +58,9 @@ static enum pipe_error upload_sf_vp(struct brw_context *brw) sfv.viewport.m32 = vp->translate[2]; sfv.scissor.xmin = scissor->minx; - sfv.scissor.xmax = scissor->maxx; /* -1 ?? */ + sfv.scissor.xmax = scissor->maxx - 1; /* ? */ sfv.scissor.ymin = scissor->miny; - sfv.scissor.ymax = scissor->maxy; /* -1 ?? */ + sfv.scissor.ymax = scissor->maxy - 1; /* ? */ ret = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0, &brw->sf.vp_bo ); -- cgit v1.2.3 From b8e63e92102b6ca0b5ce06685590232a3a47d1ea Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 08:28:17 +0000 Subject: i965g: point_rast_rule comment no longer applies Not sure exactly what state we want here now, will need to experiment. 
--- src/gallium/drivers/i965/brw_sf_state.c | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c index 955478e624..25dc2b52e0 100644 --- a/src/gallium/drivers/i965/brw_sf_state.c +++ b/src/gallium/drivers/i965/brw_sf_state.c @@ -221,32 +221,10 @@ sf_unit_create_from_key(struct brw_context *brw, /* XXX: gl_rasterization_rules? something else? */ - if (0) { - /* Rendering to an OpenGL window */ - sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; - } - else { - /* If rendering to an FBO, the pixel coordinate system is - * inverted with respect to the normal OpenGL coordinate - * system, so BRW_RASTRULE_LOWER_RIGHT is correct. - * But this value is listed as "Reserved, but not seen as useful" - * in Intel documentation (page 212, "Point Rasterization Rule", - * section 7.4 "SF Pipeline State Summary", of document - * "Intel® 965 Express Chipset Family and Intel® G35 Express - * Chipset Graphics Controller Programmer's Reference Manual, - * Volume 2: 3D/Media", Revision 1.0b as of January 2008, - * available at - * http://intellinuxgraphics.org/documentation.html - * at the time of this writing). - * - * It does work on at least some devices, if not all; - * if devices that don't support it can be identified, - * the likely failure case is that points are rasterized - * incorrectly, which is no worse than occurs without - * the value, so we're using it here. - */ - sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT; - } + sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT; + sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT; + sf.sf6.point_rast_rule = 1; + /* XXX clamp max depends on AA vs. 
non-AA */ /* _NEW_POINT */ -- cgit v1.2.3 From 4a3e24522b0538cb3802c59c22d6f3660c4491be Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 08:29:09 +0000 Subject: i965g: populate wm reloc array earlier Still have to calculate the reloc background in two places. --- src/gallium/drivers/i965/brw_wm_state.c | 82 ++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 36 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c index d8e88237ce..ee970ac75b 100644 --- a/src/gallium/drivers/i965/brw_wm_state.c +++ b/src/gallium/drivers/i965/brw_wm_state.c @@ -140,40 +140,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) */ static enum pipe_error wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, - struct brw_winsys_buffer **reloc_bufs, + struct brw_winsys_reloc *reloc, + unsigned nr_reloc, struct brw_winsys_buffer **bo_out) { struct brw_wm_unit_state wm; - struct brw_winsys_reloc reloc[3]; - unsigned nr_reloc = 0; enum pipe_error ret; - /* Emit WM program relocation */ - make_reloc(&reloc[nr_reloc++], - BRW_USAGE_STATE, - wm.thread0.grf_reg_count << 1, - offsetof(struct brw_wm_unit_state, thread0), - brw->wm.prog_bo); - - /* Emit scratch space relocation */ - if (key->total_scratch != 0) { - make_reloc(&reloc[nr_reloc++], - BRW_USAGE_SCRATCH, - wm.thread2.per_thread_scratch_space, - offsetof(struct brw_wm_unit_state, thread2), - brw->wm.scratch_bo); - } - - /* Emit sampler state relocation */ - if (key->sampler_count != 0) { - make_reloc(&reloc[nr_reloc++], - BRW_USAGE_STATE, - wm.wm4.stats_enable | (wm.wm4.sampler_count << 2), - offsetof(struct brw_wm_unit_state, wm4), - brw->wm.sampler_bo); - } - - memset(&wm, 0, sizeof(wm)); wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1; @@ -243,7 +216,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, 
wm.wm5.line_stipple = key->line_stipple; - if (BRW_DEBUG & DEBUG_STATS || key->stats_wm) + if ((BRW_DEBUG & DEBUG_STATS) || key->stats_wm) wm.wm4.stats_enable = 1; ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT, @@ -262,11 +235,17 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, static enum pipe_error upload_wm_unit( struct brw_context *brw ) { struct brw_wm_unit_key key; - struct brw_winsys_buffer *reloc_bufs[3]; + struct brw_winsys_reloc reloc[3]; + unsigned nr_reloc = 0; enum pipe_error ret; + unsigned grf_reg_count; + unsigned per_thread_scratch_space; + unsigned stats_enable; + unsigned sampler_count; wm_unit_populate_key(brw, &key); + /* Allocate the necessary scratch space if we haven't already. Don't * bother reducing the allocation later, since we use scratch so * rarely. @@ -291,18 +270,49 @@ static enum pipe_error upload_wm_unit( struct brw_context *brw ) } } - reloc_bufs[0] = brw->wm.prog_bo; - reloc_bufs[1] = brw->wm.scratch_bo; - reloc_bufs[2] = brw->wm.sampler_bo; + + /* XXX: temporary: + */ + grf_reg_count = (align(key.total_grf, 16) / 16 - 1); + per_thread_scratch_space = key.total_scratch / 1024 - 1; + stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm; + sampler_count = BRW_IS_IGDNG(brw) ? 
0 :(key.sampler_count + 1) / 4; + + /* Emit WM program relocation */ + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_STATE, + grf_reg_count << 1, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); + + /* Emit scratch space relocation */ + if (key.total_scratch != 0) { + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_SCRATCH, + per_thread_scratch_space, + offsetof(struct brw_wm_unit_state, thread2), + brw->wm.scratch_bo); + } + + /* Emit sampler state relocation */ + if (key.sampler_count != 0) { + make_reloc(&reloc[nr_reloc++], + BRW_USAGE_STATE, + stats_enable | (sampler_count << 2), + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); + } + if (brw_search_cache(&brw->cache, BRW_WM_UNIT, &key, sizeof(key), - reloc_bufs, 3, + reloc, nr_reloc, NULL, &brw->wm.state_bo)) return PIPE_OK; - ret = wm_unit_create_from_key(brw, &key, reloc_bufs, + ret = wm_unit_create_from_key(brw, &key, + reloc, nr_reloc, &brw->wm.state_bo); if (ret) return ret; -- cgit v1.2.3 From 018e2250b860df75485d1c7741dfa010c39ae6f1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 08:33:10 +0000 Subject: i965g: remove/disable inactive state atoms --- src/gallium/drivers/i965/brw_misc_state.c | 2 +- src/gallium/drivers/i965/brw_screen.c | 1 + src/gallium/drivers/i965/brw_state.h | 2 -- src/gallium/drivers/i965/brw_state_upload.c | 4 +--- 4 files changed, 3 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index 8e35f9ad1d..ce3e48f360 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -404,7 +404,7 @@ static int upload_invarient_state( struct brw_context *brw ) BRW_BATCH_STRUCT(brw, &sip); } - + /* VF Statistics */ { struct brw_vf_statistics vfs; memset(&vfs, 0, sizeof(vfs)); diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 
7991f4ae52..04a3f81bcf 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -291,6 +291,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) #ifdef DEBUG BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); + BRW_DEBUG |= DEBUG_STATS; #endif memset(&chipset, 0, sizeof chipset); diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index 97710abec3..a9b8165495 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -52,7 +52,6 @@ brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo) const struct brw_tracked_state brw_blend_constant_color; const struct brw_tracked_state brw_cc_unit; const struct brw_tracked_state brw_cc_vp; -const struct brw_tracked_state brw_check_fallback; const struct brw_tracked_state brw_clip_prog; const struct brw_tracked_state brw_clip_unit; const struct brw_tracked_state brw_curbe_buffer; @@ -65,7 +64,6 @@ const struct brw_tracked_state brw_aa_line_parameters; const struct brw_tracked_state brw_pipelined_state_pointers; const struct brw_tracked_state brw_binding_table_pointers; const struct brw_tracked_state brw_depthbuffer; -const struct brw_tracked_state brw_polygon_stipple_offset; const struct brw_tracked_state brw_polygon_stipple; const struct brw_tracked_state brw_program_parameters; const struct brw_tracked_state brw_recalculate_urb_fence; diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index fdcdd59129..233dce03df 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -38,8 +38,6 @@ const struct brw_tracked_state *atoms[] = { - &brw_check_fallback, - // &brw_wm_input_sizes, &brw_vs_prog, &brw_gs_prog, @@ -58,7 +56,7 @@ const struct brw_tracked_state *atoms[] = &brw_cc_unit, &brw_vs_surfaces, /* 
must do before unit */ - &brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + //&brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ &brw_wm_surfaces, /* must do before samplers and unit */ &brw_wm_samplers, -- cgit v1.2.3 From 0e80e4ea7576733ede13f156a1dce644b1e6df89 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 08:54:01 +0000 Subject: i965g: make sure blend color packet header is initialized We will emit this packet at startup (dirty == ~0), even if we haven't had the state tracker call into brw_set_blend_color() yet. This way is a little more efficient also. --- src/gallium/drivers/i965/brw_pipe_blend.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c index 872151222d..b759a910b6 100644 --- a/src/gallium/drivers/i965/brw_pipe_blend.c +++ b/src/gallium/drivers/i965/brw_pipe_blend.c @@ -177,9 +177,6 @@ static void brw_set_blend_color(struct pipe_context *pipe, struct brw_context *brw = brw_context(pipe); struct brw_blend_constant_color *bcc = &brw->curr.bcc; - memset(bcc, 0, sizeof(*bcc)); - bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR; - bcc->header.length = sizeof(*bcc)/4-2; bcc->blend_constant_color[0] = blend_color->color[0]; bcc->blend_constant_color[1] = blend_color->color[1]; bcc->blend_constant_color[2] = blend_color->color[2]; @@ -195,6 +192,15 @@ void brw_pipe_blend_init( struct brw_context *brw ) brw->base.create_blend_state = brw_create_blend_state; brw->base.bind_blend_state = brw_bind_blend_state; brw->base.delete_blend_state = brw_delete_blend_state; + + { + struct brw_blend_constant_color *bcc = &brw->curr.bcc; + + memset(bcc, 0, sizeof(*bcc)); + bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc->header.length = sizeof(*bcc)/4-2; + } + } void brw_pipe_blend_cleanup( struct brw_context *brw ) -- cgit v1.2.3 From 
fc7fa678f55d15b032e3c9053a22c811e2de4cde Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 08:57:02 +0000 Subject: i965g: populate brw_context chipset id --- src/gallium/drivers/i965/brw_context.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index 8e1421e738..f85116a568 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -114,6 +114,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen) brw->base.screen = screen; brw->base.destroy = brw_destroy_context; brw->sws = brw_screen(screen)->sws; + brw->chipset = brw_screen(screen)->chipset; brw_pipe_blend_init( brw ); brw_pipe_depth_stencil_init( brw ); -- cgit v1.2.3 From c22b47ebb1f00d43fe74d57a1e727fa70c4bf970 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 08:57:40 +0000 Subject: i965g: don't emit line stipple packet if stipple disabled --- src/gallium/drivers/i965/brw_misc_state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index ce3e48f360..5ee87bcac0 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -344,7 +344,9 @@ const struct brw_tracked_state brw_polygon_stipple = { static int upload_line_stipple(struct brw_context *brw) { const struct brw_line_stipple *bls = &brw->curr.rast->bls; - BRW_CACHED_BATCH_STRUCT(brw, bls); + if (bls->header.opcode) { + BRW_CACHED_BATCH_STRUCT(brw, bls); + } return 0; } -- cgit v1.2.3 From b216f1aa474196661aacbaf29604659172d1a74e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 09:35:25 +0000 Subject: i965g: use curr.vertex_element state directly --- src/gallium/drivers/i965/brw_context.h | 5 ----- 
src/gallium/drivers/i965/brw_draw_upload.c | 13 +++++++------ 2 files changed, 7 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 67fad0d9a5..34799d5211 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -572,12 +572,7 @@ struct brw_context struct brw_winsys_buffer *bo; } vb[PIPE_MAX_ATTRIBS]; - struct { - int dummy; - } ve[PIPE_MAX_ATTRIBS]; - unsigned nr_vb; /* currently the same as curr.num_vertex_buffers */ - unsigned nr_ve; /* currently the same as curr.num_vertex_elements */ } vb; struct { diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index 188605a0c1..f50ce3005d 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -317,6 +317,7 @@ static int brw_emit_vertex_buffers( struct brw_context *brw ) static int brw_emit_vertex_elements(struct brw_context *brw) { + GLuint nr = brw->curr.num_vertex_elements; GLuint i; brw_emit_query_begin(brw); @@ -328,7 +329,7 @@ static int brw_emit_vertex_elements(struct brw_context *brw) * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ - if (brw->vb.nr_ve == 0) { + if (nr == 0) { BEGIN_BATCH(3, IGNORE_CLIPRECTS); OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1); OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) | @@ -346,9 +347,9 @@ static int brw_emit_vertex_elements(struct brw_context *brw) /* Now emit vertex element (VEP) state packets. 
* */ - BEGIN_BATCH(1 + brw->curr.num_vertex_elements * 2, IGNORE_CLIPRECTS); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_ve * 2) - 2)); - for (i = 0; i < brw->vb.nr_ve; i++) { + BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2)); + for (i = 0; i < nr; i++) { const struct pipe_vertex_element *input = &brw->curr.vertex_element[i]; uint32_t format = brw_translate_surface_format( input->src_format ); uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC; @@ -364,10 +365,10 @@ static int brw_emit_vertex_elements(struct brw_context *brw) break; } - OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | + OUT_BATCH((input->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + (input->src_offset << BRW_VE0_SRC_OFFSET_SHIFT)); if (BRW_IS_IGDNG(brw)) OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) | -- cgit v1.2.3 From 5d7c0cf563b65aeb83f3d2f2ec709a96cf0fbae2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 10:19:24 +0000 Subject: i965g: tgsi outputs cannot be used as source regs --- src/gallium/drivers/i965/brw_vs_emit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 3217777acb..25aea87b8f 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -171,7 +171,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Allocate outputs. The non-position outputs go straight into message regs. 
*/ - c->nr_outputs = 0; + c->nr_outputs = c->prog_data.nr_outputs; c->first_output = reg; c->first_overflow_output = 0; @@ -182,7 +182,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* XXX: need to access vertex output semantics here: */ - c->nr_outputs = c->prog_data.nr_outputs; for (i = 0; i < c->prog_data.nr_outputs; i++) { assert(i < Elements(c->regs[TGSI_FILE_OUTPUT])); @@ -244,12 +243,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } } +#if 0 for (i = 0; i < 128; i++) { if (c->output_regs[i].used_in_src) { c->output_regs[i].reg = brw_vec8_grf(reg, 0); reg++; } } +#endif c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); reg += 2; -- cgit v1.2.3 From 1d6b5957c6be221feb836bc25686246f67769bce Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 10:19:39 +0000 Subject: i965g: add DEBUG_MIN_URB flag --- src/gallium/drivers/i965/brw_debug.h | 2 +- src/gallium/drivers/i965/brw_screen.c | 3 ++- src/gallium/drivers/i965/brw_urb.c | 11 ++++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h index aee62f7a5b..ea3c87218b 100644 --- a/src/gallium/drivers/i965/brw_debug.h +++ b/src/gallium/drivers/i965/brw_debug.h @@ -15,7 +15,7 @@ #define DEBUG_BATCH 0x80 #define DEBUG_PIXEL 0x100 #define DEBUG_BUFMGR 0x200 -#define DEBUG_unused1 0x400 +#define DEBUG_MIN_URB 0x400 #define DEBUG_unused2 0x800 #define DEBUG_unused3 0x1000 #define DEBUG_SYNC 0x2000 diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 04a3f81bcf..275ff0959f 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -48,6 +48,7 @@ static const struct debug_named_value debug_names[] = { { "bat", DEBUG_BATCH}, { "pix", DEBUG_PIXEL}, { "buf", DEBUG_BUFMGR}, + { "min", DEBUG_MIN_URB}, { "sync", DEBUG_SYNC}, { "prim", DEBUG_PRIMS }, { "vert", 
DEBUG_VERTS }, @@ -291,7 +292,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) #ifdef DEBUG BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); - BRW_DEBUG |= DEBUG_STATS; + BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB; #endif memset(&chipset, 0, sizeof chipset); diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c index 57fd8f20b2..907ec56c6c 100644 --- a/src/gallium/drivers/i965/brw_urb.c +++ b/src/gallium/drivers/i965/brw_urb.c @@ -84,7 +84,7 @@ * XXX: Verify min_nr_entries, esp for VS. * XXX: Verify SF min_entry_size. */ -static const struct { +static const struct urb_limits { GLuint min_nr_entries; GLuint preferred_nr_entries; GLuint min_entry_size; @@ -167,6 +167,15 @@ static int recalculate_urb_fence( struct brw_context *brw ) } } + if (BRW_DEBUG & DEBUG_MIN_URB) { + brw->urb.nr_vs_entries = limits[VS].min_nr_entries; + brw->urb.nr_gs_entries = limits[GS].min_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].min_nr_entries; + brw->urb.nr_sf_entries = limits[SF].min_nr_entries; + brw->urb.nr_cs_entries = limits[CS].min_nr_entries; + brw->urb.constrained = 1; + } + if (!check_urb_layout(brw)) { brw->urb.nr_vs_entries = limits[VS].min_nr_entries; brw->urb.nr_gs_entries = limits[GS].min_nr_entries; -- cgit v1.2.3 From 212fb8adbd0e5e28a5d20b0cc03cde46df2831f4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 10:24:19 +0000 Subject: i965g: don't set up vs stack register for non-branching shaders --- src/gallium/drivers/i965/brw_context.h | 2 ++ src/gallium/drivers/i965/brw_pipe_shader.c | 20 ++++++++++---------- src/gallium/drivers/i965/brw_vs_emit.c | 11 ++++++++--- src/gallium/drivers/i965/brw_wm.c | 3 --- src/gallium/drivers/i965/brw_wm.h | 1 - 5 files changed, 20 insertions(+), 17 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h 
b/src/gallium/drivers/i965/brw_context.h index 34799d5211..b81dff0aa0 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -154,6 +154,8 @@ struct brw_vertex_shader { const struct tgsi_token *tokens; struct tgsi_shader_info info; + unsigned has_flow_control:1; + unsigned id; struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ GLboolean use_const_buffer; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 662c43c3e5..44f9ad6f9c 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -43,15 +43,15 @@ * Determine if the given shader uses complex features such as flow * conditionals, loops, subroutines. */ -GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp) +static GLboolean has_flow_control(const struct tgsi_shader_info *info) { - return (fp->info.opcode_count[TGSI_OPCODE_ARL] > 0 || - fp->info.opcode_count[TGSI_OPCODE_IF] > 0 || - fp->info.opcode_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */ - fp->info.opcode_count[TGSI_OPCODE_CAL] > 0 || - fp->info.opcode_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */ - fp->info.opcode_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */ - fp->info.opcode_count[TGSI_OPCODE_BGNLOOP] > 0); + return (info->opcode_count[TGSI_OPCODE_ARL] > 0 || + info->opcode_count[TGSI_OPCODE_IF] > 0 || + info->opcode_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */ + info->opcode_count[TGSI_OPCODE_CAL] > 0 || + info->opcode_count[TGSI_OPCODE_BRK] > 0 || /* redundant - BGNLOOP */ + info->opcode_count[TGSI_OPCODE_RET] > 0 || /* redundant - CAL */ + info->opcode_count[TGSI_OPCODE_BGNLOOP] > 0); } @@ -88,7 +88,7 @@ static void *brw_create_fs_state( struct pipe_context *pipe, /* Duplicate tokens, scan shader */ fs->id = brw->program_id++; - fs->has_flow_control = brw_wm_has_flow_control(fs); + fs->has_flow_control = 
has_flow_control(&fs->info); fs->tokens = tgsi_dup_tokens(shader->tokens); if (fs->tokens == NULL) @@ -126,7 +126,7 @@ static void *brw_create_vs_state( struct pipe_context *pipe, /* Duplicate tokens, scan shader */ vs->id = brw->program_id++; - //vs->has_flow_control = brw_wm_has_flow_control(vs); + vs->has_flow_control = has_flow_control(&vs->info); vs->tokens = tgsi_dup_tokens(shader->tokens); if (vs->tokens == NULL) diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 25aea87b8f..e0fadc8dce 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -252,8 +252,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } #endif - c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); - reg += 2; + if (c->vp->has_flow_control) { + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; + } /* Some opcodes need an internal temporary: */ @@ -1592,7 +1594,10 @@ void brw_vs_emit(struct brw_vs_compile *c) /* Static register allocation */ brw_vs_alloc_regs(c); - brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + + if (c->vp->has_flow_control) { + brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + } /* Instructions */ diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 93f90bf329..7f2cb15256 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -162,9 +162,6 @@ static enum pipe_error do_wm_prog( struct brw_context *brw, brw_init_compile(brw, &c->func); - /* temporary sanity check assertion */ - assert(fp->has_flow_control == brw_wm_has_flow_control(c->fp)); - /* * Shader which use GLSL features such as flow control are handled * differently from "simple" shaders. 
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 48dac39756..28d216260e 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -338,7 +338,6 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean ps_uses_depth, struct brw_wm_prog_key *key ); -GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp); void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c); void emit_ddxy(struct brw_compile *p, -- cgit v1.2.3 From caf2cf884cb32883e9af07dbe36ca9648bae1821 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 10:38:19 +0000 Subject: i965g: fix some reloc counts --- src/gallium/drivers/i965/brw_cc.c | 2 +- src/gallium/drivers/i965/brw_vs_state.c | 2 +- src/gallium/drivers/i965/brw_vs_surface_state.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index 94e2c99c3e..f05728ea5d 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -150,7 +150,7 @@ cc_unit_create_from_key(struct brw_context *brw, ret = brw_upload_cache(&brw->cache, BRW_CC_UNIT, key, sizeof(*key), - reloc, Elements(reloc), + reloc, 1, &cc, sizeof(cc), NULL, NULL, bo_out); diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c index 0b44f39f4d..dadbb622e4 100644 --- a/src/gallium/drivers/i965/brw_vs_state.c +++ b/src/gallium/drivers/i965/brw_vs_state.c @@ -146,7 +146,7 @@ vs_unit_create_from_key(struct brw_context *brw, ret = brw_upload_cache(&brw->cache, BRW_VS_UNIT, key, sizeof(*key), - reloc, Elements(reloc), + reloc, 1, &vs, sizeof(vs), NULL, NULL, bo_out); diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c index aaf2a44f61..177a5170d2 100644 --- a/src/gallium/drivers/i965/brw_vs_surface_state.c +++ 
b/src/gallium/drivers/i965/brw_vs_surface_state.c @@ -162,7 +162,7 @@ brw_vs_get_binding_table(struct brw_context *brw, ret = brw_cache_data( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, - reloc, Elements(reloc), + reloc, nr_reloc, data, sizeof data, NULL, NULL, bo_out); -- cgit v1.2.3 From aab9601a753afd012e16df072e774a32eb1348b9 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 11:21:08 +0000 Subject: i965g: hardwire linear interpolation for now seems to generate saner code, need to go back and fix perspective interpolation (and remove the hard-wire) once this is working. --- src/gallium/drivers/i965/brw_sf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 24d1015bbd..52fb2cd42d 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -138,8 +138,11 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw) * XXX: as long as we're hard-wiring, is eg. position required to * be linear? 
*/ - key.linear_attrs = 0; - key.persp_attrs = (1 << key.nr_attrs) - 1; + //key.linear_attrs = 0; + //key.persp_attrs = (1 << key.nr_attrs) - 1; + + key.linear_attrs = (1 << key.nr_attrs) - 1; + key.persp_attrs = 0; /* BRW_NEW_REDUCED_PRIMITIVE */ switch (brw->reduced_primitive) { -- cgit v1.2.3 From 381cd2d63f4aae29d478e02dda5a978a668168e1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 11:21:48 +0000 Subject: i965g: disassemble each instruction as generated --- src/gallium/drivers/i965/brw_context.h | 1 + src/gallium/drivers/i965/brw_debug.h | 2 +- src/gallium/drivers/i965/brw_disasm.c | 2 +- src/gallium/drivers/i965/brw_eu_emit.c | 7 +++++++ src/gallium/drivers/i965/brw_screen.c | 1 + 5 files changed, 11 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index b81dff0aa0..05fc9d45b5 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -788,6 +788,7 @@ int brw_upload_urb_fence(struct brw_context *brw); int brw_upload_cs_urb_state(struct brw_context *brw); /* brw_disasm.c */ +int brw_disasm_insn (FILE *file, const struct brw_instruction *inst); int brw_disasm (FILE *file, const struct brw_instruction *inst, unsigned count); diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h index ea3c87218b..0deddbf977 100644 --- a/src/gallium/drivers/i965/brw_debug.h +++ b/src/gallium/drivers/i965/brw_debug.h @@ -16,7 +16,7 @@ #define DEBUG_PIXEL 0x100 #define DEBUG_BUFMGR 0x200 #define DEBUG_MIN_URB 0x400 -#define DEBUG_unused2 0x800 +#define DEBUG_DISASSEM 0x800 #define DEBUG_unused3 0x1000 #define DEBUG_SYNC 0x2000 #define DEBUG_PRIMS 0x4000 diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index df0c7b9a2b..4100f11d48 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -770,7 
+770,7 @@ static int src1 (FILE *file, const struct brw_instruction *inst) } } -static int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) +int brw_disasm_insn (FILE *file, const struct brw_instruction *inst) { int err = 0; int space = 0; diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index f7fa520348..7776b4f965 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -33,6 +33,7 @@ #include "brw_context.h" #include "brw_defines.h" #include "brw_eu.h" +#include "brw_debug.h" @@ -473,6 +474,12 @@ static struct brw_instruction *next_insn( struct brw_compile *p, { struct brw_instruction *insn; + if (0 && (BRW_DEBUG & DEBUG_DISASSEM)) + { + if (p->nr_insn) + brw_disasm_insn(stderr, &p->store[p->nr_insn-1]); + } + assert(p->nr_insn + 1 < BRW_EU_MAX_INSN); insn = &p->store[p->nr_insn++]; diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 275ff0959f..9d8066442b 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -49,6 +49,7 @@ static const struct debug_named_value debug_names[] = { { "pix", DEBUG_PIXEL}, { "buf", DEBUG_BUFMGR}, { "min", DEBUG_MIN_URB}, + { "dis", DEBUG_DISASSEM}, { "sync", DEBUG_SYNC}, { "prim", DEBUG_PRIMS }, { "vert", DEBUG_VERTS }, -- cgit v1.2.3 From a485341455bb270001aad8b39c7b9fa36ac74478 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 11:56:52 +0000 Subject: i965g: add dumping for our new pass_fp output --- src/gallium/drivers/i965/brw_screen.c | 2 +- src/gallium/drivers/i965/brw_wm.h | 4 +- src/gallium/drivers/i965/brw_wm_debug.c | 163 ++++++++++++++++++++++++-------- src/gallium/drivers/i965/brw_wm_fp.c | 35 ++----- src/gallium/drivers/i965/brw_wm_glsl.c | 4 +- src/gallium/drivers/i965/brw_wm_pass0.c | 2 +- 6 files changed, 139 insertions(+), 71 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git 
a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 9d8066442b..575a418b7d 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -293,7 +293,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) #ifdef DEBUG BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); - BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB; + BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB | DEBUG_WM; #endif memset(&chipset, 0, sizeof chipset); diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 28d216260e..7d044ff6ec 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -181,7 +181,6 @@ struct brw_wm_instruction { #define Y 1 #define Z 2 #define W 3 -#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3) struct brw_fp_src { @@ -333,6 +332,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c, void brw_wm_print_program( struct brw_wm_compile *c, const char *stage ); +void brw_wm_print_fp_program( struct brw_wm_compile *c, + const char *stage ); + void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup, GLboolean ps_uses_depth, diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c index 65d7626eea..3d11fa074c 100644 --- a/src/gallium/drivers/i965/brw_wm_debug.c +++ b/src/gallium/drivers/i965/brw_wm_debug.c @@ -34,6 +34,62 @@ #include "brw_context.h" #include "brw_wm.h" +static void print_writemask( unsigned writemask ) +{ + if (writemask != BRW_WRITEMASK_XYZW) + debug_printf(".%s%s%s%s", + (writemask & BRW_WRITEMASK_X) ? "x" : "", + (writemask & BRW_WRITEMASK_Y) ? "y" : "", + (writemask & BRW_WRITEMASK_Z) ? "z" : "", + (writemask & BRW_WRITEMASK_W) ? 
"w" : ""); +} + +static void print_swizzle( unsigned swizzle ) +{ + char *swz = "xyzw"; + if (swizzle != BRW_SWIZZLE_XYZW) + debug_printf(".%c%c%c%c", + swz[BRW_GET_SWZ(swizzle, X)], + swz[BRW_GET_SWZ(swizzle, Y)], + swz[BRW_GET_SWZ(swizzle, Z)], + swz[BRW_GET_SWZ(swizzle, W)]); +} + +static void print_opcode( unsigned opcode ) +{ + switch (opcode) { + case WM_PIXELXY: + debug_printf("PIXELXY"); + break; + case WM_DELTAXY: + debug_printf("DELTAXY"); + break; + case WM_PIXELW: + debug_printf("PIXELW"); + break; + case WM_WPOSXY: + debug_printf("WPOSXY"); + break; + case WM_PINTERP: + debug_printf("PINTERP"); + break; + case WM_LINTERP: + debug_printf("LINTERP"); + break; + case WM_CINTERP: + debug_printf("CINTERP"); + break; + case WM_FB_WRITE: + debug_printf("FB_WRITE"); + break; + case WM_FRONTFACING: + debug_printf("FRONTFACING"); + break; + default: + debug_printf("%s", tgsi_get_opcode_info(opcode)->mnemonic); + break; + } +} void brw_wm_print_value( struct brw_wm_compile *c, struct brw_wm_value *value ) @@ -98,47 +154,11 @@ void brw_wm_print_insn( struct brw_wm_compile *c, debug_printf(","); } debug_printf("]"); - - if (inst->writemask != BRW_WRITEMASK_XYZW) - debug_printf(".%s%s%s%s", - (inst->writemask & BRW_WRITEMASK_X) ? "x" : "", - (inst->writemask & BRW_WRITEMASK_Y) ? "y" : "", - (inst->writemask & BRW_WRITEMASK_Z) ? "z" : "", - (inst->writemask & BRW_WRITEMASK_W) ? 
"w" : ""); - - switch (inst->opcode) { - case WM_PIXELXY: - debug_printf(" = PIXELXY"); - break; - case WM_DELTAXY: - debug_printf(" = DELTAXY"); - break; - case WM_PIXELW: - debug_printf(" = PIXELW"); - break; - case WM_WPOSXY: - debug_printf(" = WPOSXY"); - break; - case WM_PINTERP: - debug_printf(" = PINTERP"); - break; - case WM_LINTERP: - debug_printf(" = LINTERP"); - break; - case WM_CINTERP: - debug_printf(" = CINTERP"); - break; - case WM_FB_WRITE: - debug_printf(" = FB_WRITE"); - break; - case WM_FRONTFACING: - debug_printf(" = FRONTFACING"); - break; - default: - debug_printf(" = %s", tgsi_get_opcode_info(inst->opcode)->mnemonic); - break; - } - + print_writemask(inst->writemask); + + debug_printf(" = "); + print_opcode(inst->opcode); + if (inst->saturate) debug_printf("_SAT"); @@ -173,3 +193,64 @@ void brw_wm_print_program( struct brw_wm_compile *c, debug_printf("\n"); } +static const char *file_strings[TGSI_FILE_COUNT+1] = { + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMPLER", + "ADDR", + "IMM", + "LOOP", + "PAYLOAD" +}; + +static void brw_wm_print_fp_insn( struct brw_wm_compile *c, + struct brw_fp_instruction *inst ) +{ + GLuint i; + GLuint nr_args = brw_wm_nr_args(inst->opcode); + + print_opcode(inst->opcode); + if (inst->dst.saturate) + debug_printf("_SAT"); + debug_printf(" "); + + if (inst->dst.indirect) + debug_printf("["); + + debug_printf("%s[%d]", + file_strings[inst->dst.file], + inst->dst.index ); + print_writemask(inst->dst.writemask); + + if (inst->dst.indirect) + debug_printf("]"); + + debug_printf(nr_args ? ", " : "\n"); + + for (i = 0; i < nr_args; i++) { + debug_printf("%s%s%s[%d]%s", + inst->src[i].negate ? "-" : "", + inst->src[i].abs ? "ABS(" : "", + file_strings[inst->src[i].file], + inst->src[i].index, + inst->src[i].abs ? ")" : ""); + print_swizzle(inst->src[i].swizzle); + debug_printf("%s", i == nr_args - 1 ? 
"\n" : ", "); + } +} + + +void brw_wm_print_fp_program( struct brw_wm_compile *c, + const char *stage ) +{ + GLuint insn; + + debug_printf("%s:\n", stage); + for (insn = 0; insn < c->nr_fp_insns; insn++) + brw_wm_print_fp_insn(c, &c->fp_instructions[insn]); + debug_printf("\n"); +} + diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index bba448815b..74aa02f198 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -45,20 +45,6 @@ #include "brw_debug.h" - - -static const char *wm_opcode_strings[] = { - "PIXELXY", - "DELTAXY", - "PIXELW", - "LINTERP", - "PINTERP", - "CINTERP", - "WPOSXY", - "FB_WRITE", - "FRONTFACING", -}; - /*********************************************************************** * Source regs */ @@ -94,10 +80,10 @@ static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z { unsigned swz = reg.swizzle; - reg.swizzle = ( GET_SWZ(swz, x) << 0 | - GET_SWZ(swz, y) << 2 | - GET_SWZ(swz, z) << 4 | - GET_SWZ(swz, w) << 6 ); + reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 | + BRW_GET_SWZ(swz, y) << 2 | + BRW_GET_SWZ(swz, z) << 4 | + BRW_GET_SWZ(swz, w) << 6 ); return reg; } @@ -200,10 +186,10 @@ out: swizzle |= (swizzle & 0x3) << (j * 2); return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ), - GET_SWZ(swizzle, X), - GET_SWZ(swizzle, Y), - GET_SWZ(swizzle, Z), - GET_SWZ(swizzle, W) ); + BRW_GET_SWZ(swizzle, X), + BRW_GET_SWZ(swizzle, Y), + BRW_GET_SWZ(swizzle, Z), + BRW_GET_SWZ(swizzle, W) ); } @@ -843,7 +829,7 @@ static GLboolean projtex( struct brw_wm_compile *c, return GL_FALSE; /* ut2004 gun rendering !?! 
*/ if (src.file == TGSI_FILE_INPUT && - GET_SWZ(src.swizzle, W) == W && + BRW_GET_SWZ(src.swizzle, W) == W && c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE) return GL_FALSE; @@ -1214,8 +1200,7 @@ int brw_wm_pass_fp( struct brw_wm_compile *c ) } if (BRW_DEBUG & DEBUG_WM) { - debug_printf("pass_fp:\n"); - //brw_print_program( c->fp_brw_program ); + brw_wm_print_fp_program( c, "pass_fp" ); debug_printf("\n"); } diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c index 284f819bf8..3b3afc39d3 100644 --- a/src/gallium/drivers/i965/brw_wm_glsl.c +++ b/src/gallium/drivers/i965/brw_wm_glsl.c @@ -558,7 +558,7 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c, { const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; const GLuint nr = 1; - const GLuint component = GET_SWZ(src->Swizzle, channel); + const GLuint component = BRW_GET_SWZ(src->Swizzle, channel); /* Extended swizzle terms */ if (component == SWIZZLE_ZERO) { @@ -598,7 +598,7 @@ static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, const struct prog_src_register *src = &inst->SrcReg[srcRegIndex]; if (src->File == TGSI_FILE_IMMEDIATE) { /* an immediate */ - const int component = GET_SWZ(src->Swizzle, channel); + const int component = BRW_GET_SWZ(src->Swizzle, channel); const GLfloat *param = c->fp->program.Base.Parameters->ParameterValues[src->Index]; GLfloat value = param[component]; diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 7b18335dec..53232325d2 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -227,7 +227,7 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c, struct brw_fp_src src, GLuint i ) { - return pass0_get_reg(c, src.file, src.index, GET_SWZ(src.swizzle,i)); + return pass0_get_reg(c, src.file, src.index, BRW_GET_SWZ(src.swizzle,i)); } -- cgit v1.2.3 From 
1e3910a878e63d7859b205a30e23535d1da67d45 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 12:01:11 +0000 Subject: i965g: init saturate field in fp dst_reg helper --- src/gallium/drivers/i965/brw_wm_fp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 74aa02f198..d27a768a0c 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -223,6 +223,7 @@ static struct brw_fp_dst dst_reg(GLuint file, GLuint idx) reg.index = idx; reg.writemask = BRW_WRITEMASK_XYZW; reg.indirect = 0; + reg.saturate = 0; return reg; } -- cgit v1.2.3 From 3e14a482daf5e69331efac69711534a8b66118e4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 12:51:26 +0000 Subject: i965g: propogate nr_cbufs into wm prog key --- src/gallium/drivers/i965/brw_context.h | 1 + src/gallium/drivers/i965/brw_pipe_fb.c | 5 ++++- src/gallium/drivers/i965/brw_wm.c | 5 +++++ 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 05fc9d45b5..f53b92d4f5 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -209,6 +209,7 @@ struct brw_sampler { #define PIPE_NEW_QUERY 0x80000 #define PIPE_NEW_SCISSOR 0x100000 #define PIPE_NEW_BOUND_TEXTURES 0x200000 +#define PIPE_NEW_NR_CBUFS 0x400000 diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index d9b70f4eef..f65f45fb84 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -38,7 +38,10 @@ static void brw_set_framebuffer_state( struct pipe_context *pipe, } } - brw->curr.fb.nr_cbufs = fb->nr_cbufs; + if (brw->curr.fb.nr_cbufs != fb->nr_cbufs) { + brw->curr.fb.nr_cbufs = fb->nr_cbufs; + brw->state.dirty.mesa |= 
PIPE_NEW_NR_CBUFS; + } } diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 7f2cb15256..8589aa22a8 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -259,6 +259,10 @@ static void brw_wm_populate_key( struct brw_context *brw, /* CACHE_NEW_VS_PROG */ key->vp_nr_outputs = brw->vs.prog_data->nr_outputs; + key->nr_cbufs = brw->curr.fb.nr_cbufs; + + key->nr_inputs = brw->curr.fragment_shader->info.num_inputs; + /* The unique fragment program ID */ key->program_string_id = brw->curr.fragment_shader->id; } @@ -294,6 +298,7 @@ const struct brw_tracked_state brw_wm_prog = { .mesa = (PIPE_NEW_FRAGMENT_SHADER | PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_RAST | + PIPE_NEW_NR_CBUFS | PIPE_NEW_BOUND_TEXTURES), .brw = (BRW_NEW_WM_INPUT_DIMENSIONS | BRW_NEW_REDUCED_PRIMITIVE), -- cgit v1.2.3 From eacd13bcc809e1e877a48c2942eb6285aa21f6be Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 13:09:12 +0000 Subject: i965g: plumb through fb_write target and eot data --- src/gallium/drivers/i965/brw_wm.h | 10 +++++----- src/gallium/drivers/i965/brw_wm_emit.c | 4 ++-- src/gallium/drivers/i965/brw_wm_fp.c | 26 +++++++++++++------------- src/gallium/drivers/i965/brw_wm_pass0.c | 10 +++++++--- src/gallium/drivers/i965/brw_wm_pass1.c | 4 ++-- 5 files changed, 29 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index 7d044ff6ec..f85a8af878 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -142,9 +142,10 @@ struct brw_wm_instruction { GLuint saturate:1; GLuint writemask:4; GLuint tex_unit:4; /* texture/sampler unit for texture instructions */ - GLuint tex_target:4; /* TGSI_TEXTURE_x for texture instructions*/ + GLuint target:4; /* TGSI_TEXTURE_x for texture instructions, + * target binding table index for FB_WRITE + */ GLuint eot:1; /* End of thread indicator for 
FB_WRITE*/ - GLuint target:10; /* target binding table index for FB_WRITE*/ }; @@ -204,10 +205,9 @@ struct brw_fp_instruction { struct brw_fp_dst dst; struct brw_fp_src src[3]; unsigned opcode:8; + unsigned target:8; /* XXX: special usage for FB_WRITE */ unsigned tex_unit:4; - unsigned tex_target:4; - unsigned target:10; /* destination surface for FB_WRITE */ - unsigned eot:1; /* mark last instruction (usually FB_WRITE) */ + unsigned pad:12; }; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 1c38f80cda..a14e12f35b 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -803,7 +803,7 @@ static void emit_tex( struct brw_wm_compile *c, /* How many input regs are there? */ - switch (inst->tex_target) { + switch (inst->target) { case TGSI_TEXTURE_1D: emit = BRW_WRITEMASK_X; nr = 1; @@ -885,7 +885,7 @@ static void emit_txb( struct brw_wm_compile *c, GLuint msg_type; /* Shadow ignored for txb. */ - switch (inst->tex_target) { + switch (inst->target) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: brw_MOV(p, brw_message_reg(2), arg[0]); diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index d27a768a0c..2a207958eb 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -280,18 +280,24 @@ static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c) static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c, GLuint op, struct brw_fp_dst dest, - GLuint tex_src_unit, - GLuint tex_src_target, + GLuint tex_unit, + GLuint target, struct brw_fp_src src0, struct brw_fp_src src1, struct brw_fp_src src2 ) { struct brw_fp_instruction *inst = get_fp_inst(c); + if (tex_unit || target) + assert(op == TGSI_OPCODE_TXP || + op == TGSI_OPCODE_TXB || + op == TGSI_OPCODE_TEX || + op == WM_FB_WRITE); + inst->opcode = op; inst->dst = dest; - inst->tex_unit = tex_src_unit; - inst->tex_target = 
tex_src_target; + inst->tex_unit = tex_unit; + inst->target = target; inst->src[0] = src0; inst->src[1] = src1; inst->src[2] = src2; @@ -916,23 +922,17 @@ static void emit_fb_write( struct brw_wm_compile *c ) for (i = 0 ; i < c->key.nr_cbufs; i++) { struct brw_fp_src outcolor; - unsigned target = 1<key.nr_cbufs - 1) - target |= 1; outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i); - /* Use emit_tex_op so that we can specify the inst->tex_target + /* Use emit_tex_op so that we can specify the inst->target * field, which is abused to contain the FB write target and the * EOT marker */ emit_tex_op(c, WM_FB_WRITE, dst_undef(), - target, - 0, + (i == c->key.nr_cbufs - 1), /* EOT */ + i, outcolor, payload_r0_depth, outdepth); diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 53232325d2..7bb341e2c2 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -274,9 +274,13 @@ translate_insn(struct brw_wm_compile *c, out->opcode = inst->opcode; out->saturate = inst->dst.saturate; out->tex_unit = inst->tex_unit; - out->tex_target = inst->tex_target; - out->eot = inst->eot; //inst->Aux & 1; - out->target = inst->target; //inst->Aux >> 1; + out->target = inst->target; + + /* Nasty hack: + */ + out->eot = (inst->opcode == WM_FB_WRITE && + inst->tex_unit != 0); + /* Args: */ diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c index 09ad2b8f5b..005747f00b 100644 --- a/src/gallium/drivers/i965/brw_wm_pass1.c +++ b/src/gallium/drivers/i965/brw_wm_pass1.c @@ -223,11 +223,11 @@ void brw_wm_pass1( struct brw_wm_compile *c ) case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: - read0 = get_texcoord_mask(inst->tex_target); + read0 = get_texcoord_mask(inst->target); break; case TGSI_OPCODE_TXB: - read0 = get_texcoord_mask(inst->tex_target) | BRW_WRITEMASK_W; + read0 = get_texcoord_mask(inst->target) | BRW_WRITEMASK_W; break; case WM_WPOSXY: -- cgit 
v1.2.3 From 11805726d311a5d11e58f01b2793cc19d7f98566 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 13:41:52 +0000 Subject: i965g: remove references to brw_surface_bo() --- src/gallium/drivers/i965/brw_misc_state.c | 6 +++--- src/gallium/drivers/i965/brw_screen.h | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index 5ee87bcac0..4dd73636fd 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++ b/src/gallium/drivers/i965/brw_misc_state.c @@ -231,7 +231,7 @@ static int prepare_depthbuffer(struct brw_context *brw) struct pipe_surface *zsbuf = brw->curr.fb.zsbuf; if (zsbuf) - brw_add_validated_bo(brw, brw_surface_bo(zsbuf)); + brw_add_validated_bo(brw, brw_surface(zsbuf)->bo); return 0; } @@ -278,8 +278,8 @@ static int emit_depthbuffer(struct brw_context *brw) return PIPE_ERROR_BAD_INPUT; } - bo = brw_surface_bo(surface); - pitch = brw_surface_pitch(surface); + bo = brw_surface(surface)->bo; + pitch = brw_surface(surface)->pitch; BEGIN_BATCH(len, IGNORE_CLIPRECTS); OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index f7267cc78a..301b20d549 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -167,9 +167,6 @@ brw_buffer_is_user_buffer( const struct pipe_buffer *buf ) return ((const struct brw_buffer *)buf)->user_buffer != NULL; } -struct brw_winsys_buffer * -brw_surface_bo( struct pipe_surface *surface ); - unsigned brw_surface_pitch( const struct pipe_surface *surface ); -- cgit v1.2.3 From 4d1ae7a546250548332e432f305ce47bd97967c3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 14:18:28 +0000 Subject: i965g: propogate index size state --- src/gallium/drivers/i965/brw_draw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 
'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 88cb31ad54..84803e43be 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -194,8 +194,10 @@ brw_draw_range_elements(struct pipe_context *pipe, * XXX: do we need to go through state validation to achieve this? * Could just call upload code directly. */ - if (brw->curr.index_buffer != index_buffer) { + if (brw->curr.index_buffer != index_buffer || + brw->curr.index_size != index_size) { pipe_buffer_reference( &brw->curr.index_buffer, index_buffer ); + brw->curr.index_size = index_size; brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER; } -- cgit v1.2.3 From 86c32df3e08b69605cbc59f4b3b72ac58b651db2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 15:40:16 +0000 Subject: i965g: get brw_state_debug.c building --- src/gallium/drivers/i965/Makefile | 1 + src/gallium/drivers/i965/brw_state_debug.c | 27 ++++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index f0a5bc7ee5..8df07d1c10 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -40,6 +40,7 @@ C_SOURCES = \ brw_sf_emit.c \ brw_sf_state.c \ brw_state_batch.c \ + brw_state_debug.c \ brw_state_cache.c \ brw_state_upload.c \ brw_structs_dump.c \ diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c index cc4744dc16..050f74761c 100644 --- a/src/gallium/drivers/i965/brw_state_debug.c +++ b/src/gallium/drivers/i965/brw_state_debug.c @@ -40,7 +40,29 @@ struct dirty_bit_map { #define DEFINE_BIT(name) {name, #name, 0} static struct dirty_bit_map mesa_bits[] = { + DEFINE_BIT(PIPE_NEW_DEPTH_STENCIL_ALPHA), + DEFINE_BIT(PIPE_NEW_RAST), + DEFINE_BIT(PIPE_NEW_BLEND), + DEFINE_BIT(PIPE_NEW_VIEWPORT), + 
DEFINE_BIT(PIPE_NEW_SAMPLERS), + DEFINE_BIT(PIPE_NEW_VERTEX_BUFFER), + DEFINE_BIT(PIPE_NEW_VERTEX_ELEMENT), + DEFINE_BIT(PIPE_NEW_FRAGMENT_SHADER), + DEFINE_BIT(PIPE_NEW_VERTEX_SHADER), + DEFINE_BIT(PIPE_NEW_FRAGMENT_CONSTANTS), + DEFINE_BIT(PIPE_NEW_VERTEX_CONSTANTS), + DEFINE_BIT(PIPE_NEW_CLIP), + DEFINE_BIT(PIPE_NEW_INDEX_BUFFER), + DEFINE_BIT(PIPE_NEW_INDEX_RANGE), DEFINE_BIT(PIPE_NEW_BLEND_COLOR), + DEFINE_BIT(PIPE_NEW_POLYGON_STIPPLE), + DEFINE_BIT(PIPE_NEW_FRAMEBUFFER_DIMENSIONS), + DEFINE_BIT(PIPE_NEW_DEPTH_BUFFER), + DEFINE_BIT(PIPE_NEW_COLOR_BUFFERS), + DEFINE_BIT(PIPE_NEW_QUERY), + DEFINE_BIT(PIPE_NEW_SCISSOR), + DEFINE_BIT(PIPE_NEW_BOUND_TEXTURES), + DEFINE_BIT(PIPE_NEW_NR_CBUFS), {0, 0, 0} }; @@ -55,11 +77,10 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_CONTEXT), DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS), DEFINE_BIT(BRW_NEW_PSP), - DEFINE_BIT(BRW_NEW_FENCE), + DEFINE_BIT(BRW_NEW_WM_SURFACES), + DEFINE_BIT(BRW_NEW_xxx), DEFINE_BIT(BRW_NEW_INDICES), - DEFINE_BIT(BRW_NEW_INDEX_BUFFER), DEFINE_BIT(BRW_NEW_VERTICES), - DEFINE_BIT(BRW_NEW_BATCH), {0, 0, 0} }; -- cgit v1.2.3 From 12ea198fd20c04b94bf0fe584b6d894d019d0c40 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Fri, 6 Nov 2009 00:09:04 +0000 Subject: i965g: Header whitespace --- src/gallium/drivers/i965/brw_winsys.h | 84 +++++++++++++++++------------------ 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index 2da660a1e6..a723244960 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -54,7 +54,7 @@ struct brw_winsys_buffer { enum brw_buffer_type { BRW_BUFFER_TYPE_TEXTURE, - BRW_BUFFER_TYPE_SCANOUT, /**< a texture used for scanning out from */ + BRW_BUFFER_TYPE_SCANOUT, /**< a texture used for scanning out from */ BRW_BUFFER_TYPE_VERTEX, BRW_BUFFER_TYPE_CURBE, BRW_BUFFER_TYPE_QUERY, @@ -63,9 +63,9 @@ enum 
brw_buffer_type BRW_BUFFER_TYPE_BATCH, BRW_BUFFER_TYPE_GENERAL_STATE, BRW_BUFFER_TYPE_SURFACE_STATE, - BRW_BUFFER_TYPE_PIXEL, /* image uploads, pbo's, etc */ - BRW_BUFFER_TYPE_GENERIC, /* unknown */ - BRW_BUFFER_TYPE_MAX /* Count of possible values */ + BRW_BUFFER_TYPE_PIXEL, /* image uploads, pbo's, etc */ + BRW_BUFFER_TYPE_GENERIC, /* unknown */ + BRW_BUFFER_TYPE_MAX /* Count of possible values */ }; @@ -74,14 +74,14 @@ enum brw_buffer_type */ enum brw_buffer_usage { BRW_USAGE_STATE, /* INSTRUCTION, 0 */ - BRW_USAGE_QUERY_RESULT, /* INSTRUCTION, INSTRUCTION */ + BRW_USAGE_QUERY_RESULT, /* INSTRUCTION, INSTRUCTION */ BRW_USAGE_RENDER_TARGET, /* RENDER, 0 */ - BRW_USAGE_DEPTH_BUFFER, /* RENDER, RENDER */ - BRW_USAGE_BLIT_SOURCE, /* RENDER, 0 */ + BRW_USAGE_DEPTH_BUFFER, /* RENDER, RENDER */ + BRW_USAGE_BLIT_SOURCE, /* RENDER, 0 */ BRW_USAGE_BLIT_DEST, /* RENDER, RENDER */ - BRW_USAGE_SAMPLER, /* SAMPLER, 0 */ - BRW_USAGE_VERTEX, /* VERTEX, 0 */ - BRW_USAGE_SCRATCH, /* 0, 0 */ + BRW_USAGE_SAMPLER, /* SAMPLER, 0 */ + BRW_USAGE_VERTEX, /* VERTEX, 0 */ + BRW_USAGE_SCRATCH, /* 0, 0 */ BRW_USAGE_MAX }; @@ -124,11 +124,11 @@ struct brw_winsys_reloc { struct brw_winsys_buffer *bo; }; -static INLINE void make_reloc( struct brw_winsys_reloc *reloc, - enum brw_buffer_usage usage, - unsigned delta, - unsigned offset, - struct brw_winsys_buffer *bo) +static INLINE void make_reloc(struct brw_winsys_reloc *reloc, + enum brw_buffer_usage usage, + unsigned delta, + unsigned offset, + struct brw_winsys_buffer *bo) { reloc->usage = usage; reloc->delta = delta; @@ -149,27 +149,27 @@ struct brw_winsys_screen { /** * Create a buffer. 
*/ - enum pipe_error (*bo_alloc)( struct brw_winsys_screen *sws, - enum brw_buffer_type type, - unsigned size, - unsigned alignment, - struct brw_winsys_buffer **bo_out ); + enum pipe_error (*bo_alloc)(struct brw_winsys_screen *sws, + enum brw_buffer_type type, + unsigned size, + unsigned alignment, + struct brw_winsys_buffer **bo_out); /* Destroy a buffer when our refcount goes to zero: */ - void (*bo_destroy)( struct brw_winsys_buffer *buffer ); + void (*bo_destroy)(struct brw_winsys_buffer *buffer); /* delta -- added to b2->offset, and written into buffer * offset -- location above value is written to within buffer */ - enum pipe_error (*bo_emit_reloc)( struct brw_winsys_buffer *buffer, - enum brw_buffer_usage usage, - unsigned delta, - unsigned offset, - struct brw_winsys_buffer *b2); + enum pipe_error (*bo_emit_reloc)(struct brw_winsys_buffer *buffer, + enum brw_buffer_usage usage, + unsigned delta, + unsigned offset, + struct brw_winsys_buffer *b2); - enum pipe_error (*bo_exec)( struct brw_winsys_buffer *buffer, - unsigned bytes_used ); + enum pipe_error (*bo_exec)(struct brw_winsys_buffer *buffer, + unsigned bytes_used); enum pipe_error (*bo_subdata)(struct brw_winsys_buffer *buffer, enum brw_buffer_data_type data_type, @@ -181,14 +181,14 @@ struct brw_winsys_screen { boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer); boolean (*bo_references)(struct brw_winsys_buffer *a, - struct brw_winsys_buffer *b); + struct brw_winsys_buffer *b); /* XXX: couldn't this be handled by returning true/false on * bo_emit_reloc? */ - enum pipe_error (*check_aperture_space)( struct brw_winsys_screen *iws, - struct brw_winsys_buffer **buffers, - unsigned count ); + enum pipe_error (*check_aperture_space)(struct brw_winsys_screen *iws, + struct brw_winsys_buffer **buffers, + unsigned count); /** * Map a buffer. 
@@ -199,11 +199,11 @@ struct brw_winsys_screen { unsigned length, boolean write, boolean discard, - boolean flush_explicit ); + boolean flush_explicit); - void (*bo_flush_range)( struct brw_winsys_buffer *buffer, - unsigned offset, - unsigned length ); + void (*bo_flush_range)(struct brw_winsys_buffer *buffer, + unsigned offset, + unsigned length); /** * Unmap a buffer. @@ -224,7 +224,7 @@ struct brw_winsys_screen { }; static INLINE void * -bo_map_read( struct brw_winsys_screen *sws, struct brw_winsys_buffer *buf ) +bo_map_read(struct brw_winsys_screen *sws, struct brw_winsys_buffer *buf) { return sws->bo_map( buf, BRW_DATA_OTHER, @@ -259,8 +259,8 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen); */ struct pipe_texture; boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, - struct brw_winsys_buffer **buffer, - unsigned *stride); + struct brw_winsys_buffer **buffer, + unsigned *stride); /** * Wrap a brw_winsys buffer with a texture blanket. @@ -269,9 +269,9 @@ boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, */ struct pipe_texture * brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, - const struct pipe_texture *template, - const unsigned pitch, - struct brw_winsys_buffer *buffer); + const struct pipe_texture *template, + const unsigned pitch, + struct brw_winsys_buffer *buffer); -- cgit v1.2.3 From 215b49ae1e44cc9ec2fcf7b9ca3711c07bcc95bc Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Fri, 6 Nov 2009 15:54:27 +0000 Subject: i965g: Add functions needed by the winsys --- src/gallium/drivers/i965/brw_screen_texture.c | 117 +++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 8e684aa076..911f4825f2 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -300,8 
+300,6 @@ fail: return NULL; } - - static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen, const struct pipe_texture *templ, const unsigned *stride, @@ -365,7 +363,122 @@ boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen, return FALSE; } +boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, + struct brw_winsys_buffer **buffer, + unsigned *stride) +{ + struct brw_texture *tex = brw_texture(texture); + + *buffer = tex->bo; + if (stride) + *stride = tex->pitch; + + return TRUE; +} + +struct pipe_texture * +brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, + const struct pipe_texture *templ, + const unsigned pitch, + struct brw_winsys_buffer *buffer) +{ + struct brw_screen *bscreen = brw_screen(screen); + struct brw_texture *tex; + enum brw_buffer_type buffer_type; + enum pipe_error ret; + + if (pf_is_compressed(templ->format)) + return NULL; + + if (pf_is_depth_or_stencil(templ->format)) + return NULL; + + tex = CALLOC_STRUCT(brw_texture); + if (!tex) + return NULL; + + memcpy(&tex->base, templ, sizeof *templ); + pipe_reference_init(&tex->base.reference, 1); + tex->base.screen = screen; + + tex->cpp = pf_get_size(tex->base.format); + + make_empty_list(&tex->views[0]); + make_empty_list(&tex->views[1]); + + if (1) + tex->tiling = BRW_TILING_NONE; + else + tex->tiling = BRW_TILING_X; + + if (!brw_texture_layout(bscreen, tex)) + goto fail; + + + if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_PRIMARY)) { + buffer_type = BRW_BUFFER_TYPE_SCANOUT; + } else { + buffer_type = BRW_BUFFER_TYPE_TEXTURE; + } + + tex->bo = buffer; + + if (tex->pitch != pitch) + goto fail; + + +/* fix this warning + if (tex->size > buffer->size) + goto fail; + */ + + if (ret) + goto fail; + + tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); + tex->ss.ss0.surface_format = translate_tex_format(tex->base.format); + 
assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + + + /* XXX: what happens when tex->bo->offset changes??? + */ + tex->ss.ss1.base_addr = 0; /* reloc */ + tex->ss.ss2.mip_count = tex->base.last_level; + tex->ss.ss2.width = tex->base.width[0] - 1; + tex->ss.ss2.height = tex->base.height[0] - 1; + + switch (tex->tiling) { + case BRW_TILING_NONE: + tex->ss.ss3.tiled_surface = 0; + tex->ss.ss3.tile_walk = 0; + break; + case BRW_TILING_X: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + break; + case BRW_TILING_Y: + tex->ss.ss3.tiled_surface = 1; + tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR; + break; + } + + tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; + tex->ss.ss3.depth = tex->base.depth[0] - 1; + + tex->ss.ss4.min_lod = 0; + return &tex->base; + +fail: + FREE(tex); + return NULL; +} void brw_screen_tex_init( struct brw_screen *brw_screen ) { -- cgit v1.2.3 From 2eb6b0defe65b01a7ed1562c2f16c17125242c16 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Fri, 6 Nov 2009 16:40:54 +0000 Subject: i965g: Fix texture blanket function --- src/gallium/drivers/i965/brw_screen_texture.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 911f4825f2..adc0aaa8a9 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -387,12 +387,14 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, enum brw_buffer_type buffer_type; enum pipe_error ret; - if (pf_is_compressed(templ->format)) + if (templ->target != PIPE_TEXTURE_2D || + templ->last_level != 0 || + templ->depth[0] != 1) return NULL; - if (pf_is_depth_or_stencil(templ->format)) + if 
(pf_is_compressed(templ->format)) return NULL; - + tex = CALLOC_STRUCT(brw_texture); if (!tex) return NULL; @@ -408,6 +410,9 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, if (1) tex->tiling = BRW_TILING_NONE; + else if (bscreen->chipset.is_965 && + pf_is_depth_or_stencil(templ->format)) + tex->tiling = BRW_TILING_Y; else tex->tiling = BRW_TILING_X; @@ -424,17 +429,13 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, tex->bo = buffer; - if (tex->pitch != pitch) - goto fail; - + tex->pitch = pitch; -/* fix this warning + /* fix this warning */ +#if 0 if (tex->size > buffer->size) goto fail; - */ - - if (ret) - goto fail; +#endif tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; tex->ss.ss0.surface_type = translate_tex_target(tex->base.target); -- cgit v1.2.3 From 4fbe6c4e4e754e0e850165d5a303990515ceaba6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 14:20:04 +0000 Subject: i965g: get rid of cc key, simplify state upload Keep a valid reloc table active between uploads, avoid recalculating it every time. --- src/gallium/drivers/i965/brw_cc.c | 155 +++++++-------------------------- src/gallium/drivers/i965/brw_context.c | 8 +- src/gallium/drivers/i965/brw_context.h | 13 ++- src/gallium/drivers/i965/brw_pipe_fb.c | 4 + 4 files changed, 51 insertions(+), 129 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c index f05728ea5d..3e070f5591 100644 --- a/src/gallium/drivers/i965/brw_cc.c +++ b/src/gallium/drivers/i965/brw_cc.c @@ -35,48 +35,13 @@ #include "brw_defines.h" -struct sane_viewport { - float top; - float left; - float width; - float height; - float near; - float far; -}; - -static void calc_sane_viewport( const struct pipe_viewport_state *vp, - struct sane_viewport *svp ) -{ - /* XXX fix me, obviously. 
- */ - svp->top = 0; - svp->left = 0; - svp->width = 250; - svp->height = 250; - svp->near = 0; - svp->far = 1; -} - static enum pipe_error prepare_cc_vp( struct brw_context *brw ) { - struct brw_cc_viewport ccv; - struct sane_viewport svp; - enum pipe_error ret; - - memset(&ccv, 0, sizeof(ccv)); - - /* PIPE_NEW_VIEWPORT */ - calc_sane_viewport( &brw->curr.viewport, &svp ); - - ccv.min_depth = svp.near; - ccv.max_depth = svp.far; - - ret = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0, - &brw->cc.vp_bo ); - if (ret) - return ret; - - return PIPE_OK; + return brw_cache_data( &brw->cache, + BRW_CC_VP, + &brw->curr.ccv, + NULL, 0, + &brw->cc.reloc[CC_RELOC_VP].bo ); } const struct brw_tracked_state brw_cc_vp = { @@ -88,15 +53,6 @@ const struct brw_tracked_state brw_cc_vp = { .prepare = prepare_cc_vp }; -struct brw_cc_unit_key { - struct brw_cc0 cc0; - struct brw_cc1 cc1; - struct brw_cc2 cc2; - struct brw_cc3 cc3; - struct brw_cc5 cc5; - struct brw_cc6 cc6; - struct brw_cc7 cc7; -}; /* A long-winded way to OR two unsigned integers together: */ @@ -110,85 +66,22 @@ combine_cc3( struct brw_cc3 a, struct brw_cc3 b ) return ca.cc3; } -static void -cc_unit_populate_key(const struct brw_context *brw, - struct brw_cc_unit_key *key) -{ - key->cc0 = brw->curr.zstencil->cc0; - key->cc1 = brw->curr.zstencil->cc1; - key->cc2 = brw->curr.zstencil->cc2; - key->cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 ); - key->cc5 = brw->curr.blend->cc5; - key->cc6 = brw->curr.blend->cc6; - key->cc7 = brw->curr.zstencil->cc7; -} - -/** - * Creates the state cache entry for the given CC unit key. 
- */ -static enum pipe_error -cc_unit_create_from_key(struct brw_context *brw, - struct brw_cc_unit_key *key, - struct brw_winsys_reloc *reloc, - struct brw_winsys_buffer **bo_out) -{ - struct brw_cc_unit_state cc; - enum pipe_error ret; - - memset(&cc, 0, sizeof(cc)); - - cc.cc0 = key->cc0; - cc.cc1 = key->cc1; - cc.cc2 = key->cc2; - cc.cc3 = key->cc3; - - cc.cc4.cc_viewport_state_offset = 0; - - cc.cc5 = key->cc5; - cc.cc6 = key->cc6; - cc.cc7 = key->cc7; - - ret = brw_upload_cache(&brw->cache, BRW_CC_UNIT, - key, sizeof(*key), - reloc, 1, - &cc, sizeof(cc), - NULL, NULL, - bo_out); - if (ret) - return ret; - - return PIPE_OK; -} static int prepare_cc_unit( struct brw_context *brw ) { - struct brw_cc_unit_key key; - struct brw_winsys_reloc reloc[1]; - enum pipe_error ret; - - cc_unit_populate_key(brw, &key); - - /* CACHE_NEW_CC_VP */ - make_reloc(&reloc[0], - BRW_USAGE_STATE, - 0, - offsetof(struct brw_cc_unit_state, cc4), - brw->cc.vp_bo); - - if (brw_search_cache(&brw->cache, BRW_CC_UNIT, - &key, sizeof(key), - reloc, 1, - NULL, - &brw->cc.state_bo)) - return PIPE_OK; - - ret = cc_unit_create_from_key(brw, &key, - reloc, - &brw->cc.state_bo); - if (ret) - return ret; + brw->cc.cc.cc0 = brw->curr.zstencil->cc0; + brw->cc.cc.cc1 = brw->curr.zstencil->cc1; + brw->cc.cc.cc2 = brw->curr.zstencil->cc2; + brw->cc.cc.cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 ); - return PIPE_OK; + brw->cc.cc.cc5 = brw->curr.blend->cc5; + brw->cc.cc.cc6 = brw->curr.blend->cc6; + brw->cc.cc.cc7 = brw->curr.zstencil->cc7; + + return brw_cache_data_sz(&brw->cache, BRW_CC_UNIT, + &brw->cc.cc, sizeof(brw->cc.cc), + brw->cc.reloc, 1, + &brw->cc.state_bo); } const struct brw_tracked_state brw_cc_unit = { @@ -201,4 +94,18 @@ const struct brw_tracked_state brw_cc_unit = { }; +void brw_hw_cc_init( struct brw_context *brw ) +{ + make_reloc(&brw->cc.reloc[0], + BRW_USAGE_STATE, + 0, + offsetof(struct brw_cc_unit_state, cc4), + NULL); +} + +void brw_hw_cc_cleanup( struct 
brw_context *brw ) +{ + bo_reference(&brw->cc.state_bo, NULL); + bo_reference(&brw->cc.reloc[0].bo, NULL); +} diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c index f85116a568..e67551882d 100644 --- a/src/gallium/drivers/i965/brw_context.c +++ b/src/gallium/drivers/i965/brw_context.c @@ -65,6 +65,9 @@ static void brw_destroy_context( struct pipe_context *pipe ) brw_pipe_vertex_cleanup( brw ); brw_pipe_clear_cleanup( brw ); + brw_hw_cc_cleanup( brw ); + + FREE(brw->wm.compile_data); for (i = 0; i < brw->curr.fb.nr_cbufs; i++) @@ -96,9 +99,6 @@ static void brw_destroy_context( struct pipe_context *pipe ) bo_reference(&brw->wm.sampler_bo, NULL); bo_reference(&brw->wm.prog_bo, NULL); bo_reference(&brw->wm.state_bo, NULL); - bo_reference(&brw->cc.prog_bo, NULL); - bo_reference(&brw->cc.state_bo, NULL); - bo_reference(&brw->cc.vp_bo, NULL); } @@ -128,6 +128,8 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen) brw_pipe_vertex_init( brw ); brw_pipe_clear_init( brw ); + brw_hw_cc_init( brw ); + brw_init_state( brw ); brw_draw_init( brw ); diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index f53b92d4f5..4a975ecd7e 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -481,6 +481,8 @@ struct brw_query_object { uint64_t result; }; +#define CC_RELOC_VP 0 + /** * brw_context is derived from pipe_context @@ -525,6 +527,7 @@ struct brw_context struct brw_blend_constant_color bcc; struct brw_polygon_stipple bps; + struct brw_cc_viewport ccv; /** * Index buffer for this draw_prims call. 
@@ -708,9 +711,10 @@ struct brw_context struct { - struct brw_winsys_buffer *prog_bo; struct brw_winsys_buffer *state_bo; - struct brw_winsys_buffer *vp_bo; + + struct brw_cc_unit_state cc; + struct brw_winsys_reloc reloc[1]; } cc; struct { @@ -764,6 +768,7 @@ void brw_pipe_shader_init( struct brw_context *brw ); void brw_pipe_vertex_init( struct brw_context *brw ); void brw_pipe_clear_init( struct brw_context *brw ); + void brw_pipe_blend_cleanup( struct brw_context *brw ); void brw_pipe_depth_stencil_cleanup( struct brw_context *brw ); void brw_pipe_framebuffer_cleanup( struct brw_context *brw ); @@ -776,6 +781,10 @@ void brw_pipe_shader_cleanup( struct brw_context *brw ); void brw_pipe_vertex_cleanup( struct brw_context *brw ); void brw_pipe_clear_cleanup( struct brw_context *brw ); +void brw_hw_cc_init( struct brw_context *brw ); +void brw_hw_cc_cleanup( struct brw_context *brw ); + + void brw_context_flush( struct brw_context *brw ); diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index f65f45fb84..1511220447 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -49,7 +49,11 @@ static void brw_set_viewport_state( struct pipe_context *pipe, const struct pipe_viewport_state *viewport ) { struct brw_context *brw = brw_context(pipe); + brw->curr.viewport = *viewport; + brw->curr.ccv.min_depth = 0.0; /* XXX: near */ + brw->curr.ccv.max_depth = 1.0; /* XXX: far */ + brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT; } -- cgit v1.2.3 From 833f5bbfafee00ad44085e121eea0a2579eb3459 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Fri, 6 Nov 2009 18:34:23 +0000 Subject: i965g: First clear! 
--- src/gallium/drivers/i965/brw_screen_texture.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index adc0aaa8a9..842c70a39a 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -384,8 +384,6 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, { struct brw_screen *bscreen = brw_screen(screen); struct brw_texture *tex; - enum brw_buffer_type buffer_type; - enum pipe_error ret; if (templ->target != PIPE_TEXTURE_2D || templ->last_level != 0 || @@ -419,17 +417,13 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, if (!brw_texture_layout(bscreen, tex)) goto fail; - - if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET | - PIPE_TEXTURE_USAGE_PRIMARY)) { - buffer_type = BRW_BUFFER_TYPE_SCANOUT; - } else { - buffer_type = BRW_BUFFER_TYPE_TEXTURE; - } + /* XXX Maybe some more checks? */ + if ((pitch / tex->cpp) < tex->pitch) + goto fail; - tex->bo = buffer; + tex->pitch = pitch / tex->cpp; - tex->pitch = pitch; + tex->bo = buffer; /* fix this warning */ #if 0 -- cgit v1.2.3 From dc97a5d782b01d530bb7cbe6e76625f969259e32 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Fri, 6 Nov 2009 19:05:41 +0000 Subject: i965g: Match pitch modification on get buffer as well This is an ugly hack in order to match what the intel X driver gives us. However putting this in the winsys where it fits better forces it to reach more into the driver than it already does.
--- src/gallium/drivers/i965/brw_screen_texture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 842c70a39a..dbefbfc5cc 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -371,7 +371,7 @@ boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, *buffer = tex->bo; if (stride) - *stride = tex->pitch; + *stride = tex->pitch * tex->cpp; return TRUE; } -- cgit v1.2.3 From 9708ce874edb140ddffd44ddbb33011273a8ded9 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Fri, 6 Nov 2009 20:19:39 +0000 Subject: i965g: Add texture transfer functions They don't seem to work. Maybe we are forgetting to flush the gpu or something. --- src/gallium/drivers/i965/brw_screen_texture.c | 95 +++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index dbefbfc5cc..75bb8a73b7 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -363,6 +363,97 @@ boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen, return FALSE; } + +/* + * Transfer functions + */ + +static struct pipe_transfer* +brw_get_tex_transfer(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct brw_texture *tex = brw_texture(texture); + struct brw_transfer *trans; + unsigned offset; /* in bytes */ + + if (texture->target == PIPE_TEXTURE_CUBE) { + offset = tex->image_offset[level][face]; + } else if (texture->target == PIPE_TEXTURE_3D) { + offset = tex->image_offset[level][zslice]; + } else { + offset = 
tex->image_offset[level][0]; + assert(face == 0); + assert(zslice == 0); + } + + trans = CALLOC_STRUCT(brw_transfer); + if (trans) { + pipe_texture_reference(&trans->base.texture, texture); + trans->base.format = trans->base.format; + trans->base.x = x; + trans->base.y = y; + trans->base.width = w; + trans->base.height = h; + trans->base.block = texture->block; + trans->base.nblocksx = texture->nblocksx[level]; + trans->base.nblocksy = texture->nblocksy[level]; + trans->base.stride = tex->pitch * tex->cpp; + trans->offset = offset; + trans->base.usage = usage; + } + return &trans->base; +} + +static void * +brw_transfer_map(struct pipe_screen *screen, + struct pipe_transfer *transfer) +{ + struct brw_texture *tex = brw_texture(transfer->texture); + struct brw_winsys_screen *sws = brw_screen(screen)->sws; + char *map; + unsigned usage = transfer->usage; + + map = sws->bo_map(tex->bo, + BRW_DATA_OTHER, + 0, + tex->bo->size, + (usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE, + (usage & 0) ? TRUE : FALSE, + (usage & 0) ? 
TRUE : FALSE); + + if (!map) + return NULL; + + return map + brw_transfer(transfer)->offset + + transfer->y / transfer->block.height * transfer->stride + + transfer->x / transfer->block.width * transfer->block.size; +} + +static void +brw_transfer_unmap(struct pipe_screen *screen, + struct pipe_transfer *transfer) +{ + struct brw_texture *tex = brw_texture(transfer->texture); + struct brw_winsys_screen *sws = brw_screen(screen)->sws; + + sws->bo_unmap(tex->bo); +} + +static void +brw_tex_transfer_destroy(struct pipe_transfer *trans) +{ + pipe_texture_reference(&trans->texture, NULL); + FREE(trans); +} + + +/* + * Functions exported to the winsys + */ + boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, struct brw_winsys_buffer **buffer, unsigned *stride) @@ -481,4 +572,8 @@ void brw_screen_tex_init( struct brw_screen *brw_screen ) brw_screen->base.texture_create = brw_texture_create; brw_screen->base.texture_destroy = brw_texture_destroy; brw_screen->base.texture_blanket = brw_texture_blanket; + brw_screen->base.get_tex_transfer = brw_get_tex_transfer; + brw_screen->base.transfer_map = brw_transfer_map; + brw_screen->base.transfer_unmap = brw_transfer_unmap; + brw_screen->base.tex_transfer_destroy = brw_tex_transfer_destroy; } -- cgit v1.2.3 From 229f6b9a7e699b814e07ba762de97a5ebcffce51 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sat, 7 Nov 2009 15:47:21 +0000 Subject: i965g: Formalize on S8Z24 as the supported depth format --- src/gallium/drivers/i965/brw_misc_state.c | 3 ++- src/gallium/drivers/i965/brw_pipe_clear.c | 7 ------- src/gallium/drivers/i965/brw_screen.c | 2 ++ src/gallium/drivers/i965/brw_screen_texture.c | 4 ---- 4 files changed, 4 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c index 4dd73636fd..e4b24229db 100644 --- a/src/gallium/drivers/i965/brw_misc_state.c +++
b/src/gallium/drivers/i965/brw_misc_state.c @@ -265,7 +265,8 @@ static int emit_depthbuffer(struct brw_context *brw) format = BRW_DEPTHFORMAT_D16_UNORM; cpp = 2; break; - case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; cpp = 4; break; diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c index 34cad62977..f846b4342c 100644 --- a/src/gallium/drivers/i965/brw_pipe_clear.c +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -135,8 +135,6 @@ static void zstencil_clear(struct brw_context *brw, unsigned value; switch (bsurface->base.format) { - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8Z24_UNORM: value = ((unsigned)(depth * MASK24) & MASK24); @@ -150,11 +148,6 @@ static void zstencil_clear(struct brw_context *brw, } switch (bsurface->base.format) { - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - value = (value << 8) | stencil; - break; - case PIPE_FORMAT_X8Z24_UNORM: case PIPE_FORMAT_S8Z24_UNORM: value = value | (stencil << 24); diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 575a418b7d..af885320a7 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -210,12 +210,14 @@ brw_is_format_supported(struct pipe_screen *screen, PIPE_FORMAT_A8L8_UNORM, PIPE_FORMAT_YCBCR, PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_X8Z24_UNORM, PIPE_FORMAT_S8Z24_UNORM, PIPE_FORMAT_NONE /* list terminator */ }; static const enum pipe_format surface_supported[] = { PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_X8Z24_UNORM, PIPE_FORMAT_S8Z24_UNORM, PIPE_FORMAT_NONE /* list terminator */ }; diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 75bb8a73b7..9ca60b46d3 100644 --- 
a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -142,12 +142,8 @@ static GLuint translate_tex_format( enum pipe_format pf ) case PIPE_FORMAT_DXT1_SRGB: return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; - /* XXX: which pipe depth formats does i965 suppport - */ case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: return BRW_SURFACEFORMAT_I24X8_UNORM; #if 0 -- cgit v1.2.3 From cbad97b68504a64650cb77bad96962310ab9c7f9 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Sat, 7 Nov 2009 17:47:00 +0000 Subject: i965g: Fixup texture formats --- src/gallium/drivers/i965/brw_screen.c | 45 +++++++++++++++--- src/gallium/drivers/i965/brw_screen_texture.c | 66 +++++++++++++++------------ 2 files changed, 74 insertions(+), 37 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index af885320a7..05da72ebb2 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -201,31 +201,62 @@ brw_is_format_supported(struct pipe_screen *screen, unsigned geom_flags) { static const enum pipe_format tex_supported[] = { - PIPE_FORMAT_R8G8B8A8_UNORM, - PIPE_FORMAT_A8R8G8B8_UNORM, - PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_L8_UNORM, - PIPE_FORMAT_A8_UNORM, PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_L16_UNORM, + /*PIPE_FORMAT_I16_UNORM,*/ + /*PIPE_FORMAT_A16_UNORM,*/ PIPE_FORMAT_A8L8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_A1R5G5B5_UNORM, + PIPE_FORMAT_A4R4G4B4_UNORM, + PIPE_FORMAT_X8R8G8B8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + /* video */ PIPE_FORMAT_YCBCR, PIPE_FORMAT_YCBCR_REV, + /* compressed */ + /*PIPE_FORMAT_FXT1_RGBA,*/ + PIPE_FORMAT_DXT1_RGB, + PIPE_FORMAT_DXT1_RGBA, + PIPE_FORMAT_DXT3_RGBA, + PIPE_FORMAT_DXT5_RGBA, + /* sRGB */ + PIPE_FORMAT_R8G8B8A8_SRGB, + PIPE_FORMAT_A8L8_SRGB, + PIPE_FORMAT_L8_SRGB, + 
PIPE_FORMAT_DXT1_SRGB, + /* depth */ + PIPE_FORMAT_Z32_FLOAT, PIPE_FORMAT_X8Z24_UNORM, PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_Z16_UNORM, + /* signed */ + PIPE_FORMAT_R8G8_SNORM, + PIPE_FORMAT_R8G8B8A8_SNORM, PIPE_FORMAT_NONE /* list terminator */ }; - static const enum pipe_format surface_supported[] = { + static const enum pipe_format render_supported[] = { + PIPE_FORMAT_X8R8G8B8_UNORM, PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format depth_supported[] = { + PIPE_FORMAT_Z32_FLOAT, PIPE_FORMAT_X8Z24_UNORM, PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_Z16_UNORM, PIPE_FORMAT_NONE /* list terminator */ }; const enum pipe_format *list; uint i; - if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) - list = surface_supported; + if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) + list = depth_supported; + else if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + list = render_supported; else list = tex_supported; diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 9ca60b46d3..666ec70d42 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -73,16 +73,19 @@ static GLuint translate_tex_format( enum pipe_format pf ) case PIPE_FORMAT_A8_UNORM: return BRW_SURFACEFORMAT_A8_UNORM; - case PIPE_FORMAT_A8L8_UNORM: - return BRW_SURFACEFORMAT_L8A8_UNORM; + case PIPE_FORMAT_L16_UNORM: + return BRW_SURFACEFORMAT_L16_UNORM; - case PIPE_FORMAT_A8R8G8B8_UNORM: /* XXX */ - case PIPE_FORMAT_B8G8R8A8_UNORM: /* XXX */ - case PIPE_FORMAT_R8G8B8A8_UNORM: /* XXX */ - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + /* XXX: Add these to gallium + case PIPE_FORMAT_I16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; - case PIPE_FORMAT_R8G8B8X8_UNORM: - return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + case PIPE_FORMAT_A16_UNORM: + return BRW_SURFACEFORMAT_A16_UNORM; + */ + + case PIPE_FORMAT_A8L8_UNORM: + return 
BRW_SURFACEFORMAT_L8A8_UNORM; case PIPE_FORMAT_R5G6B5_UNORM: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -93,19 +96,15 @@ static GLuint translate_tex_format( enum pipe_format pf ) case PIPE_FORMAT_A4R4G4B4_UNORM: return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; - case PIPE_FORMAT_L16_UNORM: - return BRW_SURFACEFORMAT_L16_UNORM; - - /* XXX: Z texturing: - case PIPE_FORMAT_I16_UNORM: - return BRW_SURFACEFORMAT_I16_UNORM; - */ + case PIPE_FORMAT_A8R8G8B8_UNORM: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - /* XXX: Z texturing: - case PIPE_FORMAT_A16_UNORM: - return BRW_SURFACEFORMAT_A16_UNORM; - */ + /* + * Video formats + */ case PIPE_FORMAT_YCBCR_REV: return BRW_SURFACEFORMAT_YCRCB_NORMAL; @@ -113,6 +112,9 @@ static GLuint translate_tex_format( enum pipe_format pf ) case PIPE_FORMAT_YCBCR: return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; + /* + * Compressed formats. + */ /* XXX: Add FXT to gallium? case PIPE_FORMAT_FXT1_RGBA: return BRW_SURFACEFORMAT_FXT1; @@ -130,6 +132,10 @@ static GLuint translate_tex_format( enum pipe_format pf ) case PIPE_FORMAT_DXT5_RGBA: return BRW_SURFACEFORMAT_BC3_UNORM; + /* + * sRGB formats + */ + case PIPE_FORMAT_R8G8B8A8_SRGB: return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; @@ -142,24 +148,24 @@ static GLuint translate_tex_format( enum pipe_format pf ) case PIPE_FORMAT_DXT1_SRGB: return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; + /* + * Depth formats + */ + + case PIPE_FORMAT_Z16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; + case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: return BRW_SURFACEFORMAT_I24X8_UNORM; -#if 0 - /* XXX: these different surface formats don't seem to - * make any difference for shadow sampler/compares. 
- */ - if (depth_mode == GL_INTENSITY) - return BRW_SURFACEFORMAT_I24X8_UNORM; - else if (depth_mode == GL_ALPHA) - return BRW_SURFACEFORMAT_A24X8_UNORM; - else - return BRW_SURFACEFORMAT_L24X8_UNORM; -#endif + case PIPE_FORMAT_Z32_FLOAT: + return BRW_SURFACEFORMAT_I32_FLOAT; /* XXX: presumably for bump mapping. Add this to mesa state * tracker? + * + * XXX: Add flipped versions of these formats to Gallium. */ case PIPE_FORMAT_R8G8_SNORM: return BRW_SURFACEFORMAT_R8G8_SNORM; -- cgit v1.2.3 From e243279a48d68c0a14fbf2b78d99b6a9e72c87b6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 21:17:08 +0000 Subject: i965g: avoid use of internally generated immediates Currently not working, so don't generate more of them. --- src/gallium/drivers/i965/brw_wm_fp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 2a207958eb..0df84f8546 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -906,8 +906,11 @@ find_output_by_semantic( struct brw_wm_compile *c, return src_reg( TGSI_FILE_OUTPUT, i ); /* If not found, return some arbitrary immediate value: + * + * XXX: this is a good idea but immediates are up generating extra + * curbe entries atm, as they would have in the original driver. 
*/ - return src_imm1f(c, 1.0); + return src_reg( TGSI_FILE_OUTPUT, 0 ); /* src_imm1f(c, 1.0); */ } -- cgit v1.2.3 From d86e9079a8eb2e84cb231fdbca4b74d744198afb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 23:29:47 +0000 Subject: i965g: fix off-by-one on curbe upload --- src/gallium/drivers/i965/brw_curbe.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 0a5cfcc7cf..79ebac9d15 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -224,9 +224,13 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT]; + GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT] + 1; struct pipe_screen *screen = brw->base.screen; + /* XXX: note that constant buffers are currently *already* in + * buffer objects. If we want to keep on putting them into the + * curbe, makes sense to treat constbuf's specially with malloc. + */ const GLfloat *value = screen->buffer_map( screen, brw->curr.vertex_constants, PIPE_BUFFER_USAGE_CPU_READ); @@ -272,8 +276,10 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) } if (brw->curbe.curbe_bo == NULL) { - /* Allocate a single page for CURBE entries for this batchbuffer. - * They're generally around 64b. + /* Allocate a single page for CURBE entries for this + * batchbuffer. They're generally around 64b. We will + * discard the curbe buffer after the batch is flushed to + * avoid synchronous updates. 
*/ ret = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_CURBE, @@ -292,8 +298,8 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) /* Copy data to the buffer: */ brw->sws->bo_subdata(brw->curbe.curbe_bo, + BRW_DATA_CONSTANT_BUFFER, brw->curbe.curbe_offset, - BRW_DATA_OTHER, bufsz, buf, NULL, 0); -- cgit v1.2.3 From e4d174ea1b36976f3dd255ee6b86207ad5b25c31 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 23:31:49 +0000 Subject: i965g: smaller upload buffer for index data --- src/gallium/drivers/i965/brw_draw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 84803e43be..45d5ade1fc 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -268,7 +268,7 @@ boolean brw_draw_init( struct brw_context *brw ) return FALSE; brw->vb.upload_index = u_upload_create( brw->base.screen, - 128 * 1024, + 32 * 1024, 64, PIPE_BUFFER_USAGE_INDEX ); if (brw->vb.upload_index == NULL) -- cgit v1.2.3 From a010307e0adc2c0d4ef586c81507d99ef15c2142 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Nov 2009 23:34:33 +0000 Subject: i965g: some more cases where file_max[] is used without obligatory +1 --- src/gallium/drivers/i965/brw_vs_emit.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index e0fadc8dce..26f0ec5a11 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -82,15 +82,15 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) * works if everything fits in the GRF. * XXX this heuristic/check may need some fine tuning... 
*/ - if (c->vp->info.file_max[TGSI_FILE_CONSTANT] + - c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + - c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF) + if (c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1 + + c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 + + c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 > BRW_MAX_GRF) c->vp->use_const_buffer = GL_TRUE; else { /* XXX: immediates can go elsewhere if necessary: */ - assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + - c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 <= BRW_MAX_GRF); + assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 + + c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 <= BRW_MAX_GRF); c->vp->use_const_buffer = GL_FALSE; } -- cgit v1.2.3 From 0c547d63c497f06c38f7a3c000e478bdcf2594b6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 9 Nov 2009 18:04:58 -0800 Subject: i965g: skip over vertex position output when preallocating FS inputs --- src/gallium/drivers/i965/brw_wm_pass2.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c index a5574bd1a3..2a879863ab 100644 --- a/src/gallium/drivers/i965/brw_wm_pass2.c +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -83,6 +83,10 @@ static void init_registers( struct brw_wm_compile *c ) for (j = 0; j < c->nr_creg; j++) prealloc_reg(c, &c->creg[j], reg++); + reg++; /* XXX: skip over position output */ + + /* XXX: currently just hope the VS outputs line up with FS inputs: + */ for (j = 0; j < c->key.vp_nr_outputs; j++) prealloc_reg(c, &c->payload.input_interp[j], reg++); -- cgit v1.2.3 From 2f54d02d205468a840b35a3554f2ad8ffc31ec9c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 10 Nov 2009 18:07:11 -0800 Subject: i965g: consult fs inputs when laying out vs output regs Vertex shader now emits just the FS inputs, in the positions and order expected by the fragment shader. 
This means potentially regenerating the vertex shader to match different fragment shader's input layouts. --- src/gallium/drivers/i965/brw_context.h | 13 ++++ src/gallium/drivers/i965/brw_pipe_shader.c | 6 ++ src/gallium/drivers/i965/brw_vs.c | 14 ++-- src/gallium/drivers/i965/brw_vs.h | 7 +- src/gallium/drivers/i965/brw_vs_emit.c | 116 ++++++++++++++++++++--------- 5 files changed, 113 insertions(+), 43 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 4a975ecd7e..31f3cf3685 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -161,11 +161,24 @@ struct brw_vertex_shader { GLboolean use_const_buffer; }; +struct brw_fs_signature { + GLuint nr_inputs; + struct { + GLuint semantic:5; + GLuint semantic_index:27; + } input[PIPE_MAX_SHADER_INPUTS]; +}; + +#define brw_fs_signature_size(s) (offsetof(struct brw_fs_signature, input) + \ + ((s)->nr_inputs * sizeof (s)->input[0])) + struct brw_fragment_shader { const struct tgsi_token *tokens; struct tgsi_shader_info info; + struct brw_fs_signature signature; + unsigned iz_lookup; //unsigned wm_lookup; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 44f9ad6f9c..7febf9e0c2 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -96,6 +96,12 @@ static void *brw_create_fs_state( struct pipe_context *pipe, tgsi_scan_shader(fs->tokens, &fs->info); + fs->signature.nr_inputs = fs->info.num_inputs; + for (i = 0; i < fs->info.num_inputs; i++) { + fs->signature.input[i].semantic = fs->info.input_semantic_name[i]; + fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i]; + } + for (i = 0; i < fs->info.num_inputs; i++) if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION) fs->uses_depth = 1; diff --git a/src/gallium/drivers/i965/brw_vs.c 
b/src/gallium/drivers/i965/brw_vs.c index 966940ceac..05a62ed974 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -90,22 +90,24 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw) { struct brw_vs_prog_key key; struct brw_vertex_shader *vp = brw->curr.vertex_shader; + struct brw_fragment_shader *fs = brw->curr.fragment_shader; enum pipe_error ret; memset(&key, 0, sizeof(key)); - /* Just upload the program verbatim for now. Always send it all - * the inputs it asks for, whether they are varying or not. - */ key.program_string_id = vp->id; key.nr_userclip = brw->curr.ucp.nr; key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL || brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL); + memcpy(&key.fs_signature, &fs->signature, + brw_fs_signature_size(&fs->signature)); + + /* Make an early check for the key. */ if (brw_search_cache(&brw->cache, BRW_VS_PROG, - &key, sizeof(key), + &key, brw_vs_prog_key_size(&key), NULL, 0, &brw->vs.prog_data, &brw->vs.prog_bo)) @@ -123,7 +125,9 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = PIPE_NEW_CLIP | PIPE_NEW_RAST, + .mesa = (PIPE_NEW_CLIP | + PIPE_NEW_RAST | + PIPE_NEW_FRAGMENT_SHADER), .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h index b4e450d89b..3d1598d02b 100644 --- a/src/gallium/drivers/i965/brw_vs.h +++ b/src/gallium/drivers/i965/brw_vs.h @@ -43,8 +43,11 @@ struct brw_vs_prog_key { GLuint nr_userclip:4; GLuint copy_edgeflag:1; GLuint pad:26; + struct brw_fs_signature fs_signature; }; +#define brw_vs_prog_key_size(s) (offsetof(struct brw_vs_prog_key, fs_signature) + \ + brw_fs_signature_size(&(s)->fs_signature)) #define MAX_IF_DEPTH 32 @@ -65,8 +68,8 @@ struct brw_vs_compile { GLboolean copy_edgeflag; - GLuint first_output; - GLuint first_overflow_output; /**< 
VERT_ATTRIB_x */ + GLuint overflow_grf_start; + GLuint overflow_count; GLuint first_tmp; GLuint last_tmp; diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 26f0ec5a11..933c9c4d63 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -66,6 +66,38 @@ static void release_tmps( struct brw_vs_compile *c ) } +static boolean is_position_output( struct brw_vs_compile *c, + unsigned vs_output ) +{ + struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + + return (semantic == TGSI_SEMANTIC_POSITION && + index == 0); +} + + +static boolean find_output_slot( struct brw_vs_compile *c, + unsigned vs_output, + unsigned *fs_input_slot ) +{ + struct brw_vertex_shader *vs = c->vp; + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + unsigned i; + + for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { + if (c->key.fs_signature.input[i].semantic == semantic && + c->key.fs_signature.input[i].semantic_index == index) { + *fs_input_slot = i; + return TRUE; + } + } + + return FALSE; +} + /** * Preallocate GRF register before code emit. @@ -172,42 +204,50 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Allocate outputs. The non-position outputs go straight into message regs. 
*/ c->nr_outputs = c->prog_data.nr_outputs; - c->first_output = reg; - c->first_overflow_output = 0; if (c->chipset.is_igdng) mrf = 8; else mrf = 4; + + if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) { + c->overflow_grf_start = reg; + c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF; + reg += c->overflow_count; + } + /* XXX: need to access vertex output semantics here: */ for (i = 0; i < c->prog_data.nr_outputs; i++) { - assert(i < Elements(c->regs[TGSI_FILE_OUTPUT])); + unsigned slot; - /* XXX: Hardwire position to zero: - */ - if (i == 0) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; - } - /* XXX: disable psiz: + /* XXX: Put output position in slot zero always. Clipper, etc, + * need access to this reg. */ - else if (0) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + if (is_position_output(c, i)) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */ reg++; - mrf++; /* just a placeholder? XXX fix later stages & remove this */ } - else if (mrf < 16) { - c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); - mrf++; + else if (find_output_slot(c, i, &slot)) { + + if (0 /* is_psize_output(c, i) */ ) { + /* c->psize_out.grf = reg; */ + /* c->psize_out.mrf = i; */ + } + + /* The first (16-4) outputs can go straight into the message regs. 
+ */ + if (slot + mrf < BRW_MAX_MRF) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf); + } + else { + int grf = c->overflow_grf_start + slot - BRW_MAX_MRF; + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0); + } } else { - /* too many vertex results to fit in MRF, use GRF for overflow */ - if (!c->first_overflow_output) - c->first_overflow_output = i; - c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); - reg++; + c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg(); } } @@ -1072,6 +1112,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS]; struct brw_reg ndc; int eot; + int i; GLuint len_vertext_header = 2; if (c->key.copy_edgeflag) { @@ -1167,7 +1208,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) len_vertext_header = 2; } - eot = (c->first_overflow_output == 0); + eot = (c->overflow_count == 0); brw_urb_WRITE(p, brw_null_reg(), /* dest */ @@ -1182,19 +1223,22 @@ static void emit_vertex_write( struct brw_vs_compile *c) 0, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); - if (c->first_overflow_output > 0) { - /* Not all of the vertex outputs/results fit into the MRF. - * Move the overflowed attributes from the GRF to the MRF and - * issue another brw_urb_WRITE(). - */ + /* Not all of the vertex outputs/results fit into the MRF. + * Move the overflowed attributes from the GRF to the MRF and + * issue another brw_urb_WRITE(). + */ + for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) { + unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF); + GLuint j; + + eot = (i + nr >= c->overflow_count); + /* XXX I'm not 100% sure about which MRF regs to use here. Starting * at mrf[4] atm... 
*/ - GLuint i, mrf = 0; - for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) { - /* move from GRF to MRF */ - brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]); - mrf++; + for (j = 0; j < nr; j++) { + brw_MOV(p, brw_message_reg(4+j), + brw_vec8_grf(c->overflow_grf_start + i + j, 0)); } brw_urb_WRITE(p, @@ -1203,11 +1247,11 @@ static void emit_vertex_write( struct brw_vs_compile *c) c->r0, /* src */ 0, /* allocate */ 1, /* used */ - mrf+1, /* msg len */ + nr+1, /* msg len */ 0, /* response len */ - 1, /* eot */ - 1, /* writes complete */ - BRW_MAX_MRF-1, /* urb destination offset */ + eot, /* eot */ + eot, /* writes complete */ + i-1, /* urb destination offset */ BRW_URB_SWIZZLE_INTERLEAVE); } } -- cgit v1.2.3 From d03a1c2216635a1475172e6603a243348675fd6f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 14 Nov 2009 16:06:57 -0800 Subject: i965g: restore check on line smooth state --- src/gallium/drivers/i965/brw_wm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 8589aa22a8..3c5a2dab7a 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -225,7 +225,8 @@ static void brw_wm_populate_key( struct brw_context *brw, line_aa = AA_NEVER; break; case PIPE_PRIM_LINES: - line_aa = AA_ALWAYS; + line_aa = (brw->curr.rast->templ.line_smooth ? 
+ AA_ALWAYS : AA_NEVER); break; default: line_aa = brw->curr.rast->unfilled_aa_line; -- cgit v1.2.3 From d299ee771b577a8ce839861d1af336fc316e0a1d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 15 Nov 2009 11:47:57 -0800 Subject: i965g rename offset_edge to offset_edgeflag --- src/gallium/drivers/i965/brw_clip.h | 2 +- src/gallium/drivers/i965/brw_clip_unfilled.c | 10 +++++----- src/gallium/drivers/i965/brw_clip_util.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 772c34be88..9bec9643d7 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -132,7 +132,7 @@ struct brw_clip_compile { GLuint offset_bfc0; GLuint offset_bfc1; - GLuint offset_edge; + GLuint offset_edgeflag; }; #define ATTR_SIZE (4*4) diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c index 1cb86dd25b..0fab3a5f1a 100644 --- a/src/gallium/drivers/i965/brw_clip_unfilled.c +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -214,12 +214,12 @@ static void merge_edgeflags( struct brw_clip_compile *c ) { brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); - brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset_edge), brw_imm_f(0)); + brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset_edgeflag), brw_imm_f(0)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); - brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset_edge), brw_imm_f(0)); + brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset_edgeflag), brw_imm_f(0)); brw_set_predicate_control(p, BRW_PREDICATE_NONE); } brw_ENDIF(p, is_poly); @@ -290,7 +290,7 @@ static void emit_lines(struct brw_clip_compile *c, /* draw edge if 
edgeflag != 0 */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, - deref_1f(v0, c->offset_edge), + deref_1f(v0, c->offset_edgeflag), brw_imm_f(0)); draw_edge = brw_IF(p, BRW_EXECUTE_1); { @@ -329,7 +329,7 @@ static void emit_points(struct brw_clip_compile *c, */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, - deref_1f(v0, c->offset_edge), + deref_1f(v0, c->offset_edgeflag), brw_imm_f(0)); draw_point = brw_IF(p, BRW_EXECUTE_1); { @@ -446,7 +446,7 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c ) brw_clip_tri_init_vertices(c); brw_clip_init_ff_sync(c); - assert(c->offset_edge); + assert(c->offset_edgeflag); if (c->key.fill_ccw == CLIP_CULL && c->key.fill_cw == CLIP_CULL) { diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index f8f98c8037..018511e699 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -146,7 +146,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, if (c->chipset.is_igdng) delta = i * 16 + 32 * 3; - if (delta == c->offset_edge) { + if (delta == c->offset_edgeflag) { if (force_edgeflag) brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); else -- cgit v1.2.3 From 1877e6cd2d76143ef8a9c516122afe614ae3b4a4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 17 Nov 2009 14:46:23 -0800 Subject: i965g: handle special vs outputs specially Where vs output semantic tags indicate an output is significant for fixed function processing (such as clipping, unfilled modes, etc), retain information about that output so that we can get to it easily later on. Fix up the unfilled processing, but hard-wire edgeflag to one for now. With this change, trivial/tri-unfilled works. 
--- src/gallium/drivers/i965/brw_clip.c | 45 ++++++++++++++++++++-------- src/gallium/drivers/i965/brw_clip.h | 15 +++++++--- src/gallium/drivers/i965/brw_clip_line.c | 5 ++-- src/gallium/drivers/i965/brw_clip_tri.c | 12 ++++---- src/gallium/drivers/i965/brw_clip_unfilled.c | 9 +++--- src/gallium/drivers/i965/brw_clip_util.c | 2 +- src/gallium/drivers/i965/brw_context.h | 19 +++++++++--- src/gallium/drivers/i965/brw_pipe_shader.c | 38 ++++++++++++++++++++--- src/gallium/drivers/i965/brw_vs.c | 13 +++++++- src/gallium/drivers/i965/brw_vs_emit.c | 42 ++++++++++++++++---------- src/gallium/drivers/i965/brw_wm_pass2.c | 2 +- 11 files changed, 147 insertions(+), 55 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 35e1d2fdbd..4ec7b823e8 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -58,7 +58,6 @@ compile_clip_prog( struct brw_context *brw, const GLuint *program; GLuint program_size; GLuint delta; - GLuint i; memset(&c, 0, sizeof(c)); @@ -82,16 +81,26 @@ compile_clip_prog( struct brw_context *brw, else delta = REG_SIZE; - /* XXX: c.offset is now pretty redundant: - */ - for (i = 0; i < c.key.nr_attrs; i++) { - c.offset[i] = delta; - delta += ATTR_SIZE; - } - /* XXX: c.nr_attrs is very redundant: */ c.nr_attrs = c.key.nr_attrs; + + c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE; + + if (c.key.output_color0) + c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE; + + if (c.key.output_color1) + c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE; + + if (c.key.output_bfc0) + c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE; + + if (c.key.output_bfc1) + c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE; + + if (c.key.output_edgeflag) + c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; if (BRW_IS_IGDNG(brw)) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? 
*/ @@ -158,21 +167,33 @@ compile_clip_prog( struct brw_context *brw, static enum pipe_error upload_clip_prog(struct brw_context *brw) { - enum pipe_error ret; + const struct brw_vertex_shader *vs = brw->curr.vertex_shader; struct brw_clip_prog_key key; + enum pipe_error ret; /* Populate the key, starting from the almost-complete version from * the rast state. */ /* PIPE_NEW_RAST */ - memcpy(&key, &brw->curr.rast->clip_key, sizeof key); - + key = brw->curr.rast->clip_key; + /* BRW_NEW_REDUCED_PRIMITIVE */ key.primitive = brw->reduced_primitive; + /* XXX: if edgeflag is moved to a proper TGSI vs output, can remove + * dependency on CACHE_NEW_VS_PROG + */ + /* CACHE_NEW_VS_PROG */ + key.nr_attrs = brw->vs.prog_data->nr_outputs; + key.output_edgeflag = brw->vs.prog_data->output_edgeflag; + /* PIPE_NEW_VS */ - key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_OUTPUT] + 1; + key.output_hpos = vs->output_hpos; + key.output_color0 = vs->output_color0; + key.output_color1 = vs->output_color1; + key.output_bfc0 = vs->output_bfc0; + key.output_bfc1 = vs->output_bfc1; /* PIPE_NEW_CLIP */ key.nr_userclip = brw->curr.ucp.nr; diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 9bec9643d7..8729efa47b 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -42,7 +42,7 @@ * up polygon offset and flatshading at this point: */ struct brw_clip_prog_key { - GLuint nr_attrs:5; + GLuint nr_attrs:6; GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; @@ -54,7 +54,14 @@ struct brw_clip_prog_key { GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; GLuint clip_mode:3; - GLuint pad1:7; + GLuint output_hpos:6; /* not always zero? 
*/ + + GLuint output_color0:6; + GLuint output_color1:6; + GLuint output_bfc0:6; + GLuint output_bfc1:6; + GLuint output_edgeflag:6; + GLuint pad1:2; GLfloat offset_factor; GLfloat offset_units; @@ -123,7 +130,6 @@ struct brw_clip_compile { GLuint last_mrf; GLuint header_position_offset; - GLuint offset[PIPE_MAX_SHADER_OUTPUTS]; GLboolean need_ff_sync; GLuint nr_color_attrs; @@ -131,7 +137,8 @@ struct brw_clip_compile { GLuint offset_color1; GLuint offset_bfc0; GLuint offset_bfc1; - + + GLuint offset_hpos; GLuint offset_edgeflag; }; diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index a4790bda95..54282d975e 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -132,7 +132,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_instruction *is_neg2 = NULL; struct brw_instruction *not_culled; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); - const int hpos = 0; /* XXX: position not always first element */ brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); @@ -173,12 +172,12 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* dp = DP4(vtx->position, plane) */ - brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[hpos]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset_hpos), c->reg.plane_equation); /* if (IS_NEGATIVE(dp1)) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[hpos]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset_hpos), c->reg.plane_equation); is_negative = brw_IF(p, BRW_EXECUTE_1); { /* diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index 5486f4fa89..fa00f6044f 100644 --- a/src/gallium/drivers/i965/brw_clip_tri.c +++ 
b/src/gallium/drivers/i965/brw_clip_tri.c @@ -249,13 +249,13 @@ void brw_clip_tri( struct brw_clip_compile *c ) /* IS_NEGATIVE(prev) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset_hpos), c->reg.plane_equation); prev_test = brw_IF(p, BRW_EXECUTE_1); { /* IS_POSITIVE(next) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); - brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation); next_test = brw_IF(p, BRW_EXECUTE_1); { @@ -297,7 +297,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) /* IS_NEGATIVE(next) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation); next_test = brw_IF(p, BRW_EXECUTE_1); { /* Going out of bounds. 
Avoid division by zero as we @@ -462,9 +462,9 @@ static void brw_clip_test( struct brw_clip_compile *c ) brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); - brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); - brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); - brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v0, deref_4f(vt0, c->offset_hpos)); + brw_MOV(p, v1, deref_4f(vt1, c->offset_hpos)); + brw_MOV(p, v2, deref_4f(vt2, c->offset_hpos)); brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* test nearz, xmin, ymin plane */ diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c index 0fab3a5f1a..aec835b8ce 100644 --- a/src/gallium/drivers/i965/brw_clip_unfilled.c +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -45,9 +45,9 @@ static void compute_tri_direction( struct brw_clip_compile *c ) struct brw_compile *p = &c->func; struct brw_reg e = c->reg.tmp0; struct brw_reg f = c->reg.tmp1; - struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); - struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); - struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset_hpos); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset_hpos); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset_hpos); struct brw_reg v0n = get_tmp(c); @@ -123,7 +123,8 @@ static void copy_bfc( struct brw_clip_compile *c ) /* Do we have any colors to copy? 
*/ - if (c->nr_color_attrs == 0) + if ((c->offset_color0 == 0 || c->offset_bfc0 == 0) && + (c->offset_color1 == 0 || c->offset_bfc1 == 0)) return; /* In some wierd degnerate cases we can end up testing the diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 018511e699..872042c9a9 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -106,7 +106,7 @@ static void brw_clip_project_vertex( struct brw_clip_compile *c, /* Fixup position. Extract from the original vertex and re-project * to screen space: */ - brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, tmp, deref_4f(vert_addr, c->offset_hpos)); brw_clip_project_position(c, tmp); brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 31f3cf3685..31e04b6e14 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -152,13 +152,23 @@ struct brw_rasterizer_state; struct brw_vertex_shader { const struct tgsi_token *tokens; + struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ + struct tgsi_shader_info info; - unsigned has_flow_control:1; + GLuint has_flow_control:1; + GLuint use_const_buffer:1; + + /* Offsets of special vertex shader outputs required for clipping. + */ + GLuint output_hpos:6; /* not always zero? 
*/ + GLuint output_color0:6; + GLuint output_color1:6; + GLuint output_bfc0:6; + GLuint output_bfc1:6; + GLuint output_edgeflag:6; unsigned id; - struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ - GLboolean use_const_buffer; }; struct brw_fs_signature { @@ -317,7 +327,8 @@ struct brw_vs_prog_data { GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */ - GLboolean copy_edgeflag; + GLuint output_edgeflag; + GLboolean writes_psiz; /* Used for calculating urb partitions: diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 7febf9e0c2..02bc8fa130 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -124,21 +124,51 @@ static void *brw_create_vs_state( struct pipe_context *pipe, const struct pipe_shader_state *shader ) { struct brw_context *brw = brw_context(pipe); + struct brw_vertex_shader *vs; + unsigned i; - struct brw_vertex_shader *vs = CALLOC_STRUCT(brw_vertex_shader); + vs = CALLOC_STRUCT(brw_vertex_shader); if (vs == NULL) return NULL; /* Duplicate tokens, scan shader */ - vs->id = brw->program_id++; - vs->has_flow_control = has_flow_control(&vs->info); - vs->tokens = tgsi_dup_tokens(shader->tokens); if (vs->tokens == NULL) goto fail; tgsi_scan_shader(vs->tokens, &vs->info); + + vs->id = brw->program_id++; + vs->has_flow_control = has_flow_control(&vs->info); + + for (i = 0; i < vs->info.num_outputs; i++) { + int index = vs->info.output_semantic_index[i]; + switch (vs->info.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + vs->output_hpos = i; + break; + case TGSI_SEMANTIC_COLOR: + if (index == 0) + vs->output_color0 = i; + else + vs->output_color1 = i; + break; + case TGSI_SEMANTIC_BCOLOR: + if (index == 0) + vs->output_bfc0 = i; + else + vs->output_bfc1 = i; + break; +#if 0 + case TGSI_SEMANTIC_EDGEFLAG: + vs->output_edgeflag = i; + break; +#endif + } + } + + /* Done: */ diff --git 
a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 05a62ed974..2668392919 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -57,7 +57,18 @@ static enum pipe_error do_vs_prog( struct brw_context *brw, c.prog_data.nr_outputs = vp->info.num_outputs; c.prog_data.nr_inputs = vp->info.num_inputs; - c.prog_data.copy_edgeflag = c.key.copy_edgeflag; + + /* XXX: we want edgeflag handling to be integrated to the vertex + * shader, but are currently faking the edgeflag output: + */ + if (c.key.copy_edgeflag) { + c.prog_data.output_edgeflag = c.prog_data.nr_outputs; + c.prog_data.nr_outputs++; + } + else { + c.prog_data.output_edgeflag = ~0; + } + if (1) tgsi_dump(c.vp->tokens, 0); diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 933c9c4d63..bcaeaca62d 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -70,11 +70,17 @@ static boolean is_position_output( struct brw_vs_compile *c, unsigned vs_output ) { struct brw_vertex_shader *vs = c->vp; - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; - return (semantic == TGSI_SEMANTIC_POSITION && - index == 0); + if (vs_output == c->prog_data.output_edgeflag) { + return FALSE; + } + else { + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + + return (semantic == TGSI_SEMANTIC_POSITION && + index == 0); + } } @@ -83,15 +89,22 @@ static boolean find_output_slot( struct brw_vs_compile *c, unsigned *fs_input_slot ) { struct brw_vertex_shader *vs = c->vp; - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; - unsigned i; - for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { - if (c->key.fs_signature.input[i].semantic == semantic && + if (vs_output == 
c->prog_data.output_edgeflag) { + *fs_input_slot = c->key.fs_signature.nr_inputs; + return TRUE; + } + else { + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + unsigned i; + + for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { + if (c->key.fs_signature.input[i].semantic == semantic && c->key.fs_signature.input[i].semantic_index == index) { - *fs_input_slot = i; - return TRUE; + *fs_input_slot = i; + return TRUE; + } } } @@ -219,7 +232,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* XXX: need to access vertex output semantics here: */ - for (i = 0; i < c->prog_data.nr_outputs; i++) { + for (i = 0; i < c->nr_outputs; i++) { unsigned slot; /* XXX: Put output position in slot zero always. Clipper, etc, @@ -1116,10 +1129,9 @@ static void emit_vertex_write( struct brw_vs_compile *c) GLuint len_vertext_header = 2; if (c->key.copy_edgeflag) { - assert(0); brw_MOV(p, - get_reg(c, TGSI_FILE_OUTPUT, 0), - get_reg(c, TGSI_FILE_INPUT, 0)); + get_reg(c, TGSI_FILE_OUTPUT, c->prog_data.output_edgeflag), + brw_imm_f(1)); } /* Build ndc coords */ diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c index 2a879863ab..56f39d036b 100644 --- a/src/gallium/drivers/i965/brw_wm_pass2.c +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -93,7 +93,7 @@ static void init_registers( struct brw_wm_compile *c ) assert(c->key.vp_nr_outputs >= 1); c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = c->key.vp_nr_outputs * 2; + c->prog_data.urb_read_length = (c->key.nr_inputs + 1) * 2; c->prog_data.curb_read_length = c->nr_creg * 2; /* Note this allocation: -- cgit v1.2.3 From 6b1ede0110f855218119a7a3b26fe3b26aee1bbd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 18 Nov 2009 14:40:43 -0800 Subject: i965g: remove more references to nr_vp_outputs in wm compilation We're really more concerned about how many inputs the 
fragment shader is expecting. --- src/gallium/drivers/i965/brw_wm_pass2.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c index 56f39d036b..19248b4519 100644 --- a/src/gallium/drivers/i965/brw_wm_pass2.c +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -87,11 +87,9 @@ static void init_registers( struct brw_wm_compile *c ) /* XXX: currently just hope the VS outputs line up with FS inputs: */ - for (j = 0; j < c->key.vp_nr_outputs; j++) + for (j = 0; j < c->key.nr_inputs; j++) prealloc_reg(c, &c->payload.input_interp[j], reg++); - assert(c->key.vp_nr_outputs >= 1); - c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; c->prog_data.urb_read_length = (c->key.nr_inputs + 1) * 2; c->prog_data.curb_read_length = c->nr_creg * 2; -- cgit v1.2.3 From 25cbf9b4da7be45218f645102d6be5144be4291f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 14:47:24 -0800 Subject: i965g: fix initialization of texture width/height/depth arrays Will remove these arrays in another branch - they're completely redundant. 
--- src/gallium/drivers/i965/brw_screen_tex_layout.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c index bcdf8d8074..f793fa8859 100644 --- a/src/gallium/drivers/i965/brw_screen_tex_layout.c +++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c @@ -101,16 +101,17 @@ brw_tex_set_level_info(struct brw_texture *tex, GLuint x, GLuint y, GLuint w, GLuint h, GLuint d) { - assert(tex->base.width[level] == w); - assert(tex->base.height[level] == h); - assert(tex->base.depth[level] == d); - assert(tex->image_offset[level] == NULL); - assert(nr_images >= 1); if (BRW_DEBUG & DEBUG_TEXTURE) debug_printf("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, level, w, h, d, x, y, tex->level_offset[level]); + assert(tex->image_offset[level] == NULL); + assert(nr_images >= 1); + + tex->base.width[level] = w; + tex->base.height[level] = h; + tex->base.depth[level] = d; tex->level_offset[level] = (x + y * tex->pitch) * tex->cpp; tex->nr_images[level] = nr_images; -- cgit v1.2.3 From 9e4f3eaf6630e0d3a9b05da90e4879a94516b974 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 15:04:00 -0800 Subject: i965g: make the load-before-use vs immediate path work --- src/gallium/drivers/i965/brw_vs_emit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index bcaeaca62d..52d4731dfd 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -202,7 +202,8 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) struct brw_reg r; int j; - r = brw_vec8_grf(reg, 0); + c->regs[TGSI_FILE_IMMEDIATE][i] = + r = brw_vec8_grf(reg, 0); for (j = 0; j < 4; j++) { brw_MOV(&c->func, @@ -1628,7 +1629,7 @@ void brw_vs_emit(struct 
brw_vs_compile *c) case TGSI_TOKEN_TYPE_IMMEDIATE: { static const float id[4] = {0,0,0,1}; - const float *imm = &parse.FullToken.FullImmediate.u[i].Float; + const float *imm = &parse.FullToken.FullImmediate.u[0].Float; unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; for (i = 0; i < size; i++) -- cgit v1.2.3 From 1b9eda4c74c83cc0ffa98f2885660c80cdff2a65 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 16:09:39 -0800 Subject: i965g: get fragment constants sort-of working --- src/gallium/drivers/i965/brw_curbe.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 79ebac9d15..5fa1723311 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -51,10 +51,10 @@ static int calculate_curbe_offsets( struct brw_context *brw ) { /* CACHE_NEW_WM_PROG */ - const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; + const GLuint nr_fp_regs = brw->wm.prog_data->curb_read_length; /* BRW_NEW_VERTEX_PROGRAM */ - const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16; + const GLuint nr_vp_regs = brw->vs.prog_data->curb_read_length; GLuint nr_clip_regs = 0; GLuint total_regs; @@ -162,6 +162,7 @@ static GLfloat fixed_plane[6][4] = { */ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) { + struct pipe_screen *screen = brw->base.screen; const GLuint sz = brw->curbe.total_size; const GLuint bufsz = sz * 16 * sizeof(GLfloat); enum pipe_error ret; @@ -182,14 +183,15 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) /* fragment shader constants */ if (brw->curbe.wm_size) { GLuint offset = brw->curbe.wm_start * 16; + unsigned nr = brw->wm.prog_data->nr_params; - /* map fs constant buffer */ + const GLfloat *value = screen->buffer_map( screen, + brw->curr.fragment_constants, + 
PIPE_BUFFER_USAGE_CPU_READ); - /* copy float constants */ - for (i = 0; i < brw->wm.prog_data->nr_params; i++) - buf[offset + i] = *brw->wm.prog_data->param[i]; + memcpy(&buf[offset], value, nr * 4 * sizeof(float)); - /* unmap fs constant buffer */ + screen->buffer_unmap( screen, brw->curr.fragment_constants ); } @@ -225,7 +227,6 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT] + 1; - struct pipe_screen *screen = brw->base.screen; /* XXX: note that constant buffers are currently *already* in * buffer objects. If we want to keep on putting them into the -- cgit v1.2.3 From 8db59a1fa329b28ba375d54d6d6d5df06f411a6e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 16:14:44 -0800 Subject: i965g: setup nr_attrs should track nr fragment shader inputs This would be equivalent to taking the nr outputs of the vp variant --- src/gallium/drivers/i965/brw_sf.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 52fb2cd42d..e75f447a03 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -124,8 +124,12 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw) /* Populate the key, noting state dependencies: */ - /* CACHE_NEW_VS_PROG */ - key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_OUTPUT] + 1; + + /* XXX: Add one to turn the max value into a count, then add + * another one to account for the position input. 
+ */ + /* PIPE_NEW_FRAGMENT_SHADER */ + key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 2; /* XXX: this is probably where the mapping between vertex shader -- cgit v1.2.3 From c58e20fbbb87b8dbd0c58294d4ad3d297c3aa747 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 18:47:07 -0800 Subject: i965g: fix typo in previous commit --- src/gallium/drivers/i965/brw_sf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index e75f447a03..6f4502da97 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -129,7 +129,7 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw) * another one to account for the position input. */ /* PIPE_NEW_FRAGMENT_SHADER */ - key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 2; + key.nr_attrs = brw->curr.fragment_shader->info.file_max[TGSI_FILE_INPUT] + 2; /* XXX: this is probably where the mapping between vertex shader -- cgit v1.2.3 From 47cef2bb8f5979ae690e89943f83060999a29a55 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 18:55:18 -0800 Subject: i965g: add new state flag tracking fs signature changes --- src/gallium/drivers/i965/brw_context.h | 1 + src/gallium/drivers/i965/brw_pipe_shader.c | 13 ++++++++++++- src/gallium/drivers/i965/brw_sf.c | 9 ++++----- src/gallium/drivers/i965/brw_vs.c | 7 +++---- 4 files changed, 20 insertions(+), 10 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 31e04b6e14..65859be0ec 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -233,6 +233,7 @@ struct brw_sampler { #define PIPE_NEW_SCISSOR 0x100000 #define PIPE_NEW_BOUND_TEXTURES 0x200000 #define PIPE_NEW_NR_CBUFS 0x400000 +#define 
PIPE_NEW_FRAGMENT_SIGNATURE 0x800000 diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 02bc8fa130..c755fa6889 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -58,9 +58,20 @@ static GLboolean has_flow_control(const struct tgsi_shader_info *info) static void brw_bind_fs_state( struct pipe_context *pipe, void *prog ) { + struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog; struct brw_context *brw = brw_context(pipe); + + if (brw->curr.fragment_shader == fs) + return; + + if (brw->curr.fragment_shader == NULL || + fs == NULL || + memcmp(&brw->curr.fragment_shader->signature, &fs->signature, + brw_fs_signature_size(&fs->signature)) != 0) { + brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SIGNATURE; + } - brw->curr.fragment_shader = (struct brw_fragment_shader *)prog; + brw->curr.fragment_shader = fs; brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SHADER; } diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 6f4502da97..aa2ab5098c 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -125,11 +125,10 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw) /* Populate the key, noting state dependencies: */ - /* XXX: Add one to turn the max value into a count, then add - * another one to account for the position input. + /* XXX: Add one to account for the position input. 
*/ - /* PIPE_NEW_FRAGMENT_SHADER */ - key.nr_attrs = brw->curr.fragment_shader->info.file_max[TGSI_FILE_INPUT] + 2; + /* PIPE_NEW_FRAGMENT_SIGNATURE */ + key.nr_attrs = brw->curr.fragment_shader->signature.nr_inputs + 1; /* XXX: this is probably where the mapping between vertex shader @@ -194,7 +193,7 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw) const struct brw_tracked_state brw_sf_prog = { .dirty = { - .mesa = (PIPE_NEW_RAST | PIPE_NEW_VERTEX_SHADER), + .mesa = (PIPE_NEW_RAST | PIPE_NEW_FRAGMENT_SIGNATURE), .brw = (BRW_NEW_REDUCED_PRIMITIVE), .cache = 0 }, diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 2668392919..25b51eb41e 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -101,7 +101,7 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw) { struct brw_vs_prog_key key; struct brw_vertex_shader *vp = brw->curr.vertex_shader; - struct brw_fragment_shader *fs = brw->curr.fragment_shader; + struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature; enum pipe_error ret; memset(&key, 0, sizeof(key)); @@ -111,8 +111,7 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw) key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL || brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL); - memcpy(&key.fs_signature, &fs->signature, - brw_fs_signature_size(&fs->signature)); + memcpy(&key.fs_signature, sig, brw_fs_signature_size(sig)); /* Make an early check for the key. 
@@ -138,7 +137,7 @@ const struct brw_tracked_state brw_vs_prog = { .dirty = { .mesa = (PIPE_NEW_CLIP | PIPE_NEW_RAST | - PIPE_NEW_FRAGMENT_SHADER), + PIPE_NEW_FRAGMENT_SIGNATURE), .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, -- cgit v1.2.3 From 34a01929d54266e8e5fec47e94859405bce588fa Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 19:15:21 -0800 Subject: i965g: special case setup when fs has no inputs --- src/gallium/drivers/i965/brw_sf.c | 60 ++++++++++++++++++++-------------- src/gallium/drivers/i965/brw_sf.h | 1 + src/gallium/drivers/i965/brw_sf_emit.c | 19 +++++++++++ 3 files changed, 55 insertions(+), 25 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index aa2ab5098c..0b94dc40c3 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -64,32 +64,42 @@ static enum pipe_error compile_sf_prog( struct brw_context *brw, c.prog_data.urb_read_length = c.nr_attr_regs; c.prog_data.urb_entry_size = c.nr_setup_regs * 2; - - /* Which primitive? Or all three? + /* Special case when there are no attributes to setup. + * + * XXX: should be able to set nr_setup_attrs to nr_attrs-1 -- but + * breaks vp-tris.c */ - switch (key->primitive) { - case SF_TRIANGLES: - c.nr_verts = 3; - brw_emit_tri_setup( &c, GL_TRUE ); - break; - case SF_LINES: - c.nr_verts = 2; - brw_emit_line_setup( &c, GL_TRUE ); - break; - case SF_POINTS: - c.nr_verts = 1; - if (key->do_point_sprite) - brw_emit_point_sprite_setup( &c, GL_TRUE ); - else - brw_emit_point_setup( &c, GL_TRUE ); - break; - case SF_UNFILLED_TRIS: - c.nr_verts = 3; - brw_emit_anyprim_setup( &c ); - break; - default: - assert(0); - return PIPE_ERROR_BAD_INPUT; + if (c.nr_attrs - 1 == 0) { + c.nr_verts = 0; + brw_emit_null_setup( &c ); + } + else { + /* Which primitive? Or all three? 
+ */ + switch (key->primitive) { + case SF_TRIANGLES: + c.nr_verts = 3; + brw_emit_tri_setup( &c, GL_TRUE ); + break; + case SF_LINES: + c.nr_verts = 2; + brw_emit_line_setup( &c, GL_TRUE ); + break; + case SF_POINTS: + c.nr_verts = 1; + if (key->do_point_sprite) + brw_emit_point_sprite_setup( &c, GL_TRUE ); + else + brw_emit_point_setup( &c, GL_TRUE ); + break; + case SF_UNFILLED_TRIS: + c.nr_verts = 3; + brw_emit_anyprim_setup( &c ); + break; + default: + assert(0); + return PIPE_ERROR_BAD_INPUT; + } } /* get the program diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h index 0b7003dc5e..a895c7d2f6 100644 --- a/src/gallium/drivers/i965/brw_sf.h +++ b/src/gallium/drivers/i965/brw_sf.h @@ -112,6 +112,7 @@ struct brw_sf_compile { }; +void brw_emit_null_setup( struct brw_sf_compile *c ); void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate ); void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate ); void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate ); diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index db52c9553e..2983e8a9dd 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -352,6 +352,25 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, } +void brw_emit_null_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + + /* m0 is implicitly copied from r0 in the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 1, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); +} void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) { -- cgit v1.2.3 From 4fb77ef840a42c3c8e2a43aa772a73614528fc4d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 19:31:32 
-0800 Subject: i965g: include interpolation info in fs signature --- src/gallium/drivers/i965/brw_context.h | 5 +++-- src/gallium/drivers/i965/brw_pipe_shader.c | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 65859be0ec..64279c4676 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -174,8 +174,9 @@ struct brw_vertex_shader { struct brw_fs_signature { GLuint nr_inputs; struct { - GLuint semantic:5; - GLuint semantic_index:27; + GLuint interp:3; /* TGSI_INTERPOLATE_x */ + GLuint semantic:5; /* TGSI_SEMANTIC_x */ + GLuint semantic_index:24; } input[PIPE_MAX_SHADER_INPUTS]; }; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index c755fa6889..3222ee7777 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -109,6 +109,7 @@ static void *brw_create_fs_state( struct pipe_context *pipe, fs->signature.nr_inputs = fs->info.num_inputs; for (i = 0; i < fs->info.num_inputs; i++) { + fs->signature.input[i].interp = fs->info.input_interpolate[i]; fs->signature.input[i].semantic = fs->info.input_semantic_name[i]; fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i]; } -- cgit v1.2.3 From d2f4c80c8baf48bcfd3e33a275df2fa6fcb6d353 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 19:41:25 -0800 Subject: i965g: get linear vs perspective interpolation working again --- src/gallium/drivers/i965/brw_sf.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index 0b94dc40c3..e1986a9dbb 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -127,8 +127,10 @@ static 
enum pipe_error compile_sf_prog( struct brw_context *brw, */ static enum pipe_error upload_sf_prog(struct brw_context *brw) { - enum pipe_error ret; + const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature; struct brw_sf_prog_key key; + enum pipe_error ret; + unsigned i; memset(&key, 0, sizeof(key)); @@ -138,24 +140,26 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw) /* XXX: Add one to account for the position input. */ /* PIPE_NEW_FRAGMENT_SIGNATURE */ - key.nr_attrs = brw->curr.fragment_shader->signature.nr_inputs + 1; + key.nr_attrs = sig->nr_inputs + 1; - /* XXX: this is probably where the mapping between vertex shader - * outputs and fragment shader inputs should be handled. Assume - * for now 1:1 correspondance. - * - * XXX: scan frag shader inputs to work out linear vs. perspective - * interpolation below. - * - * XXX: as long as we're hard-wiring, is eg. position required to - * be linear? + /* XXX: why is position required to be linear? why do we care + * about it at all? */ - //key.linear_attrs = 0; - //key.persp_attrs = (1 << key.nr_attrs) - 1; + key.linear_attrs = 1; /* position -- but why? 
*/ - key.linear_attrs = (1 << key.nr_attrs) - 1; - key.persp_attrs = 0; + for (i = 0; i < sig->nr_inputs; i++) { + switch (sig->input[i].interp) { + case TGSI_INTERPOLATE_CONSTANT: + break; + case TGSI_INTERPOLATE_LINEAR: + key.linear_attrs |= 1 << (i+1); + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + key.persp_attrs |= 1 << (i+1); + break; + } + } /* BRW_NEW_REDUCED_PRIMITIVE */ switch (brw->reduced_primitive) { -- cgit v1.2.3 From 63b0af07755201e5ad630bf7f67a7997263734d6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 19:51:04 -0800 Subject: i965g: first pass at vs immediates in curbe --- src/gallium/drivers/i965/brw_context.h | 6 ++ src/gallium/drivers/i965/brw_curbe.c | 40 ++++++---- src/gallium/drivers/i965/brw_pipe_shader.c | 43 +++++++++++ src/gallium/drivers/i965/brw_vs_emit.c | 120 ++++++++++++----------------- 4 files changed, 124 insertions(+), 85 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 64279c4676..096c8cf12b 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -149,12 +149,17 @@ struct brw_blend_state { struct brw_rasterizer_state; +struct brw_immediate_data { + unsigned nr; + float (*data)[4]; +}; struct brw_vertex_shader { const struct tgsi_token *tokens; struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ struct tgsi_shader_info info; + struct brw_immediate_data immediates; GLuint has_flow_control:1; GLuint use_const_buffer:1; @@ -189,6 +194,7 @@ struct brw_fragment_shader { struct tgsi_shader_info info; struct brw_fs_signature signature; + struct brw_immediate_data immediates; unsigned iz_lookup; //unsigned wm_lookup; diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 5fa1723311..3e821d5afe 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -226,21 +226,34 
@@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT] + 1; + struct brw_vertex_shader *vs = brw->curr.vertex_shader; + GLuint nr_immediate, nr_const; - /* XXX: note that constant buffers are currently *already* in - * buffer objects. If we want to keep on putting them into the - * curbe, makes sense to treat constbuf's specially with malloc. - */ - const GLfloat *value = screen->buffer_map( screen, - brw->curr.vertex_constants, - PIPE_BUFFER_USAGE_CPU_READ); + nr_immediate = vs->immediates.nr; + if (nr_immediate) { + memcpy(&buf[offset], + vs->immediates.data, + nr_immediate * 4 * sizeof(float)); - /* XXX: what if user's constant buffer is too small? - */ - memcpy(&buf[offset], value, nr * 4 * sizeof(float)); + offset += nr_immediate * 4; + } - screen->buffer_unmap( screen, brw->curr.vertex_constants ); + nr_const = vs->info.file_max[TGSI_FILE_CONSTANT] + 1; + if (nr_const) { + /* XXX: note that constant buffers are currently *already* in + * buffer objects. If we want to keep on putting them into the + * curbe, makes sense to treat constbuf's specially with malloc. + */ + const GLfloat *value = screen->buffer_map( screen, + brw->curr.vertex_constants, + PIPE_BUFFER_USAGE_CPU_READ); + + /* XXX: what if user's constant buffer is too small? 
+ */ + memcpy(&buf[offset], value, nr_const * 4 * sizeof(float)); + + screen->buffer_unmap( screen, brw->curr.vertex_constants ); + } } if (BRW_DEBUG & DEBUG_CURBE) { @@ -263,8 +276,7 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) } else { /* constants have changed */ - if (brw->curbe.last_buf) - FREE(brw->curbe.last_buf); + FREE(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 3222ee7777..31a715ab65 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -55,6 +55,47 @@ static GLboolean has_flow_control(const struct tgsi_shader_info *info) } +static void scan_immediates(const struct tgsi_token *tokens, + const struct tgsi_shader_info *info, + struct brw_immediate_data *imm) +{ + struct tgsi_parse_context parse; + boolean done = FALSE; + + imm->nr = 0; + imm->data = MALLOC(info->immediate_count * 4 * sizeof(float)); + + tgsi_parse_init( &parse, tokens ); + while (!tgsi_parse_end_of_tokens( &parse ) && !done) { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: { + static const float id[4] = {0,0,0,1}; + const float *value = &parse.FullToken.FullImmediate.u[0].Float; + unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + unsigned i; + + for (i = 0; i < size; i++) + imm->data[imm->nr][i] = value[i]; + + for (; i < 4; i++) + imm->data[imm->nr][i] = id[i]; + + imm->nr++; + break; + } + + case TGSI_TOKEN_TYPE_INSTRUCTION: + done = 1; + break; + } + } +} + static void brw_bind_fs_state( struct pipe_context *pipe, void *prog ) { @@ -106,6 +147,7 @@ static void *brw_create_fs_state( struct pipe_context *pipe, goto fail; tgsi_scan_shader(fs->tokens, &fs->info); + scan_immediates(fs->tokens, &fs->info, &fs->immediates); fs->signature.nr_inputs = 
fs->info.num_inputs; for (i = 0; i < fs->info.num_inputs; i++) { @@ -150,6 +192,7 @@ static void *brw_create_vs_state( struct pipe_context *pipe, goto fail; tgsi_scan_shader(vs->tokens, &vs->info); + scan_immediates(vs->tokens, &vs->info, &vs->immediates); vs->id = brw->program_id++; vs->has_flow_control = has_flow_control(&vs->info); diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 52d4731dfd..00f0af2d07 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -42,6 +42,15 @@ #include "brw_vs.h" #include "brw_debug.h" +/* Choose one of the 4 vec4's which can be packed into each 16-wide reg. + */ +static INLINE struct brw_reg brw_vec4_grf_repeat( GLuint reg, GLuint slot ) +{ + int nr = reg + slot/2; + int subnr = (slot%2) * 4; + + return stride(brw_vec4_grf(nr, subnr), 0, 4, 1); +} static struct brw_reg get_tmp( struct brw_vs_compile *c ) @@ -119,7 +128,7 @@ static boolean find_output_slot( struct brw_vs_compile *c, */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { - GLuint i, reg = 0, mrf; + GLuint i, reg = 0, subreg = 0, mrf; int attributes_in_vue; /* Determine whether to use a real constant buffer or use a block @@ -150,33 +159,57 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* User clip planes from curbe: */ if (c->key.nr_userclip) { - for (i = 0; i < c->key.nr_userclip; i++) { - c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); + /* Skip over fixed planes: Or never read them into vs unit? + */ + subreg += 6; + + for (i = 0; i < c->key.nr_userclip; i++, subreg++) { + c->userplane[i] = + stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1); } /* Deal with curbe alignment: */ - reg += ((6 + c->key.nr_userclip + 3) / 4) * 2; + subreg = align(subreg, 2); + /*reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;*/ } - /* Vertex program parameters from curbe: + + /* Immediates: always in the curbe. 
+ * + * XXX: Can try to encode some immediates as brw immediates + * XXX: Make sure ureg sets minimal immediate size and respect it + * here. */ - if (c->vp->use_const_buffer) { - /* get constants from a real constant buffer */ - c->prog_data.curb_read_length = 0; - c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */ + for (i = 0; i < c->vp->info.immediate_count; i++, subreg++) { + c->regs[TGSI_FILE_IMMEDIATE][i] = + stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1); } - else { - /* use a section of the GRF for constants */ + c->prog_data.nr_params = c->vp->info.immediate_count * 4; + + + /* Vertex constant buffer. + * + * Constants from the buffer can be either cached in the curbe or + * loaded as needed from the actual constant buffer. + */ + if (!c->vp->use_const_buffer) { GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1; - for (i = 0; i < nr_params; i++) { - c->regs[TGSI_FILE_CONSTANT][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + + for (i = 0; i < nr_params; i++, subreg++) { + c->regs[TGSI_FILE_CONSTANT][i] = + stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1); } - reg += (nr_params + 1) / 2; - c->prog_data.curb_read_length = reg - 1; - c->prog_data.nr_params = nr_params * 4; + + c->prog_data.nr_params += nr_params * 4; } + /* All regs allocated + */ + reg += (subreg + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + + /* Allocate input regs: */ c->nr_inputs = c->vp->info.num_inputs; @@ -191,28 +224,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) if (c->nr_inputs == 0) reg++; - /* Allocate a GRF and load immediate values by hand with 4 MOVs!!! - * - * XXX: Try to encode float immediates as brw immediates - * XXX: Put immediates into the CURBE. - * XXX: Make sure ureg sets minimal immediate size and respect it - * here. 
- */ - for (i = 0; i < c->nr_immediates; i++) { - struct brw_reg r; - int j; - - c->regs[TGSI_FILE_IMMEDIATE][i] = - r = brw_vec8_grf(reg, 0); - - for (j = 0; j < 4; j++) { - brw_MOV(&c->func, - brw_writemask(r, (1<immediate[i][j])); - } - - reg++; - } /* Allocate outputs. The non-position outputs go straight into message regs. @@ -1605,8 +1616,6 @@ void brw_vs_emit(struct brw_vs_compile *c) struct brw_instruction *end_inst, *last_inst; struct tgsi_parse_context parse; struct tgsi_full_instruction *inst; - boolean done = FALSE; - int i; if (BRW_DEBUG & DEBUG_VS) tgsi_dump(c->vp->tokens, 0); @@ -1616,37 +1625,6 @@ void brw_vs_emit(struct brw_vs_compile *c) brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); - /* Inputs */ - tgsi_parse_init( &parse, tokens ); - while( !tgsi_parse_end_of_tokens( &parse ) ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* Nothing to do -- using info from tgsi_scan(). 
- */ - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: { - static const float id[4] = {0,0,0,1}; - const float *imm = &parse.FullToken.FullImmediate.u[0].Float; - unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; - - for (i = 0; i < size; i++) - c->immediate[c->nr_immediates][i] = imm[i]; - - for ( ; i < 4; i++) - c->immediate[c->nr_immediates][i] = id[i]; - - c->nr_immediates++; - break; - } - - case TGSI_TOKEN_TYPE_INSTRUCTION: - done = 1; - break; - } - } /* Static register allocation */ -- cgit v1.2.3 From 9507a6c206627b3ae76e2ae8398fff518e39941a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 20:02:42 -0800 Subject: i965g: fragment shader immediates working --- src/gallium/drivers/i965/brw_curbe.c | 30 ++++++++++++++++----- src/gallium/drivers/i965/brw_wm.h | 9 ------- src/gallium/drivers/i965/brw_wm_pass0.c | 48 ++++++++------------------------- 3 files changed, 34 insertions(+), 53 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 3e821d5afe..3f031577d5 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -182,16 +182,32 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) /* fragment shader constants */ if (brw->curbe.wm_size) { + const struct brw_fragment_shader *fs = brw->curr.fragment_shader; GLuint offset = brw->curbe.wm_start * 16; - unsigned nr = brw->wm.prog_data->nr_params; + GLuint nr_immediate, nr_const; + + nr_immediate = fs->immediates.nr; + if (nr_immediate) { + memcpy(&buf[offset], + fs->immediates.data, + nr_immediate * 4 * sizeof(float)); - const GLfloat *value = screen->buffer_map( screen, - brw->curr.fragment_constants, - PIPE_BUFFER_USAGE_CPU_READ); + offset += nr_immediate * 4; + } - memcpy(&buf[offset], value, nr * 4 * sizeof(float)); + nr_const = fs->info.file_max[TGSI_FILE_CONSTANT] + 1; +/* nr_const = brw->wm.prog_data->nr_params; */ + if 
(nr_const) { + const GLfloat *value = screen->buffer_map( screen, + brw->curr.fragment_constants, + PIPE_BUFFER_USAGE_CPU_READ); - screen->buffer_unmap( screen, brw->curr.fragment_constants ); + memcpy(&buf[offset], value, + nr_const * 4 * sizeof(float)); + + screen->buffer_unmap( screen, + brw->curr.fragment_constants ); + } } @@ -226,7 +242,7 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - struct brw_vertex_shader *vs = brw->curr.vertex_shader; + const struct brw_vertex_shader *vs = brw->curr.vertex_shader; GLuint nr_immediate, nr_const; nr_immediate = vs->immediates.nr; diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index f85a8af878..b7d807dcb3 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ b/src/gallium/drivers/i965/brw_wm.h @@ -129,12 +129,6 @@ struct brw_wm_ref { GLuint insn:24; }; -struct brw_wm_imm_ref { - const struct brw_wm_ref *ref; - GLfloat imm1f; -}; - - struct brw_wm_instruction { struct brw_wm_value *dst[4]; struct brw_wm_ref *src[3][4]; @@ -272,9 +266,6 @@ struct brw_wm_compile { struct brw_wm_instruction instruction[BRW_WM_MAX_INSN]; GLuint nr_insns; - struct brw_wm_imm_ref imm_ref[BRW_WM_MAX_CONST]; - GLuint nr_imm_refs; - struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2]; GLuint grf_limit; diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c index 7bb341e2c2..0bacad2b0f 100644 --- a/src/gallium/drivers/i965/brw_wm_pass0.c +++ b/src/gallium/drivers/i965/brw_wm_pass0.c @@ -30,6 +30,7 @@ */ #include "util/u_memory.h" +#include "util/u_math.h" #include "brw_debug.h" #include "brw_wm.h" @@ -97,9 +98,10 @@ static void pass0_set_fpreg_ref( struct brw_wm_compile *c, } static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, - const GLfloat *param_ptr ) + unsigned idx, + unsigned component) { - GLuint i = c->prog_data.nr_params++; + GLuint 
i = idx * 4 + component; if (i >= BRW_WM_MAX_PARAM) { debug_printf("%s: out of params\n", __FUNCTION__); @@ -109,8 +111,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, else { struct brw_wm_ref *ref = get_ref(c); - c->prog_data.param[i] = param_ptr; - c->nr_creg = (i+16)/16; + c->nr_creg = MAX2(c->nr_creg, (i+16)/16); /* Push the offsets into hw_reg. These will be added to the * real register numbers once one is allocated in pass2. @@ -125,37 +126,6 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, } -/** Return a ref to an immediate value */ -static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c, - const GLfloat *imm1f ) -{ - GLuint i; - - /* Search for an existing const value matching the request: - */ - for (i = 0; i < c->nr_imm_refs; i++) { - if (c->imm_ref[i].imm1f == *imm1f) - return c->imm_ref[i].ref; - } - - /* Else try to add a new one: - */ - if (c->nr_imm_refs < Elements(c->imm_ref)) { - GLuint i = c->nr_imm_refs++; - - /* An immediate is a special type of parameter: - */ - c->imm_ref[i].imm1f = *imm1f; - c->imm_ref[i].ref = get_param_ref(c, imm1f); - - return c->imm_ref[i].ref; - } - else { - debug_printf("%s: out of imm_refs\n", __FUNCTION__); - c->prog_data.error = 1; - return NULL; - } -} /* Lookup our internal registers @@ -177,11 +147,15 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c, break; case TGSI_FILE_CONSTANT: - ref = get_param_ref(c, &c->env_param[idx][component]); + ref = get_param_ref(c, + c->fp->info.immediate_count + idx, + component); break; case TGSI_FILE_IMMEDIATE: - ref = get_imm_ref(c, &c->immediate[idx].v[component]); + ref = get_param_ref(c, + idx, + component); break; default: -- cgit v1.2.3 From 95d7aca4b9963820e7ead81830340dbeb563897b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 19 Nov 2009 20:40:41 -0800 Subject: i965g: fix typo converting wm src regs --- src/gallium/drivers/i965/brw_wm_fp.c | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 0df84f8546..174486a101 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -1023,7 +1023,7 @@ static void emit_insn( struct brw_wm_compile *c, inst->Instruction.Saturate ); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) - src[i] = translate_src( c, &inst->FullSrcRegisters[0] ); + src[i] = translate_src( c, &inst->FullSrcRegisters[i] ); switch (opcode) { case TGSI_OPCODE_ABS: -- cgit v1.2.3 From 8bf75f28de161173d1cdaad8c74bcac074e1211e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 21 Nov 2009 01:52:22 +0000 Subject: i965g: get basic texturing working again Revert to fixed-layout surface binding table -- it's probably the best way to do this. Pass sampler and texture numbers separately even though we're always keeping them the same at present. --- src/gallium/drivers/i965/brw_context.h | 13 +++-- src/gallium/drivers/i965/brw_pipe_fb.c | 4 +- src/gallium/drivers/i965/brw_pipe_sampler.c | 3 +- src/gallium/drivers/i965/brw_sf.c | 3 +- src/gallium/drivers/i965/brw_wm.c | 9 +++ src/gallium/drivers/i965/brw_wm.h | 4 +- src/gallium/drivers/i965/brw_wm_emit.c | 34 ++++++------ src/gallium/drivers/i965/brw_wm_fp.c | 39 ++++++++----- src/gallium/drivers/i965/brw_wm_surface_state.c | 74 ++++++++++++++++--------- 9 files changed, 117 insertions(+), 66 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 096c8cf12b..598e747fe0 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -209,9 +209,9 @@ struct brw_fragment_shader { struct brw_sampler { - float border_color[4]; struct brw_ss0 ss0; struct brw_ss1 ss1; + float border_color[4]; struct brw_ss3 ss3; }; @@ -355,20 +355,23 @@ struct brw_vs_ouput_sizes { /** Number of 
texture sampler units */ #define BRW_MAX_TEX_UNIT 16 +/** Max number of render targets in a shader */ +#define BRW_MAX_DRAW_BUFFERS 4 + /** * Size of our surface binding table for the WM. * This contains pointers to the drawing surfaces and current texture * objects and shader constant buffers (+2). */ -#define BRW_WM_MAX_SURF (PIPE_MAX_COLOR_BUFS + BRW_MAX_TEX_UNIT + 1) +#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) /** * Helpers to convert drawing buffers, textures and constant buffers * to surface binding table indexes, for WM. */ -#define SURF_INDEX_DRAW(d) (d) -#define SURF_INDEX_FRAG_CONST_BUFFER (PIPE_MAX_COLOR_BUFS) -#define SURF_INDEX_TEXTURE(t) (PIPE_MAX_COLOR_BUFS + 1 + (t)) +#define BTI_COLOR_BUF(d) (d) +#define BTI_FRAGMENT_CONSTANTS (BRW_MAX_DRAW_BUFFERS) +#define BTI_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 1 + (t)) /** * Size of surface binding table for the VS. diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c index 1511220447..6b03094f50 100644 --- a/src/gallium/drivers/i965/brw_pipe_fb.c +++ b/src/gallium/drivers/i965/brw_pipe_fb.c @@ -31,7 +31,7 @@ static void brw_set_framebuffer_state( struct pipe_context *pipe, /* Color buffers: */ - for (i = 0; i < MAX2(fb->nr_cbufs, brw->curr.fb.nr_cbufs); i++) { + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { if (brw->curr.fb.cbufs[i] != fb->cbufs[i]) { brw->state.dirty.mesa |= PIPE_NEW_COLOR_BUFFERS; pipe_surface_reference(&brw->curr.fb.cbufs[i], fb->cbufs[i]); @@ -39,7 +39,7 @@ static void brw_set_framebuffer_state( struct pipe_context *pipe, } if (brw->curr.fb.nr_cbufs != fb->nr_cbufs) { - brw->curr.fb.nr_cbufs = fb->nr_cbufs; + brw->curr.fb.nr_cbufs = MIN2(BRW_MAX_DRAW_BUFFERS, fb->nr_cbufs); brw->state.dirty.mesa |= PIPE_NEW_NR_CBUFS; } } diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index f0a765ecf5..5cd38a43a6 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ 
b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -107,7 +107,7 @@ static void * brw_create_sampler_state( struct pipe_context *pipe, const struct pipe_sampler_state *template ) { - struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state); + struct brw_sampler *sampler = CALLOC_STRUCT(brw_sampler); sampler->ss0.min_filter = translate_img_filter( template->min_img_filter ); sampler->ss0.mag_filter = translate_img_filter( template->mag_img_filter ); @@ -214,7 +214,6 @@ void brw_pipe_sampler_init( struct brw_context *brw ) brw->base.set_sampler_textures = brw_set_sampler_textures; } - void brw_pipe_sampler_cleanup( struct brw_context *brw ) { } diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index e1986a9dbb..a28fb71589 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -153,9 +153,10 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw) case TGSI_INTERPOLATE_CONSTANT: break; case TGSI_INTERPOLATE_LINEAR: + case TGSI_INTERPOLATE_PERSPECTIVE: key.linear_attrs |= 1 << (i+1); break; - case TGSI_INTERPOLATE_PERSPECTIVE: +// case TGSI_INTERPOLATE_PERSPECTIVE: key.persp_attrs |= 1 << (i+1); break; } diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 3c5a2dab7a..2c9d3e5e87 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -56,6 +56,15 @@ GLuint brw_wm_nr_args( GLuint opcode ) case WM_FB_WRITE: case WM_PINTERP: return 3; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + case TGSI_OPCODE_TXB: + case TGSI_OPCODE_TXD: + /* sampler arg is held as a field in the instruction, not in an + * actual register: + */ + return tgsi_get_opcode_info(opcode)->num_src - 1; + default: assert(opcode < MAX_OPCODE); return tgsi_get_opcode_info(opcode)->num_src; diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h index b7d807dcb3..f1ca9f6369 100644 --- a/src/gallium/drivers/i965/brw_wm.h +++ 
b/src/gallium/drivers/i965/brw_wm.h @@ -135,6 +135,7 @@ struct brw_wm_instruction { GLuint opcode:8; GLuint saturate:1; GLuint writemask:4; + GLuint sampler:4; GLuint tex_unit:4; /* texture/sampler unit for texture instructions */ GLuint target:4; /* TGSI_TEXTURE_x for texture instructions, * target binding table index for FB_WRITE @@ -201,7 +202,8 @@ struct brw_fp_instruction { unsigned opcode:8; unsigned target:8; /* XXX: special usage for FB_WRITE */ unsigned tex_unit:4; - unsigned pad:12; + unsigned sampler:4; + unsigned pad:8; }; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index a14e12f35b..3250db1848 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -792,7 +792,8 @@ static void emit_tex( struct brw_wm_compile *c, const struct brw_wm_instruction *inst, struct brw_reg *dst, GLuint dst_flags, - struct brw_reg *arg ) + struct brw_reg *coord, + GLuint sampler) { struct brw_compile *p = &c->func; GLuint msgLength, responseLength; @@ -838,7 +839,7 @@ static void emit_tex( struct brw_wm_compile *c, for (i = 0; i < nr; i++) { static const GLuint swz[4] = {0,1,2,2}; if (emit & (1<payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ + BTI_TEXTURE(inst->tex_unit), + sampler, /* sampler index */ inst->writemask, msg_type, responseLength, @@ -878,7 +879,8 @@ static void emit_txb( struct brw_wm_compile *c, const struct brw_wm_instruction *inst, struct brw_reg *dst, GLuint dst_flags, - struct brw_reg *arg ) + struct brw_reg *coord, + GLuint sampler ) { struct brw_compile *p = &c->func; GLuint msgLength; @@ -888,7 +890,7 @@ static void emit_txb( struct brw_wm_compile *c, switch (inst->target) { case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: - brw_MOV(p, brw_message_reg(2), arg[0]); + brw_MOV(p, brw_message_reg(2), coord[0]); brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); brw_MOV(p, brw_message_reg(6), 
brw_imm_f(0)); break; @@ -896,22 +898,22 @@ static void emit_txb( struct brw_wm_compile *c, case TGSI_TEXTURE_RECT: case TGSI_TEXTURE_SHADOW2D: case TGSI_TEXTURE_SHADOWRECT: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); + brw_MOV(p, brw_message_reg(2), coord[0]); + brw_MOV(p, brw_message_reg(4), coord[1]); brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); break; case TGSI_TEXTURE_3D: case TGSI_TEXTURE_CUBE: - brw_MOV(p, brw_message_reg(2), arg[0]); - brw_MOV(p, brw_message_reg(4), arg[1]); - brw_MOV(p, brw_message_reg(6), arg[2]); + brw_MOV(p, brw_message_reg(2), coord[0]); + brw_MOV(p, brw_message_reg(4), coord[1]); + brw_MOV(p, brw_message_reg(6), coord[2]); break; default: /* unexpected target */ abort(); } - brw_MOV(p, brw_message_reg(8), arg[3]); + brw_MOV(p, brw_message_reg(8), coord[3]); msgLength = 9; if (BRW_IS_IGDNG(p->brw)) @@ -923,8 +925,8 @@ static void emit_txb( struct brw_wm_compile *c, retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW), 1, retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW), - SURF_INDEX_TEXTURE(inst->tex_unit), - inst->tex_unit, /* sampler */ + BTI_TEXTURE(inst->tex_unit), + sampler, /* sampler index */ inst->writemask, msg_type, 8, /* responseLength */ @@ -1483,11 +1485,11 @@ void brw_wm_emit( struct brw_wm_compile *c ) /* Texturing operations: */ case TGSI_OPCODE_TEX: - emit_tex(c, inst, dst, dst_flags, args[0]); + emit_tex(c, inst, dst, dst_flags, args[0], inst->sampler); break; case TGSI_OPCODE_TXB: - emit_txb(c, inst, dst, dst_flags, args[0]); + emit_txb(c, inst, dst, dst_flags, args[0], inst->sampler); break; case TGSI_OPCODE_KIL: diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index 174486a101..a8b5e15f36 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -282,6 +282,7 @@ static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c, struct brw_fp_dst dest, GLuint tex_unit, GLuint target, + GLuint 
sampler, struct brw_fp_src src0, struct brw_fp_src src1, struct brw_fp_src src2 ) @@ -298,6 +299,7 @@ static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c, inst->dst = dest; inst->tex_unit = tex_unit; inst->target = target; + inst->sampler = sampler; inst->src[0] = src0; inst->src[1] = src1; inst->src[2] = src2; @@ -313,7 +315,7 @@ static INLINE void emit_op3(struct brw_wm_compile *c, struct brw_fp_src src1, struct brw_fp_src src2 ) { - emit_tex_op(c, op, dest, 0, 0, src0, src1, src2); + emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src2); } @@ -323,7 +325,7 @@ static INLINE void emit_op2(struct brw_wm_compile *c, struct brw_fp_src src0, struct brw_fp_src src1) { - emit_tex_op(c, op, dest, 0, 0, src0, src1, src_undef()); + emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src_undef()); } static INLINE void emit_op1(struct brw_wm_compile *c, @@ -331,14 +333,14 @@ static INLINE void emit_op1(struct brw_wm_compile *c, struct brw_fp_dst dest, struct brw_fp_src src0) { - emit_tex_op(c, op, dest, 0, 0, src0, src_undef(), src_undef()); + emit_tex_op(c, op, dest, 0, 0, 0, src0, src_undef(), src_undef()); } static INLINE void emit_op0(struct brw_wm_compile *c, GLuint op, struct brw_fp_dst dest) { - emit_tex_op(c, op, dest, 0, 0, src_undef(), src_undef(), src_undef()); + emit_tex_op(c, op, dest, 0, 0, 0, src_undef(), src_undef(), src_undef()); } @@ -674,7 +676,8 @@ static void precalc_tex( struct brw_wm_compile *c, struct brw_fp_dst dst, unsigned target, unsigned unit, - struct brw_fp_src src0 ) + struct brw_fp_src src0, + struct brw_fp_src sampler ) { struct brw_fp_src coord = src_undef(); struct brw_fp_dst tmp = dst_undef(); @@ -751,6 +754,7 @@ static void precalc_tex( struct brw_wm_compile *c, dst_saturate(tmp, dst.saturate), unit, target, + sampler.index, coord, src_undef(), src_undef()); @@ -802,6 +806,7 @@ static void precalc_tex( struct brw_wm_compile *c, dst, unit, target, + sampler.index, coord, src_undef(), src_undef()); @@ -851,7 +856,8 @@ static 
void precalc_txp( struct brw_wm_compile *c, struct brw_fp_dst dst, unsigned target, unsigned unit, - struct brw_fp_src src0 ) + struct brw_fp_src src0, + struct brw_fp_src sampler ) { if (projtex(c, target, src0)) { struct brw_fp_dst tmp = get_temp(c); @@ -877,7 +883,8 @@ static void precalc_txp( struct brw_wm_compile *c, dst, target, unit, - src_reg_from_dst(tmp)); + src_reg_from_dst(tmp), + sampler ); release_temp(c, tmp); } @@ -885,7 +892,7 @@ static void precalc_txp( struct brw_wm_compile *c, { /* dst = TEX src0 */ - precalc_tex(c, dst, target, unit, src0); + precalc_tex(c, dst, target, unit, src0, sampler); } } @@ -936,6 +943,7 @@ static void emit_fb_write( struct brw_wm_compile *c ) dst_undef(), (i == c->key.nr_cbufs - 1), /* EOT */ i, + 0, /* no sampler */ outcolor, payload_r0_depth, outdepth); @@ -1056,15 +1064,17 @@ static void emit_insn( struct brw_wm_compile *c, case TGSI_OPCODE_TEX: precalc_tex(c, dst, inst->InstructionExtTexture.Texture, - src[0].file, /* sampler unit */ - src[1] ); + src[1].index, /* use sampler unit for tex idx */ + src[0], /* coord */ + src[1]); /* sampler */ break; case TGSI_OPCODE_TXP: precalc_txp(c, dst, inst->InstructionExtTexture.Texture, - src[0].file, /* sampler unit */ - src[1] ); + src[1].index, /* use sampler unit for tex idx */ + src[0], /* coord */ + src[1]); /* sampler */ break; case TGSI_OPCODE_TXB: @@ -1072,8 +1082,9 @@ static void emit_insn( struct brw_wm_compile *c, */ precalc_tex(c, dst, inst->InstructionExtTexture.Texture, - src[0].file, /* sampler unit */ - src[1] ); + src[1].index, /* use sampler unit for tex idx*/ + src[0], + src[1]); break; case TGSI_OPCODE_XPD: diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c index f882331433..f92b8198ed 100644 --- a/src/gallium/drivers/i965/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965/brw_wm_surface_state.c @@ -149,19 +149,23 @@ brw_wm_get_binding_table(struct brw_context *brw, enum pipe_error ret; struct 
brw_winsys_reloc reloc[BRW_WM_MAX_SURF]; uint32_t data[BRW_WM_MAX_SURF]; + GLuint nr_relocs = 0; GLuint data_size = brw->wm.nr_surfaces * sizeof data[0]; int i; assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); assert(brw->wm.nr_surfaces > 0); - /* Emit binding table relocations to surface state */ + /* Emit binding table relocations to surface state + */ for (i = 0; i < brw->wm.nr_surfaces; i++) { - make_reloc(&reloc[i], - BRW_USAGE_STATE, - 0, - i * sizeof(GLuint), - brw->wm.surf_bo[i]); + if (brw->wm.surf_bo[i]) { + make_reloc(&reloc[nr_relocs++], + BRW_USAGE_STATE, + 0, + i * sizeof(GLuint), + brw->wm.surf_bo[i]); + } } /* Note there is no key for this search beyond the values in the @@ -169,7 +173,7 @@ brw_wm_get_binding_table(struct brw_context *brw, */ if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, - reloc, brw->wm.nr_surfaces, + reloc, nr_relocs, NULL, bo_out)) return PIPE_OK; @@ -182,7 +186,7 @@ brw_wm_get_binding_table(struct brw_context *brw, ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND, NULL, 0, - reloc, brw->wm.nr_surfaces, + reloc, nr_relocs, data, data_size, NULL, NULL, bo_out); @@ -208,40 +212,60 @@ static enum pipe_error prepare_wm_surfaces(struct brw_context *brw ) for (i = 0; i < brw->curr.fb.nr_cbufs; i++) { ret = brw_update_render_surface(brw, brw_surface(brw->curr.fb.cbufs[i]), - &brw->wm.surf_bo[nr_surfaces++]); + &brw->wm.surf_bo[BTI_COLOR_BUF(i)]); if (ret) return ret; + + nr_surfaces = BTI_COLOR_BUF(i) + 1; + } + + + + /* PIPE_NEW_FRAGMENT_CONSTANTS + */ +#if 0 + if (brw->curr.fragment_constants) { + ret = brw_update_fragment_constant_surface( + brw, + brw->curr.fragment_constants, + &brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS]); + + if (ret) + return ret; + + nr_surfaces = BTI_FRAGMENT_CONSTANTS + 1; } + else { + bo_reference(&brw->wm.surf_bo[SURF_FRAG_CONSTANTS], NULL); + } +#endif + /* PIPE_NEW_TEXTURE */ for (i = 0; i < brw->curr.num_textures; i++) { ret = brw_update_texture_surface(brw, 
brw_texture(brw->curr.texture[i]), - &brw->wm.surf_bo[nr_surfaces++]); + &brw->wm.surf_bo[BTI_TEXTURE(i)]); if (ret) return ret; + + nr_surfaces = BTI_TEXTURE(i) + 1; } - /* PIPE_NEW_FRAGMENT_CONSTANTS + /* Clear any inactive entries: */ -#if 0 - if (brw->curr.fragment_constants) { - ret = brw_update_fragment_constant_surface(brw, - brw->curr.fragment_constants, - &brw->wm.surf_bo[nr_surfaces++]); - if (ret) - return ret; - } -#endif + for (i = brw->curr.fb.nr_cbufs; i < BRW_MAX_DRAW_BUFFERS; i++) + bo_reference(&brw->wm.surf_bo[BTI_COLOR_BUF(i)], NULL); - if (brw->wm.nr_surfaces != nr_surfaces) { + if (!brw->curr.fragment_constants) + bo_reference(&brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS], NULL); - /* Unreference any left-over old buffers - */ - for (i = nr_surfaces; i < brw->wm.nr_surfaces; i++) - bo_reference(&brw->wm.surf_bo[i], NULL); + /* XXX: no pipe_max_textures define?? */ + for (i = brw->curr.num_textures; i < PIPE_MAX_SAMPLERS; i++) + bo_reference(&brw->wm.surf_bo[BTI_TEXTURE(i)], NULL); + if (brw->wm.nr_surfaces != nr_surfaces) { brw->wm.nr_surfaces = nr_surfaces; brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; } -- cgit v1.2.3 From 21172d4358be5cce3bda244b33e3728a5fde3751 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 23 Nov 2009 00:58:13 +0000 Subject: i965g: apply linear math to both linear and perspective attrs --- src/gallium/drivers/i965/brw_sf.c | 3 +-- src/gallium/drivers/i965/brw_sf_emit.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c index a28fb71589..e1986a9dbb 100644 --- a/src/gallium/drivers/i965/brw_sf.c +++ b/src/gallium/drivers/i965/brw_sf.c @@ -153,10 +153,9 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw) case TGSI_INTERPOLATE_CONSTANT: break; case TGSI_INTERPOLATE_LINEAR: - case TGSI_INTERPOLATE_PERSPECTIVE: key.linear_attrs |= 1 << (i+1); break; -// case 
TGSI_INTERPOLATE_PERSPECTIVE: + case TGSI_INTERPOLATE_PERSPECTIVE: key.persp_attrs |= 1 << (i+1); break; } diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c index 2983e8a9dd..3b85725e36 100644 --- a/src/gallium/drivers/i965/brw_sf_emit.c +++ b/src/gallium/drivers/i965/brw_sf_emit.c @@ -324,7 +324,7 @@ static GLboolean calculate_masks( struct brw_sf_compile *c, { GLboolean is_last_attr = (reg == c->nr_setup_regs - 1); GLuint persp_mask = c->key.persp_attrs; - GLuint linear_mask = c->key.linear_attrs; + GLuint linear_mask = (c->key.persp_attrs | c->key.linear_attrs); *pc_persp = 0; *pc_linear = 0; -- cgit v1.2.3 From 968a7dfb292f1eefa9ada8096bb023c051518c32 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 23 Nov 2009 01:47:57 +0000 Subject: i965g: use correct key size for vs upload --- src/gallium/drivers/i965/brw_vs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 25b51eb41e..14a1c3bcf1 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -84,7 +84,7 @@ static enum pipe_error do_vs_prog( struct brw_context *brw, return ret; ret = brw_upload_cache( &brw->cache, BRW_VS_PROG, - &c.key, sizeof(c.key), + &c.key, brw_vs_prog_key_size(&c.key), NULL, 0, program, program_size, &c.prog_data, -- cgit v1.2.3 From cd0e6619e6d9f0f60606d1e079b1a04af1717309 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 23 Nov 2009 03:00:47 +0000 Subject: i965g: correct test for unfilled modes --- src/gallium/drivers/i965/brw_pipe_rast.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c index 27c568de0a..2117e91a9e 100644 --- a/src/gallium/drivers/i965/brw_pipe_rast.c +++ b/src/gallium/drivers/i965/brw_pipe_rast.c @@ -58,8 
+58,10 @@ calculate_clip_key_rast( const struct brw_context *brw, key->fill_cw = translate_fill(templ->fill_cw); } - if (key->fill_cw != CLIP_FILL || - key->fill_ccw != CLIP_FILL) { + if (key->fill_cw == CLIP_LINE || + key->fill_ccw == CLIP_LINE || + key->fill_cw == CLIP_POINT || + key->fill_ccw == CLIP_POINT) { key->do_unfilled = 1; key->clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; } -- cgit v1.2.3 From 0a89ad80d957869cf8760326787c6189ab50a1e0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 23 Nov 2009 03:06:58 +0000 Subject: i965g: gs nr_attrs also tracks nr fragment shader inputs Or, equivalently, nr of outputs of active vertex shader varient. --- src/gallium/drivers/i965/brw_gs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c index ce77be24f6..921b201bae 100644 --- a/src/gallium/drivers/i965/brw_gs.c +++ b/src/gallium/drivers/i965/brw_gs.c @@ -154,10 +154,12 @@ static const unsigned gs_prim[PIPE_PRIM_MAX] = { static void populate_key( struct brw_context *brw, struct brw_gs_prog_key *key ) { + const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature; + memset(key, 0, sizeof(*key)); - /* CACHE_NEW_VS_PROG */ - key->nr_attrs = brw->vs.prog_data->nr_outputs; + /* PIPE_NEW_FRAGMENT_SIGNATURE */ + key->nr_attrs = sig->nr_inputs + 1; /* BRW_NEW_PRIMITIVE */ key->primitive = gs_prim[brw->primitive]; @@ -206,9 +208,9 @@ static int prepare_gs_prog(struct brw_context *brw) const struct brw_tracked_state brw_gs_prog = { .dirty = { - .mesa = 0, + .mesa = PIPE_NEW_FRAGMENT_SIGNATURE, .brw = BRW_NEW_PRIMITIVE, - .cache = CACHE_NEW_VS_PROG + .cache = 0, }, .prepare = prepare_gs_prog }; -- cgit v1.2.3 From ca9b0e942c5f8f95383c637b05e3fb237f013688 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 29 Nov 2009 18:59:19 +1000 Subject: i965g: remove surface from views list before freeing this fixes a crash with the 
xorg state tracker, however it then locks up the GPU once rendering is enabled but at least it doesn't crash. Signed-off-by: Dave Airlie --- src/gallium/drivers/i965/brw_screen_surface.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c index 21a7382873..1e37c63d6c 100644 --- a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -245,6 +245,7 @@ static void brw_tex_surface_destroy( struct pipe_surface *surf ) /* Unreference texture, shared buffer: */ + remove_from_list(surface); bo_reference(&surface->bo, NULL); pipe_texture_reference( &surface->base.texture, NULL ); -- cgit v1.2.3 From 4490122d0cae360d1552cea7d7d860de352f13f6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 25 Nov 2009 23:02:46 +0000 Subject: i965g: remove redundant nr_attrs member --- src/gallium/drivers/i965/brw_clip.c | 8 ++------ src/gallium/drivers/i965/brw_clip.h | 4 ++-- src/gallium/drivers/i965/brw_clip_tri.c | 6 +++--- src/gallium/drivers/i965/brw_clip_util.c | 2 +- 4 files changed, 8 insertions(+), 12 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 4ec7b823e8..58d9e56df2 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -81,10 +81,6 @@ compile_clip_prog( struct brw_context *brw, else delta = REG_SIZE; - /* XXX: c.nr_attrs is very redundant: - */ - c.nr_attrs = c.key.nr_attrs; - c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE; if (c.key.output_color0) @@ -103,9 +99,9 @@ compile_clip_prog( struct brw_context *brw, c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; if (BRW_IS_IGDNG(brw)) - c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? 
*/ + c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ else - c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ c.nr_bytes = c.nr_regs * REG_SIZE; diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 8729efa47b..80e3a11a37 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -115,9 +115,9 @@ struct brw_clip_compile { struct brw_reg ff_sync; } reg; - /* 3 different ways of expressing vertex size: + /* 3 different ways of expressing vertex size, including + * key.nr_attrs. */ - GLuint nr_attrs; GLuint nr_regs; GLuint nr_bytes; diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index fa00f6044f..4cde7294ea 100644 --- a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -66,12 +66,12 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, i += c->nr_regs; } - if (c->nr_attrs & 1) { + if (c->key.nr_attrs & 1) { for (j = 0; j < 3; j++) { - GLuint delta = c->nr_attrs*16 + 32; + GLuint delta = c->key.nr_attrs*16 + 32; if (c->chipset.is_igdng) - delta = c->nr_attrs * 16 + 32 * 3; + delta = c->key.nr_attrs * 16 + 32 * 3; brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); } diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 872042c9a9..97a5710310 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -140,7 +140,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c, /* Iterate over each attribute (could be done in pairs?) 
*/ - for (i = 0; i < c->nr_attrs; i++) { + for (i = 0; i < c->key.nr_attrs; i++) { GLuint delta = i*16 + 32; if (c->chipset.is_igdng) -- cgit v1.2.3 From cddc7e3a9cd321247c2298ef1b94cced1122a8e5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 30 Nov 2009 13:39:21 +0000 Subject: brw: add dumping to gem winsys --- progs/demos/gears.c | 7 ++ src/gallium/drivers/i965/Makefile | 1 + src/gallium/drivers/i965/brw_batchbuffer.c | 2 - src/gallium/drivers/i965/brw_context.h | 5 - src/gallium/drivers/i965/brw_debug.h | 1 + src/gallium/drivers/i965/brw_disasm.c | 4 +- src/gallium/drivers/i965/brw_eu_emit.c | 1 + src/gallium/drivers/i965/brw_screen.c | 11 ++ src/gallium/drivers/i965/brw_vs_emit.c | 1 + src/gallium/drivers/i965/brw_winsys.h | 21 ++++ src/gallium/drivers/i965/brw_winsys_debug.c | 87 ++++++++++++++ src/gallium/drivers/i965/brw_wm_emit.c | 1 + src/gallium/winsys/drm/i965/gem/i965_drm_api.c | 21 ++++ src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c | 133 +++++++++++++++++++--- src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h | 2 + src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 91 +-------------- 16 files changed, 280 insertions(+), 109 deletions(-) create mode 100644 src/gallium/drivers/i965/brw_winsys_debug.c (limited to 'src/gallium/drivers/i965') diff --git a/progs/demos/gears.c b/progs/demos/gears.c index 6016162d6f..cf2c0a5443 100644 --- a/progs/demos/gears.c +++ b/progs/demos/gears.c @@ -92,6 +92,7 @@ gear(GLfloat inner_radius, GLfloat outer_radius, GLfloat width, glNormal3f(0.0, 0.0, -1.0); +#if 0 /* draw back face */ glBegin(GL_QUAD_STRIP); for (i = 0; i <= teeth; i++) { @@ -160,6 +161,7 @@ gear(GLfloat inner_radius, GLfloat outer_radius, GLfloat width, glVertex3f(r0 * cos(angle), r0 * sin(angle), width * 0.5); } glEnd(); +#endif } @@ -195,6 +197,7 @@ draw(void) glCallList(gear1); glPopMatrix(); +#if 0 glPushMatrix(); glTranslatef(3.1, -2.0, 0.0); glRotatef(-2.0 * angle - 9.0, 0.0, 0.0, 1.0); @@ -206,6 +209,7 @@ draw(void) glRotatef(-2.0 * angle - 
25.0, 0.0, 0.0, 1.0); glCallList(gear3); glPopMatrix(); +#endif glPopMatrix(); @@ -213,6 +217,9 @@ draw(void) Frames++; + if (Frames == 2) + exit(0); + { GLint t = glutGet(GLUT_ELAPSED_TIME); if (t - T0 >= 5000) { diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile index 8df07d1c10..95fd3cd69b 100644 --- a/src/gallium/drivers/i965/Makefile +++ b/src/gallium/drivers/i965/Makefile @@ -68,6 +68,7 @@ C_SOURCES = \ brw_screen_texture.c \ brw_screen_surface.c \ brw_batchbuffer.c \ + brw_winsys_debug.c \ intel_decode.c include ../../Makefile.template diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c index d725e8b27e..22607dc608 100644 --- a/src/gallium/drivers/i965/brw_batchbuffer.c +++ b/src/gallium/drivers/i965/brw_batchbuffer.c @@ -28,12 +28,10 @@ #include "util/u_memory.h" #include "brw_batchbuffer.h" -//#include "brw_decode.h" #include "brw_reg.h" #include "brw_winsys.h" #include "brw_debug.h" #include "brw_structs.h" -#include "intel_decode.h" #define ALWAYS_EMIT_MI_FLUSH 1 diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 598e747fe0..b7330f00f4 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -832,11 +832,6 @@ int brw_upload_urb_fence(struct brw_context *brw); */ int brw_upload_cs_urb_state(struct brw_context *brw); -/* brw_disasm.c */ -int brw_disasm_insn (FILE *file, const struct brw_instruction *inst); -int brw_disasm (FILE *file, - const struct brw_instruction *inst, - unsigned count); /*====================================================================== * Inline conversion functions. 
These are better-typed than the diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h index 0deddbf977..98407a06ed 100644 --- a/src/gallium/drivers/i965/brw_debug.h +++ b/src/gallium/drivers/i965/brw_debug.h @@ -39,4 +39,5 @@ extern int BRW_DEBUG; #endif + #endif diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c index 4100f11d48..65db27248b 100644 --- a/src/gallium/drivers/i965/brw_disasm.c +++ b/src/gallium/drivers/i965/brw_disasm.c @@ -27,7 +27,9 @@ #include #include -#include "brw_context.h" +#include "brw_disasm.h" +#include "brw_structs.h" +#include "brw_reg.h" #include "brw_defines.h" struct { diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 7776b4f965..3ee50899fb 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -34,6 +34,7 @@ #include "brw_defines.h" #include "brw_eu.h" #include "brw_debug.h" +#include "brw_disasm.h" diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 05da72ebb2..70e2d9c47a 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -65,7 +65,16 @@ static const struct debug_named_value debug_names[] = { { NULL, 0 } }; +static const struct debug_named_value dump_names[] = { + { "asm", DUMP_ASM}, + { "state", DUMP_STATE}, + { "batch", DUMP_BATCH}, + { NULL, 0 } +}; + int BRW_DEBUG = 0; +int BRW_DUMP = 0; + #endif @@ -327,6 +336,8 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0); BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0); BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB | DEBUG_WM; + + BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0); #endif memset(&chipset, 0, sizeof chipset); diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 
00f0af2d07..20cec0f59b 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -41,6 +41,7 @@ #include "brw_context.h" #include "brw_vs.h" #include "brw_debug.h" +#include "brw_disasm.h" /* Choose one of the 4 vec4's which can be packed into each 16-wide reg. */ diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index a723244960..9e86a1256e 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -111,6 +111,7 @@ enum brw_buffer_data_type { }; + /* Relocations to be applied with subdata in a call to sws->bo_subdata, below. * * Effectively this encodes: @@ -274,6 +275,26 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, struct brw_winsys_buffer *buffer); +/************************************************************************* + * Cooperative dumping between winsys and driver. TODO: make this + * driver-only by wrapping calls to winsys->bo_subdata(). 
+ */ + +#ifdef DEBUG +extern int BRW_DUMP; +#else +#define BRW_DUMP 0 +#endif + +#define DUMP_ASM 0x1 +#define DUMP_STATE 0x2 +#define DUMP_BATCH 0x4 + +void brw_dump_data( unsigned pci_id, + enum brw_buffer_data_type data_type, + unsigned offset, + const void *data, + size_t size ); #endif diff --git a/src/gallium/drivers/i965/brw_winsys_debug.c b/src/gallium/drivers/i965/brw_winsys_debug.c new file mode 100644 index 0000000000..f8f6a539bc --- /dev/null +++ b/src/gallium/drivers/i965/brw_winsys_debug.c @@ -0,0 +1,87 @@ +#include "brw_winsys.h" +#include "brw_disasm.h" +#include "brw_structs_dump.h" +#include "brw_structs.h" +#include "intel_decode.h" + + +void brw_dump_data( unsigned pci_id, + enum brw_buffer_data_type data_type, + unsigned offset, + const void *data, + size_t size ) +{ + if (BRW_DUMP & DUMP_ASM) { + switch (data_type) { + case BRW_DATA_GS_WM_PROG: + case BRW_DATA_GS_SF_PROG: + case BRW_DATA_GS_VS_PROG: + case BRW_DATA_GS_GS_PROG: + case BRW_DATA_GS_CLIP_PROG: + brw_disasm( stderr, data, size / sizeof(struct brw_instruction) ); + break; + default: + break; + } + } + + if (BRW_DUMP & DUMP_STATE) { + switch (data_type) { + case BRW_DATA_GS_CC_VP: + brw_dump_cc_viewport( data ); + break; + case BRW_DATA_GS_CC_UNIT: + brw_dump_cc_unit_state( data ); + break; + case BRW_DATA_GS_SAMPLER_DEFAULT_COLOR: + brw_dump_sampler_default_color( data ); + break; + case BRW_DATA_GS_SAMPLER: + brw_dump_sampler_state( data ); + break; + case BRW_DATA_GS_WM_UNIT: + brw_dump_wm_unit_state( data ); + break; + case BRW_DATA_GS_SF_VP: + brw_dump_sf_viewport( data ); + break; + case BRW_DATA_GS_SF_UNIT: + brw_dump_sf_unit_state( data ); + break; + case BRW_DATA_GS_VS_UNIT: + brw_dump_vs_unit_state( data ); + break; + case BRW_DATA_GS_GS_UNIT: + brw_dump_gs_unit_state( data ); + break; + case BRW_DATA_GS_CLIP_VP: + brw_dump_clipper_viewport( data ); + break; + case BRW_DATA_GS_CLIP_UNIT: + brw_dump_clip_unit_state( data ); + break; + case BRW_DATA_SS_SURFACE: + 
brw_dump_surface_state( data ); + break; + case BRW_DATA_SS_SURF_BIND: + break; + case BRW_DATA_OTHER: + break; + case BRW_DATA_CONSTANT_BUFFER: + break; + default: + break; + } + } + + if (BRW_DUMP & DUMP_BATCH) { + switch (data_type) { + case BRW_DATA_BATCH_BUFFER: + intel_decode(data, size / 4, offset, pci_id); + break; + default: + break; + } + } +} + diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 3250db1848..0b82f4e156 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -35,6 +35,7 @@ #include "brw_context.h" #include "brw_wm.h" #include "brw_debug.h" +#include "brw_disasm.h" /* Not quite sure how correct this is - need to understand horiz * vs. vertical strides a little better. diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c index 191a733c36..5d5dfdae46 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c @@ -44,6 +44,9 @@ i965_libdrm_buffer_from_handle(struct i965_libdrm_winsys *idws, struct i965_libdrm_buffer *buf = CALLOC_STRUCT(i965_libdrm_buffer); uint32_t tile = 0, swizzle = 0; + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + if (!buf) return NULL; pipe_reference_init(&buf->base.reference, 1); @@ -89,6 +92,9 @@ i965_libdrm_texture_from_shared_handle(struct drm_api *api, struct i965_libdrm_winsys *idws = i965_libdrm_winsys(brw_screen(screen)->sws); struct i965_libdrm_buffer *buffer; + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + buffer = i965_libdrm_buffer_from_handle(idws, name, handle); if (!buffer) return NULL; @@ -106,6 +112,10 @@ i965_libdrm_shared_handle_from_texture(struct drm_api *api, { struct i965_libdrm_buffer *buf = NULL; struct brw_winsys_buffer *buffer = NULL; + + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + if (!brw_texture_get_winsys_buffer(texture, &buffer, pitch)) return FALSE; @@ -129,6 +139,10 
@@ i965_libdrm_local_handle_from_texture(struct drm_api *api, unsigned *handle) { struct brw_winsys_buffer *buffer = NULL; + + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + if (!brw_texture_get_winsys_buffer(texture, &buffer, pitch)) return FALSE; @@ -142,6 +156,9 @@ i965_libdrm_winsys_destroy(struct brw_winsys_screen *iws) { struct i965_libdrm_winsys *idws = i965_libdrm_winsys(iws); + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + drm_intel_bufmgr_destroy(idws->gem); FREE(idws); @@ -154,6 +171,8 @@ i965_libdrm_create_screen(struct drm_api *api, int drmFD, struct i965_libdrm_winsys *idws; unsigned int deviceID; + debug_printf("%s\n", __FUNCTION__); + if (arg != NULL) { switch(arg->mode) { case DRM_CREATE_NORMAL: @@ -194,6 +213,8 @@ i965_libdrm_create_context(struct drm_api *api, struct pipe_screen *screen) static void destroy(struct drm_api *api) { + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); } diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c index 1f3f19ab72..d4a0c97262 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c @@ -5,16 +5,59 @@ #include "i915_drm.h" #include "intel_bufmgr.h" + + const char *names[BRW_BUFFER_TYPE_MAX] = { - "texture", - "scanout", - "vertex", - "curbe", - "query", - "shader_constants", - "wm_scratch", - "batch", - "state_cache", + "TEXTURE", + "SCANOUT", + "VERTEX", + "CURBE", + "QUERY", + "SHADER_CONSTANTS", + "WM_SCRATCH", + "BATCH", + "GENERAL_STATE", + "SURFACE_STATE", + "PIXEL", + "GENERIC", +}; + +const char *usages[BRW_USAGE_MAX] = { + "STATE", + "QUERY_RESULT", + "RENDER_TARGET", + "DEPTH_BUFFER", + "BLIT_SOURCE", + "BLIT_DEST", + "SAMPLER", + "VERTEX", + "SCRATCH" +}; + + +const char *data_types[BRW_DATA_MAX] = +{ + "GS: CC_VP", + "GS: CC_UNIT", + "GS: WM_PROG", + "GS: SAMPLER_DEFAULT_COLOR", + "GS: SAMPLER", + "GS: WM_UNIT", + "GS: SF_PROG", + "GS: SF_VP", + "GS: SF_UNIT", 
+ "GS: VS_UNIT", + "GS: VS_PROG", + "GS: GS_UNIT", + "GS: GS_PROG", + "GS: CLIP_VP", + "GS: CLIP_UNIT", + "GS: CLIP_PROG", + "SS: SURFACE", + "SS: SURF_BIND", + "CONSTANT DATA", + "BATCH DATA", + "(untyped)" }; static enum pipe_error @@ -27,6 +70,9 @@ i965_libdrm_bo_alloc(struct brw_winsys_screen *sws, struct i965_libdrm_winsys *idws = i965_libdrm_winsys(sws); struct i965_libdrm_buffer *buf; + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + buf = CALLOC_STRUCT(i965_libdrm_buffer); if (!buf) return PIPE_ERROR_OUT_OF_MEMORY; @@ -79,6 +125,9 @@ i965_libdrm_bo_destroy(struct brw_winsys_buffer *buffer) { struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + drm_intel_bo_unreference(buf->bo); FREE(buffer); } @@ -95,6 +144,12 @@ i965_libdrm_bo_emit_reloc(struct brw_winsys_buffer *buffer, int read, write; int ret; + if (BRW_DUMP) + debug_printf("%s buf %p offset %x delta %x buf2 %p/%s/%s\n", + __FUNCTION__, (void *)buffer, + offset, delta, + (void *)buffer2, names[buf2->data_type], usages[usage]); + switch (usage) { case BRW_USAGE_STATE: read = I915_GEM_DOMAIN_INSTRUCTION; @@ -104,7 +159,11 @@ i965_libdrm_bo_emit_reloc(struct brw_winsys_buffer *buffer, read = I915_GEM_DOMAIN_INSTRUCTION; write = I915_GEM_DOMAIN_INSTRUCTION; break; - case BRW_USAGE_BLIT_DEST: + case BRW_USAGE_RENDER_TARGET: + read = I915_GEM_DOMAIN_RENDER; + write = 0; + break; + case BRW_USAGE_DEPTH_BUFFER: read = I915_GEM_DOMAIN_RENDER; write = I915_GEM_DOMAIN_RENDER; break; @@ -112,11 +171,7 @@ i965_libdrm_bo_emit_reloc(struct brw_winsys_buffer *buffer, read = 0; write = I915_GEM_DOMAIN_RENDER; break; - case BRW_USAGE_RENDER_TARGET: - read = I915_GEM_DOMAIN_RENDER; - write = 0; - break; - case BRW_USAGE_DEPTH_BUFFER: + case BRW_USAGE_BLIT_DEST: read = I915_GEM_DOMAIN_RENDER; write = I915_GEM_DOMAIN_RENDER; break; @@ -137,6 +192,11 @@ i965_libdrm_bo_emit_reloc(struct brw_winsys_buffer *buffer, return -1; } + /* Needed?? 
+ ((uint32_t *)buf->bo->virtual)[offset/4] = (delta + + buf2->bo->offset); + */ + ret = dri_bo_emit_reloc( buf->bo, read, write, delta, offset, buf2->bo ); if (ret) return -1; @@ -152,6 +212,9 @@ i965_libdrm_bo_exec(struct brw_winsys_buffer *buffer, struct i965_libdrm_winsys *idws = i965_libdrm_winsys(buffer->sws); int ret; + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + if (idws->send_cmd) { ret = dri_bo_exec(buf->bo, bytes_used, NULL, 0, 0); if (ret) @@ -171,10 +234,20 @@ i965_libdrm_bo_subdata(struct brw_winsys_buffer *buffer, unsigned nr_reloc) { struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + struct i965_libdrm_winsys *idws = i965_libdrm_winsys(buffer->sws); int ret, i; (void)data_type; + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + + if (BRW_DUMP) + brw_dump_data( idws->id, + data_type, + buf->bo->offset + offset, + data, size ); + /* XXX: use bo_map_gtt/memcpy/unmap_gtt under some circumstances??? */ ret = drm_intel_bo_subdata(buf->bo, offset, size, (void*)data); @@ -194,6 +267,9 @@ i965_libdrm_bo_is_busy(struct brw_winsys_buffer *buffer) { struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + return drm_intel_bo_busy(buf->bo); } @@ -204,6 +280,9 @@ i965_libdrm_bo_references(struct brw_winsys_buffer *a, struct i965_libdrm_buffer *bufa = i965_libdrm_buffer(a); struct i965_libdrm_buffer *bufb = i965_libdrm_buffer(b); + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + /* XXX: can't find this func: */ return drm_intel_bo_references(bufa->bo, bufb->bo); @@ -220,6 +299,9 @@ i965_libdrm_check_aperture_space(struct brw_winsys_screen *iws, static drm_intel_bo *bos[128]; int i; + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + if (count > Elements(bos)) { assert(0); return FALSE; @@ -243,6 +325,12 @@ i965_libdrm_bo_map(struct brw_winsys_buffer *buffer, struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); int ret; + + if (BRW_DUMP) + debug_printf("%s %p %s 
%s\n", __FUNCTION__, (void *)buffer, + write ? "read/write" : "read", + write ? data_types[data_type] : ""); + if (!buf->map_count) { if (buf->map_gtt) { ret = drm_intel_gem_bo_map_gtt(buf->bo); @@ -256,6 +344,7 @@ i965_libdrm_bo_map(struct brw_winsys_buffer *buffer, } } + buf->data_type = data_type; buf->map_count++; return buf->bo->virtual; } @@ -265,7 +354,18 @@ i965_libdrm_bo_flush_range(struct brw_winsys_buffer *buffer, unsigned offset, unsigned length) { + struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + struct i965_libdrm_winsys *idws = i965_libdrm_winsys(buffer->sws); + if (BRW_DUMP) + debug_printf("%s offset %d len %d\n", __FUNCTION__, offset, length); + + if (BRW_DUMP) + brw_dump_data( idws->id, + buf->data_type, + buf->bo->offset + offset, + buf->bo->virtual + offset, + length ); } static void @@ -273,6 +373,9 @@ i965_libdrm_bo_unmap(struct brw_winsys_buffer *buffer) { struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer); + if (BRW_DUMP) + debug_printf("%s\n", __FUNCTION__); + if (--buf->map_count > 0) return; diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h index 7945711263..5b556b18f0 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h @@ -52,6 +52,8 @@ struct i965_libdrm_buffer { boolean flinked; unsigned flink; + unsigned data_type; /* valid while mapped */ + unsigned cheesy_refcount; }; diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c index 9d2bfae090..e712de6307 100644 --- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c +++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c @@ -228,89 +228,7 @@ xlib_brw_bo_exec( struct brw_winsys_buffer *buffer, return 0; } -static void dump_data( struct xlib_brw_winsys *xbw, - enum brw_buffer_data_type data_type, - unsigned offset, - const void *data, - size_t size ) -{ - static int DUMP_ASM = 0; - static int 
DUMP_STATE = 0; - static int DUMP_BATCH = 1; - - if (DUMP_ASM) { - switch (data_type) { - case BRW_DATA_GS_WM_PROG: - case BRW_DATA_GS_SF_PROG: - case BRW_DATA_GS_VS_PROG: - case BRW_DATA_GS_GS_PROG: - case BRW_DATA_GS_CLIP_PROG: - brw_disasm( stderr, data, size / sizeof(struct brw_instruction) ); - break; - default: - break; - } - } - - if (DUMP_STATE) { - switch (data_type) { - case BRW_DATA_GS_CC_VP: - brw_dump_cc_viewport( data ); - break; - case BRW_DATA_GS_CC_UNIT: - brw_dump_cc_unit_state( data ); - break; - case BRW_DATA_GS_SAMPLER_DEFAULT_COLOR: - brw_dump_sampler_default_color( data ); - break; - case BRW_DATA_GS_SAMPLER: - brw_dump_sampler_state( data ); - break; - case BRW_DATA_GS_WM_UNIT: - brw_dump_wm_unit_state( data ); - break; - case BRW_DATA_GS_SF_VP: - brw_dump_sf_viewport( data ); - break; - case BRW_DATA_GS_SF_UNIT: - brw_dump_sf_unit_state( data ); - break; - case BRW_DATA_GS_VS_UNIT: - brw_dump_vs_unit_state( data ); - break; - case BRW_DATA_GS_GS_UNIT: - brw_dump_gs_unit_state( data ); - break; - case BRW_DATA_GS_CLIP_VP: - brw_dump_clipper_viewport( data ); - break; - case BRW_DATA_GS_CLIP_UNIT: - brw_dump_clip_unit_state( data ); - break; - case BRW_DATA_SS_SURFACE: - brw_dump_surface_state( data ); - break; - case BRW_DATA_SS_SURF_BIND: - break; - case BRW_DATA_OTHER: - break; - case BRW_DATA_CONSTANT_BUFFER: - break; - default: - break; - } - } - if (DUMP_BATCH) { - switch (data_type) { - case BRW_DATA_BATCH_BUFFER: - intel_decode(data, size / 4, offset, xbw->chipset.pci_id); - break; - default: - break; - } - } -} static int @@ -346,10 +264,11 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer, xlib_brw_buffer(reloc[i].bo)->offset + reloc[i].delta; } - if (1) - dump_data( xbw, data_type, - buf->offset + offset, - buf->virtual + offset, size ); + if (BRW_DUMP) + brw_dump_data( xbw->chipset.pci_id, + data_type, + buf->offset + offset, + buf->virtual + offset, size ); return 0; -- cgit v1.2.3 From 
6781f624af8b06061673f3fd6f19ffb6a56c3e8c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 30 Nov 2009 15:35:58 +0000 Subject: i965g: pass backbuffer tiling information to driver The gem winsys gets this information, needs to pass it on. --- src/gallium/drivers/i965/brw_screen.h | 3 --- src/gallium/drivers/i965/brw_screen_tex_layout.c | 1 + src/gallium/drivers/i965/brw_screen_texture.c | 12 +++--------- src/gallium/drivers/i965/brw_winsys.h | 9 ++++++++- src/gallium/winsys/drm/i965/gem/i965_drm_api.c | 10 ++++++---- src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h | 8 +++----- 6 files changed, 21 insertions(+), 22 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index 301b20d549..ab811e48fc 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -72,9 +72,6 @@ struct brw_buffer void *ptr; }; -#define BRW_TILING_NONE 0 -#define BRW_TILING_Y 1 -#define BRW_TILING_X 2 union brw_surface_id { struct { diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c index f793fa8859..71a8890f83 100644 --- a/src/gallium/drivers/i965/brw_screen_tex_layout.c +++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c @@ -32,6 +32,7 @@ #include "brw_screen.h" #include "brw_debug.h" +#include "brw_winsys.h" /* Code to layout images in a mipmap tree for i965. 
*/ diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 666ec70d42..650cac240b 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -472,7 +472,8 @@ boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, struct pipe_texture * brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, const struct pipe_texture *templ, - const unsigned pitch, + unsigned pitch, + unsigned tiling, struct brw_winsys_buffer *buffer) { struct brw_screen *bscreen = brw_screen(screen); @@ -495,18 +496,11 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, tex->base.screen = screen; tex->cpp = pf_get_size(tex->base.format); + tex->tiling = tiling; make_empty_list(&tex->views[0]); make_empty_list(&tex->views[1]); - if (1) - tex->tiling = BRW_TILING_NONE; - else if (bscreen->chipset.is_965 && - pf_is_depth_or_stencil(templ->format)) - tex->tiling = BRW_TILING_Y; - else - tex->tiling = BRW_TILING_X; - if (!brw_texture_layout(bscreen, tex)) goto fail; diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index 9e86a1256e..af506a283d 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -111,6 +111,12 @@ enum brw_buffer_data_type { }; +/* Matches the i915_drm definitions: + */ +#define BRW_TILING_NONE 0 +#define BRW_TILING_X 1 +#define BRW_TILING_Y 2 + /* Relocations to be applied with subdata in a call to sws->bo_subdata, below. 
* @@ -271,7 +277,8 @@ boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture, struct pipe_texture * brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, const struct pipe_texture *template, - const unsigned pitch, + unsigned pitch, + unsigned tiling, struct brw_winsys_buffer *buffer); diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c index 5172b5410b..fc9678d2b6 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c @@ -42,7 +42,7 @@ i965_libdrm_buffer_from_handle(struct i965_libdrm_winsys *idws, const char* name, unsigned handle) { struct i965_libdrm_buffer *buf = CALLOC_STRUCT(i965_libdrm_buffer); - uint32_t tile = 0, swizzle = 0; + uint32_t swizzle = 0; if (BRW_DUMP) debug_printf("%s\n", __FUNCTION__); @@ -60,8 +60,8 @@ i965_libdrm_buffer_from_handle(struct i965_libdrm_winsys *idws, if (!buf->bo) goto err; - drm_intel_bo_get_tiling(buf->bo, &tile, &swizzle); - if (tile != 0) + drm_intel_bo_get_tiling(buf->bo, &buf->tiling, &swizzle); + if (buf->tiling != 0) buf->map_gtt = TRUE; return buf; @@ -100,7 +100,9 @@ i965_libdrm_texture_from_shared_handle(struct drm_api *api, if (!buffer) return NULL; - return brw_texture_blanket_winsys_buffer(screen, template, pitch, &buffer->base); + return brw_texture_blanket_winsys_buffer(screen, template, pitch, + buffer->tiling, + &buffer->base); } diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h index 235eaf68fa..c6a7d4a8c5 100644 --- a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h +++ b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h @@ -46,14 +46,12 @@ struct i965_libdrm_buffer { void *ptr; unsigned map_count; - boolean map_gtt; + unsigned data_type; /* valid while mapped */ + unsigned tiling; + boolean map_gtt; boolean flinked; unsigned flink; - - unsigned data_type; /* valid while mapped */ - - unsigned cheesy_refcount; 
}; static INLINE struct i965_libdrm_buffer * -- cgit v1.2.3 From bb1cde755bce0ea29cc4c1a29ad3841e3b304309 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 30 Nov 2009 16:16:30 +0000 Subject: i965g: turn on texture tiling by default --- src/gallium/drivers/i965/brw_screen.c | 3 +++ src/gallium/drivers/i965/brw_screen.h | 1 + src/gallium/drivers/i965/brw_screen_texture.c | 11 ++++------- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 70e2d9c47a..1855e4fd45 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -396,5 +396,8 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) brw_screen_tex_surface_init(bscreen); brw_screen_buffer_init(bscreen); + bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE); + + return &bscreen->base; } diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h index ab811e48fc..7226d9228b 100644 --- a/src/gallium/drivers/i965/brw_screen.h +++ b/src/gallium/drivers/i965/brw_screen.h @@ -45,6 +45,7 @@ struct brw_screen struct pipe_screen base; struct brw_chipset chipset; struct brw_winsys_screen *sws; + boolean no_tiling; }; /** diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index 650cac240b..f4c20f31a5 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ b/src/gallium/drivers/i965/brw_screen_texture.c @@ -209,14 +209,11 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, /* XXX: No tiling with compressed textures?? 
*/ - if (tex->compressed == 0 - /* && bscreen->use_texture_tiling */ - /* && bscreen->kernel_exec_fencing */) + if (tex->compressed == 0 && + !bscreen->no_tiling) { - if (1) - tex->tiling = BRW_TILING_NONE; - else if (bscreen->chipset.is_965 && - pf_is_depth_or_stencil(templ->format)) + if (bscreen->chipset.is_965 && + pf_is_depth_or_stencil(templ->format)) tex->tiling = BRW_TILING_Y; else tex->tiling = BRW_TILING_X; -- cgit v1.2.3 From eb68acaf6db689ba2fa62b188ff14507a7007266 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 1 Dec 2009 16:32:33 +0000 Subject: i965g: don't reference unused vars in RSQ Probably overly pedantic --- src/gallium/drivers/i965/brw_vs_emit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 20cec0f59b..6d8366f862 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -1462,7 +1462,8 @@ static void emit_insn(struct brw_vs_compile *c, emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); break; case TGSI_OPCODE_RSQ: - emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, + brw_swizzle(args[0], 0,0,0,0), BRW_MATH_PRECISION_FULL); break; case TGSI_OPCODE_SEQ: emit_seq(p, dst, args[0], args[1]); @@ -1618,7 +1619,7 @@ void brw_vs_emit(struct brw_vs_compile *c) struct tgsi_parse_context parse; struct tgsi_full_instruction *inst; - if (BRW_DEBUG & DEBUG_VS) +// if (BRW_DEBUG & DEBUG_VS) tgsi_dump(c->vp->tokens, 0); c->stack_index = brw_indirect(0, 0); -- cgit v1.2.3 From 63a8637c7425f64f5e48c2df2b60cc56ae6237ab Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 1 Dec 2009 16:39:02 +0000 Subject: i965g: add missing header --- src/gallium/drivers/i965/brw_disasm.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 
src/gallium/drivers/i965/brw_disasm.h (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h new file mode 100644 index 0000000000..77d402d35e --- /dev/null +++ b/src/gallium/drivers/i965/brw_disasm.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2008 Keith Packard + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
+ */ + +#ifndef BRW_DISASM_H +#define BRW_DISASM_H + +struct brw_instruction; + +int brw_disasm_insn (FILE *file, const struct brw_instruction *inst); +int brw_disasm (FILE *file, + const struct brw_instruction *inst, + unsigned count); + +#endif + -- cgit v1.2.3 From ba4cb8b2caac69c6d2b210a5c3c634d8c1c20940 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 1 Dec 2009 17:04:46 +0000 Subject: i965g: nasty hack for clearing y-tiled surfaces --- src/gallium/drivers/i965/brw_pipe_clear.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c index f846b4342c..211be88178 100644 --- a/src/gallium/drivers/i965/brw_pipe_clear.c +++ b/src/gallium/drivers/i965/brw_pipe_clear.c @@ -79,7 +79,11 @@ try_clear( struct brw_context *brw, BR13 |= BR13_565; } - assert(surface->tiling != BRW_TILING_Y); + /* XXX: nasty hack for clearing depth buffers + */ + if (surface->tiling == BRW_TILING_Y) { + x2 = pitch; + } if (surface->tiling == BRW_TILING_X) { CMD |= XY_DST_TILED; -- cgit v1.2.3 From f217927a57dbf57ffe23fb48417a4cdad7e0eeea Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Dec 2009 15:38:08 +0000 Subject: i965g: remove duplicate set_viewport_state, fixes samples/depth --- src/gallium/drivers/i965/brw_pipe_misc.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c index 0d0d92df82..3035907807 100644 --- a/src/gallium/drivers/i965/brw_pipe_misc.c +++ b/src/gallium/drivers/i965/brw_pipe_misc.c @@ -30,14 +30,6 @@ static void brw_set_scissor_state( struct pipe_context *pipe, brw->state.dirty.mesa |= PIPE_NEW_SCISSOR; } -static void brw_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->curr.viewport 
= *viewport; - brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT; -} static void brw_set_clip_state( struct pipe_context *pipe, const struct pipe_clip_state *clip ) @@ -54,7 +46,6 @@ void brw_pipe_misc_init( struct brw_context *brw ) brw->base.set_polygon_stipple = brw_set_polygon_stipple; brw->base.set_scissor_state = brw_set_scissor_state; brw->base.set_clip_state = brw_set_clip_state; - brw->base.set_viewport_state = brw_set_viewport_state; } -- cgit v1.2.3 From 6c719d4c22e84e315e5dc9cbc69885401a7ae231 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Dec 2009 16:55:40 +0000 Subject: i965g: add DEBUG_WINSYS flag --- src/gallium/drivers/i965/brw_debug.h | 2 +- src/gallium/drivers/i965/brw_screen.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h index 98407a06ed..ae8e9254a6 100644 --- a/src/gallium/drivers/i965/brw_debug.h +++ b/src/gallium/drivers/i965/brw_debug.h @@ -14,7 +14,7 @@ #define DEBUG_VERBOSE 0x40 #define DEBUG_BATCH 0x80 #define DEBUG_PIXEL 0x100 -#define DEBUG_BUFMGR 0x200 +#define DEBUG_WINSYS 0x200 #define DEBUG_MIN_URB 0x400 #define DEBUG_DISASSEM 0x800 #define DEBUG_unused3 0x1000 diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 1855e4fd45..3d96a77d65 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -47,7 +47,7 @@ static const struct debug_named_value debug_names[] = { { "verb", DEBUG_VERBOSE}, { "bat", DEBUG_BATCH}, { "pix", DEBUG_PIXEL}, - { "buf", DEBUG_BUFMGR}, + { "wins", DEBUG_WINSYS}, { "min", DEBUG_MIN_URB}, { "dis", DEBUG_DISASSEM}, { "sync", DEBUG_SYNC}, -- cgit v1.2.3 From 1ec7e058f50882b27c0a2abd961bd49848386ff7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Dec 2009 16:56:46 +0000 Subject: i965g: keep refcounts to bound vertex buffers --- src/gallium/drivers/i965/brw_pipe_vertex.c | 23 
+++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c index 73bba5b088..3d87a2853f 100644 --- a/src/gallium/drivers/i965/brw_pipe_vertex.c +++ b/src/gallium/drivers/i965/brw_pipe_vertex.c @@ -19,11 +19,26 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffers) { struct brw_context *brw = brw_context(pipe); + unsigned i; - /* XXX: don't we need to take some references here? It's a bit - * awkward to do so, though. - */ - memcpy(brw->curr.vertex_buffer, buffers, count * sizeof(buffers[0])); + /* Check for no change */ + if (count == brw->curr.num_vertex_buffers && + memcmp(brw->curr.vertex_buffer, + buffers, + count * sizeof buffers[0]) == 0) + return; + + /* Adjust refcounts */ + for (i = 0; i < count; i++) + pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer, + buffers[i].buffer); + + for ( ; i < brw->curr.num_vertex_buffers; i++) + pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer, + NULL); + + /* Copy remaining data */ + memcpy(brw->curr.vertex_buffer, buffers, count * sizeof buffers[0]); brw->curr.num_vertex_buffers = count; brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER; -- cgit v1.2.3 From 061411b2611634960f0ba36e42916c67918bb53d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Dec 2009 16:57:37 +0000 Subject: i965g: hook vertex state emit up to PIPE_NEW_VERTEX_BUFFER --- src/gallium/drivers/i965/brw_context.h | 2 +- src/gallium/drivers/i965/brw_draw.c | 7 ++++++- src/gallium/drivers/i965/brw_draw_upload.c | 5 +++-- src/gallium/drivers/i965/brw_state_debug.c | 1 - 4 files changed, 10 insertions(+), 5 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index b7330f00f4..143e068987 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ 
b/src/gallium/drivers/i965/brw_context.h @@ -257,7 +257,7 @@ struct brw_sampler { #define BRW_NEW_WM_SURFACES 0x1000 #define BRW_NEW_xxx 0x2000 /* was FENCE */ #define BRW_NEW_INDICES 0x4000 -#define BRW_NEW_VERTICES 0x8000 + /** * Used for any batch entry with a relocated pointer that will be used * by any 3D rendering. Need to re-emit these fresh in each diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 45d5ade1fc..4498773dd8 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -92,6 +92,10 @@ static int brw_emit_prim(struct brw_context *brw, struct brw_3d_primitive prim_packet; int ret; + if (BRW_DEBUG & DEBUG_PRIMS) + debug_printf("%s start %d count %d indexed %d hw_prim %d\n", + __FUNCTION__, start, count, indexed, hw_prim); + prim_packet.header.opcode = CMD_3D_PRIM; prim_packet.header.length = sizeof(prim_packet)/4 - 2; prim_packet.header.pad = 0; @@ -187,7 +191,8 @@ brw_draw_range_elements(struct pipe_context *pipe, hw_prim = brw_set_prim(brw, mode); if (BRW_DEBUG & DEBUG_PRIMS) - debug_printf("PRIM: %s %d %d\n", u_prim_name(mode), start, count); + debug_printf("PRIM: %s start %d count %d index_buffer %p\n", + u_prim_name(mode), start, count, (void *)index_buffer); /* Potentially trigger upload of new index buffer. 
* diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c index f50ce3005d..a27da5f1c1 100644 --- a/src/gallium/drivers/i965/brw_draw_upload.c +++ b/src/gallium/drivers/i965/brw_draw_upload.c @@ -405,8 +405,9 @@ static int brw_emit_vertices( struct brw_context *brw ) const struct brw_tracked_state brw_vertices = { .dirty = { - .mesa = PIPE_NEW_INDEX_RANGE, - .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES, + .mesa = (PIPE_NEW_INDEX_RANGE | + PIPE_NEW_VERTEX_BUFFER), + .brw = BRW_NEW_BATCH, .cache = 0, }, .prepare = brw_prepare_vertices, diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c index 050f74761c..049c278c93 100644 --- a/src/gallium/drivers/i965/brw_state_debug.c +++ b/src/gallium/drivers/i965/brw_state_debug.c @@ -80,7 +80,6 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_WM_SURFACES), DEFINE_BIT(BRW_NEW_xxx), DEFINE_BIT(BRW_NEW_INDICES), - DEFINE_BIT(BRW_NEW_VERTICES), {0, 0, 0} }; -- cgit v1.2.3 From f72de22439a2d08bb461af60839baf4fbb3e54df Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Dec 2009 19:22:45 +0000 Subject: i965g: remove half-finished change to tgsi_parse --- src/gallium/auxiliary/tgsi/tgsi_scan.h | 3 --- src/gallium/drivers/i965/brw_state_upload.c | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 6754001e88..8a7ee0c7e4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -61,9 +61,6 @@ struct tgsi_shader_info boolean uses_kill; /**< KIL or KILP instruction used? */ boolean uses_fogcoord; /**< fragment shader uses fog coord? */ boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? 
*/ - - uint texture_max; - uint texture_mask; }; diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index 233dce03df..bf65ca1cf2 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -191,7 +191,7 @@ enum pipe_error brw_validate_state( struct brw_context *brw ) const struct brw_fragment_shader *fp = brw->curr.fragment_shader; if (fp) { assert(fp->info.file_max[TGSI_FILE_SAMPLER] < (int)brw->curr.num_samplers); - assert(fp->info.texture_max <= brw->curr.num_textures); + /*assert(fp->info.texture_max <= brw->curr.num_textures);*/ } } -- cgit v1.2.3 From 0fc4dd3819af252c028ed43bbd668b4f34104e32 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Dec 2009 19:50:05 +0000 Subject: i965g: fixes to build after merge of master --- src/gallium/auxiliary/util/u_upload_mgr.h | 2 +- src/gallium/drivers/i965/brw_clip_state.c | 2 +- src/gallium/drivers/i965/brw_context.h | 4 +- src/gallium/drivers/i965/brw_draw.c | 2 +- src/gallium/drivers/i965/brw_eu.h | 2 +- src/gallium/drivers/i965/brw_eu_emit.c | 2 +- src/gallium/drivers/i965/brw_pipe_sampler.c | 20 +++++-- src/gallium/drivers/i965/brw_screen.c | 2 +- src/gallium/drivers/i965/brw_screen_surface.c | 5 +- src/gallium/drivers/i965/brw_screen_tex_layout.c | 68 +++++++++++------------- src/gallium/drivers/i965/brw_screen_texture.c | 14 ++--- src/gallium/drivers/i965/brw_state.h | 4 +- src/gallium/drivers/i965/brw_state_upload.c | 4 +- src/gallium/drivers/i965/brw_vs_emit.c | 30 +++++------ src/gallium/drivers/i965/brw_winsys.h | 2 +- src/gallium/drivers/i965/brw_wm.c | 2 +- src/gallium/drivers/i965/brw_wm_emit.c | 2 +- src/gallium/drivers/i965/brw_wm_fp.c | 46 ++++++++-------- 18 files changed, 112 insertions(+), 101 deletions(-) (limited to 'src/gallium/drivers/i965') diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h index d414a1f2f6..e158bed9d0 100644 --- 
a/src/gallium/auxiliary/util/u_upload_mgr.h +++ b/src/gallium/auxiliary/util/u_upload_mgr.h @@ -32,7 +32,7 @@ #ifndef U_UPLOAD_MGR_H #define U_UPLOAD_MGR_H -#include "pipe/p_error.h" +#include "pipe/p_defines.h" struct pipe_screen; struct pipe_buffer; diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c index 467364e884..5c3ccfd8d0 100644 --- a/src/gallium/drivers/i965/brw_clip_state.c +++ b/src/gallium/drivers/i965/brw_clip_state.c @@ -69,7 +69,7 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key) key->urb_size = brw->urb.vsize; /* */ - key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp; + key->depth_clamp = 0; /* XXX: add this to gallium: ctx->Transform.DepthClamp; */ } static enum pipe_error diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 143e068987..56e7807400 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -197,7 +197,7 @@ struct brw_fragment_shader { struct brw_immediate_data immediates; unsigned iz_lookup; - //unsigned wm_lookup; + /*unsigned wm_lookup;*/ unsigned uses_depth:1; unsigned has_flow_control:1; @@ -722,7 +722,7 @@ struct brw_context /** Input sizes, calculated from active vertex program. * One bit per fragment program input attribute. 
*/ - //GLbitfield input_size_masks[4]; + /*GLbitfield input_size_masks[4];*/ /** Array of surface default colors (texture border color) */ struct brw_winsys_buffer *sdc_bo[BRW_MAX_TEX_UNIT]; diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c index 4498773dd8..852fd22982 100644 --- a/src/gallium/drivers/i965/brw_draw.c +++ b/src/gallium/drivers/i965/brw_draw.c @@ -108,7 +108,7 @@ static int brw_emit_prim(struct brw_context *brw, prim_packet.start_vert_location += brw->ib.start_vertex_offset; prim_packet.instance_count = 1; prim_packet.start_instance_location = 0; - prim_packet.base_vert_location = 0; // prim->basevertex; XXX: add this to gallium + prim_packet.base_vert_location = 0; /* prim->basevertex; XXX: add this to gallium */ /* If we're set to always flush, do it before and after the primitive emit. diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h index 565f4ef1c5..af509b2e5f 100644 --- a/src/gallium/drivers/i965/brw_eu.h +++ b/src/gallium/drivers/i965/brw_eu.h @@ -34,7 +34,7 @@ #define BRW_EU_H #include "util/u_debug.h" -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #include "brw_structs.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c index 3ee50899fb..4fe7b6acc1 100644 --- a/src/gallium/drivers/i965/brw_eu_emit.c +++ b/src/gallium/drivers/i965/brw_eu_emit.c @@ -200,7 +200,7 @@ void brw_set_src1( struct brw_instruction *insn, * in the future: */ assert (reg.address_mode == BRW_ADDRESS_DIRECT); - //assert (reg.file == BRW_GENERAL_REGISTER_FILE); + /*assert (reg.file == BRW_GENERAL_REGISTER_FILE); */ if (insn->header.access_mode == BRW_ALIGN_1) { insn->bits3.da1.src1_subreg_nr = reg.subnr; diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c index 5cd38a43a6..5ddc63f57e 100644 --- a/src/gallium/drivers/i965/brw_pipe_sampler.c +++ 
b/src/gallium/drivers/i965/brw_pipe_sampler.c @@ -204,15 +204,29 @@ static void brw_set_sampler_textures(struct pipe_context *pipe, brw->state.dirty.mesa |= PIPE_NEW_BOUND_TEXTURES; } +static void brw_set_vertex_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ +} + +static void brw_bind_vertex_sampler_state(struct pipe_context *pipe, + unsigned num, void **sampler) +{ +} + void brw_pipe_sampler_init( struct brw_context *brw ) { - brw->base.set_sampler_textures = brw_set_sampler_textures; brw->base.create_sampler_state = brw_create_sampler_state; - brw->base.bind_sampler_states = brw_bind_sampler_state; brw->base.delete_sampler_state = brw_delete_sampler_state; - brw->base.set_sampler_textures = brw_set_sampler_textures; + brw->base.set_fragment_sampler_textures = brw_set_sampler_textures; + brw->base.bind_fragment_sampler_states = brw_bind_sampler_state; + + brw->base.set_vertex_sampler_textures = brw_set_vertex_sampler_textures; + brw->base.bind_vertex_sampler_states = brw_bind_vertex_sampler_state; + } void brw_pipe_sampler_cleanup( struct brw_context *brw ) { diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c index 3d96a77d65..0ecacac9a3 100644 --- a/src/gallium/drivers/i965/brw_screen.c +++ b/src/gallium/drivers/i965/brw_screen.c @@ -396,7 +396,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id) brw_screen_tex_surface_init(bscreen); brw_screen_buffer_init(bscreen); - bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE); + bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE) != NULL; return &bscreen->base; diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c index 1e37c63d6c..e2b9954e59 100644 --- a/src/gallium/drivers/i965/brw_screen_surface.c +++ b/src/gallium/drivers/i965/brw_screen_surface.c @@ -31,6 +31,7 @@ #include "util/u_memory.h" #include "util/u_simple_list.h" +#include 
"util/u_math.h" #include "pipe/p_screen.h" #include "brw_screen.h" @@ -138,8 +139,8 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen, assert(id.bits.zslice == 0); surface->base.format = tex->base.format; - surface->base.width = tex->base.width[id.bits.level]; - surface->base.height = tex->base.height[id.bits.level]; + surface->base.width = u_minify(tex->base.width0, id.bits.level); + surface->base.height = u_minify(tex->base.height0, id.bits.level); surface->base.offset = tex->image_offset[id.bits.level][id.bits.face]; surface->base.usage = usage; surface->base.zslice = id.bits.zslice; diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c index 71a8890f83..894f4bea40 100644 --- a/src/gallium/drivers/i965/brw_screen_tex_layout.c +++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c @@ -110,10 +110,6 @@ brw_tex_set_level_info(struct brw_texture *tex, assert(tex->image_offset[level] == NULL); assert(nr_images >= 1); - tex->base.width[level] = w; - tex->base.height[level] = h; - tex->base.depth[level] = d; - tex->level_offset[level] = (x + y * tex->pitch) * tex->cpp; tex->nr_images[level] = nr_images; @@ -147,14 +143,14 @@ static void brw_layout_2d( struct brw_texture *tex ) GLuint level; GLuint x = 0; GLuint y = 0; - GLuint width = tex->base.width[0]; - GLuint height = tex->base.height[0]; + GLuint width = tex->base.width0; + GLuint height = tex->base.height0; - tex->pitch = tex->base.width[0]; + tex->pitch = tex->base.width0; brw_tex_alignment_unit(tex->base.format, &align_w, &align_h); if (tex->compressed) { - tex->pitch = align(tex->base.width[0], align_w); + tex->pitch = align(tex->base.width0, align_w); } /* May need to adjust pitch to accomodate the placement of @@ -166,11 +162,11 @@ static void brw_layout_2d( struct brw_texture *tex ) GLuint mip1_width; if (tex->compressed) { - mip1_width = align(minify(tex->base.width[0]), align_w) - + 
align(minify(minify(tex->base.width[0])), align_w); + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + align(u_minify(tex->base.width0, 2), align_w)); } else { - mip1_width = align(minify(tex->base.width[0]), align_w) - + minify(minify(tex->base.width[0])); + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + u_minify(tex->base.width0, 2)); } if (mip1_width > tex->pitch) { @@ -209,8 +205,8 @@ static void brw_layout_2d( struct brw_texture *tex ) y += img_height; } - width = minify(width); - height = minify(height); + width = u_minify(width, 1); + height = u_minify(height, 1); } } @@ -222,28 +218,28 @@ brw_layout_cubemap_idgng( struct brw_texture *tex ) GLuint level; GLuint x = 0; GLuint y = 0; - GLuint width = tex->base.width[0]; - GLuint height = tex->base.height[0]; + GLuint width = tex->base.width0; + GLuint height = tex->base.height0; GLuint qpitch = 0; GLuint y_pitch = 0; - tex->pitch = tex->base.width[0]; + tex->pitch = tex->base.width0; brw_tex_alignment_unit(tex->base.format, &align_w, &align_h); y_pitch = align(height, align_h); if (tex->compressed) { - tex->pitch = align(tex->base.width[0], align_w); + tex->pitch = align(tex->base.width0, align_w); } if (tex->base.last_level != 0) { GLuint mip1_width; if (tex->compressed) { - mip1_width = (align(minify(tex->base.width[0]), align_w) + - align(minify(minify(tex->base.width[0])), align_w)); + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + align(u_minify(tex->base.width0, 2), align_w)); } else { - mip1_width = (align(minify(tex->base.width[0]), align_w) + - minify(minify(tex->base.width[0]))); + mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + + u_minify(tex->base.width0, 2)); } if (mip1_width > tex->pitch) { @@ -255,19 +251,19 @@ brw_layout_cubemap_idgng( struct brw_texture *tex ) if (tex->compressed) { qpitch = ((y_pitch + - align(minify(y_pitch), align_h) + + align(u_minify(y_pitch, 1), align_h) + 11 * align_h) / 4) * tex->pitch * tex->cpp; 
tex->total_height = ((y_pitch + - align(minify(y_pitch), align_h) + + align(u_minify(y_pitch, 1), align_h) + 11 * align_h) / 4) * 6; } else { qpitch = (y_pitch + - align(minify(y_pitch), align_h) + + align(u_minify(y_pitch, 1), align_h) + 11 * align_h) * tex->pitch * tex->cpp; tex->total_height = (y_pitch + - align(minify(y_pitch), align_h) + + align(u_minify(y_pitch, 1), align_h) + 11 * align_h) * 6; } @@ -293,8 +289,8 @@ brw_layout_cubemap_idgng( struct brw_texture *tex ) y += img_height; } - width = minify(width); - height = minify(height); + width = u_minify(width, 1); + height = u_minify(height, 1); } return TRUE; @@ -304,9 +300,9 @@ brw_layout_cubemap_idgng( struct brw_texture *tex ) static boolean brw_layout_3d_cube( struct brw_texture *tex ) { - GLuint width = tex->base.width[0]; - GLuint height = tex->base.height[0]; - GLuint depth = tex->base.depth[0]; + GLuint width = tex->base.width0; + GLuint height = tex->base.height0; + GLuint depth = tex->base.depth0; GLuint pack_x_pitch, pack_x_nr; GLuint pack_y_pitch; GLuint level; @@ -320,8 +316,8 @@ brw_layout_3d_cube( struct brw_texture *tex ) tex->pitch = align(width, align_w); pack_y_pitch = (height + 3) / 4; } else { - tex->pitch = brw_tex_pitch_align(tex, tex->base.width[0]); - pack_y_pitch = align(tex->base.height[0], align_h); + tex->pitch = brw_tex_pitch_align(tex, tex->base.width0); + pack_y_pitch = align(tex->base.height0, align_h); } pack_x_pitch = width; @@ -349,9 +345,9 @@ brw_layout_3d_cube( struct brw_texture *tex ) tex->total_height += y; - width = minify(width); - height = minify(height); - depth = minify(depth); + width = u_minify(width, 1); + height = u_minify(height, 1); + depth = u_minify(depth, 1); if (tex->compressed) { pack_y_pitch = (height + 3) / 4; diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c index f4c20f31a5..ff999086c0 100644 --- a/src/gallium/drivers/i965/brw_screen_texture.c +++ 
b/src/gallium/drivers/i965/brw_screen_texture.c @@ -259,8 +259,8 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, */ tex->ss.ss1.base_addr = 0; /* reloc */ tex->ss.ss2.mip_count = tex->base.last_level; - tex->ss.ss2.width = tex->base.width[0] - 1; - tex->ss.ss2.height = tex->base.height[0] - 1; + tex->ss.ss2.width = tex->base.width0 - 1; + tex->ss.ss2.height = tex->base.height0 - 1; switch (tex->tiling) { case BRW_TILING_NONE: @@ -278,7 +278,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen, } tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; - tex->ss.ss3.depth = tex->base.depth[0] - 1; + tex->ss.ss3.depth = tex->base.depth0 - 1; tex->ss.ss4.min_lod = 0; @@ -478,7 +478,7 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, if (templ->target != PIPE_TEXTURE_2D || templ->last_level != 0 || - templ->depth[0] != 1) + templ->depth0 != 1) return NULL; if (pf_is_compressed(templ->format)) @@ -529,8 +529,8 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, */ tex->ss.ss1.base_addr = 0; /* reloc */ tex->ss.ss2.mip_count = tex->base.last_level; - tex->ss.ss2.width = tex->base.width[0] - 1; - tex->ss.ss2.height = tex->base.height[0] - 1; + tex->ss.ss2.width = tex->base.width0 - 1; + tex->ss.ss2.height = tex->base.height0 - 1; switch (tex->tiling) { case BRW_TILING_NONE: @@ -548,7 +548,7 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen, } tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1; - tex->ss.ss3.depth = tex->base.depth[0] - 1; + tex->ss.ss3.depth = tex->base.depth0 - 1; tex->ss.ss4.min_lod = 0; diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h index a9b8165495..d2bbd0123d 100644 --- a/src/gallium/drivers/i965/brw_state.h +++ b/src/gallium/drivers/i965/brw_state.h @@ -33,12 +33,12 @@ #ifndef BRW_STATE_H #define BRW_STATE_H -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #include "util/u_memory.h" #include "brw_context.h" 
-static inline void +static INLINE void brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo) { assert(brw->state.validated_bo_count < Elements(brw->state.validated_bos)); diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c index bf65ca1cf2..f8b91eff81 100644 --- a/src/gallium/drivers/i965/brw_state_upload.c +++ b/src/gallium/drivers/i965/brw_state_upload.c @@ -38,7 +38,7 @@ const struct brw_tracked_state *atoms[] = { -// &brw_wm_input_sizes, +/* &brw_wm_input_sizes, */ &brw_vs_prog, &brw_gs_prog, &brw_clip_prog, @@ -56,7 +56,7 @@ const struct brw_tracked_state *atoms[] = &brw_cc_unit, &brw_vs_surfaces, /* must do before unit */ - //&brw_wm_constant_surface, /* must do before wm surfaces/bind bo */ + /*&brw_wm_constant_surface,*/ /* must do before wm surfaces/bind bo */ &brw_wm_surfaces, /* must do before samplers and unit */ &brw_wm_samplers, diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 6d8366f862..1d0fff0d9e 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -1067,22 +1067,22 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, { struct brw_reg reg; - if (src->SrcRegister.File == TGSI_FILE_NULL) + if (src->Register.File == TGSI_FILE_NULL) return brw_null_reg(); reg = get_src_reg(c, argIndex, - src->SrcRegister.File, - src->SrcRegister.Index, - src->SrcRegister.Indirect); + src->Register.File, + src->Register.Index, + src->Register.Indirect); /* Convert 3-bit swizzle to 2-bit. */ - reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SrcRegister.SwizzleX, - src->SrcRegister.SwizzleY, - src->SrcRegister.SwizzleZ, - src->SrcRegister.SwizzleW); + reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->Register.SwizzleX, + src->Register.SwizzleY, + src->Register.SwizzleZ, + src->Register.SwizzleW); - reg.negate = src->SrcRegister.Negate ? 1 : 0; + reg.negate = src->Register.Negate ? 
1 : 0; /* XXX: abs, absneg */ @@ -1353,7 +1353,7 @@ static void emit_insn(struct brw_vs_compile *c, const struct tgsi_full_instruction *inst) { unsigned opcode = inst->Instruction.Opcode; - unsigned label = inst->InstructionExtLabel.Label; + unsigned label = inst->Label.Label; struct brw_compile *p = &c->func; struct brw_reg args[3], dst; GLuint i; @@ -1366,7 +1366,7 @@ static void emit_insn(struct brw_vs_compile *c, /* Get argument regs. */ for (i = 0; i < 3; i++) { - args[i] = get_arg(c, &inst->FullSrcRegisters[i], i); + args[i] = get_arg(c, &inst->Src[i], i); } /* Get dest regs. Note that it is possible for a reg to be both @@ -1374,9 +1374,9 @@ static void emit_insn(struct brw_vs_compile *c, * care needs to be taken emitting multi-operation instructions. */ dst = get_dst(c, - inst->FullDstRegisters[0].DstRegister.File, - inst->FullDstRegisters[0].DstRegister.Index, - inst->FullDstRegisters[0].DstRegister.WriteMask); + inst->Dst[0].Register.File, + inst->Dst[0].Register.Index, + inst->Dst[0].Register.WriteMask); /* XXX: saturate */ @@ -1619,7 +1619,7 @@ void brw_vs_emit(struct brw_vs_compile *c) struct tgsi_parse_context parse; struct tgsi_full_instruction *inst; -// if (BRW_DEBUG & DEBUG_VS) + if (BRW_DEBUG & DEBUG_VS) tgsi_dump(c->vp->tokens, 0); c->stack_index = brw_indirect(0, 0); diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h index af506a283d..2f47067716 100644 --- a/src/gallium/drivers/i965/brw_winsys.h +++ b/src/gallium/drivers/i965/brw_winsys.h @@ -27,7 +27,7 @@ #define BRW_WINSYS_H #include "pipe/p_compiler.h" -#include "pipe/p_error.h" +#include "pipe/p_defines.h" #include "pipe/p_refcnt.h" struct brw_winsys; diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c index 2c9d3e5e87..fdf820a9aa 100644 --- a/src/gallium/drivers/i965/brw_wm.c +++ b/src/gallium/drivers/i965/brw_wm.c @@ -180,7 +180,7 @@ static enum pipe_error do_wm_prog( struct brw_context *brw, /* XXX: GLSL support */ 
exit(1); - //brw_wm_branching_shader_emit(brw, c); + /* brw_wm_branching_shader_emit(brw, c); */ } else { c->dispatch_width = 16; diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c index 0b82f4e156..7e57d0306b 100644 --- a/src/gallium/drivers/i965/brw_wm_emit.c +++ b/src/gallium/drivers/i965/brw_wm_emit.c @@ -1007,7 +1007,7 @@ static void emit_killp( struct brw_wm_compile *c ) brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */ brw_AND(p, r0uw, c->emit_mask_reg, r0uw); brw_pop_insn_state(p); } diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c index a8b5e15f36..9c5b527f89 100644 --- a/src/gallium/drivers/i965/brw_wm_fp.c +++ b/src/gallium/drivers/i965/brw_wm_fp.c @@ -957,15 +957,15 @@ static struct brw_fp_dst translate_dst( struct brw_wm_compile *c, { struct brw_fp_dst out; - out.file = dst->DstRegister.File; - out.index = dst->DstRegister.Index; - out.writemask = dst->DstRegister.WriteMask; - out.indirect = dst->DstRegister.Indirect; + out.file = dst->Register.File; + out.index = dst->Register.Index; + out.writemask = dst->Register.WriteMask; + out.indirect = dst->Register.Indirect; out.saturate = (saturate == TGSI_SAT_ZERO_ONE); if (out.indirect) { - assert(dst->DstRegisterInd.File == TGSI_FILE_ADDRESS); - assert(dst->DstRegisterInd.Index == 0); + assert(dst->Indirect.File == TGSI_FILE_ADDRESS); + assert(dst->Indirect.Index == 0); } return out; @@ -977,14 +977,14 @@ static struct brw_fp_src translate_src( struct brw_wm_compile *c, { struct brw_fp_src out; - out.file = src->SrcRegister.File; - out.index = src->SrcRegister.Index; - out.indirect = src->SrcRegister.Indirect; + out.file = src->Register.File; + out.index = src->Register.Index; + out.indirect = src->Register.Indirect; - out.swizzle = ((src->SrcRegister.SwizzleX << 0) | - 
(src->SrcRegister.SwizzleY << 2) | - (src->SrcRegister.SwizzleZ << 4) | - (src->SrcRegister.SwizzleW << 6)); + out.swizzle = ((src->Register.SwizzleX << 0) | + (src->Register.SwizzleY << 2) | + (src->Register.SwizzleZ << 4) | + (src->Register.SwizzleW << 6)); switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) { case TGSI_UTIL_SIGN_CLEAR: @@ -1010,8 +1010,8 @@ static struct brw_fp_src translate_src( struct brw_wm_compile *c, } if (out.indirect) { - assert(src->SrcRegisterInd.File == TGSI_FILE_ADDRESS); - assert(src->SrcRegisterInd.Index == 0); + assert(src->Indirect.File == TGSI_FILE_ADDRESS); + assert(src->Indirect.Index == 0); } return out; @@ -1027,11 +1027,11 @@ static void emit_insn( struct brw_wm_compile *c, struct brw_fp_src src[3]; int i; - dst = translate_dst( c, &inst->FullDstRegisters[0], + dst = translate_dst( c, &inst->Dst[0], inst->Instruction.Saturate ); for (i = 0; i < inst->Instruction.NumSrcRegs; i++) - src[i] = translate_src( c, &inst->FullSrcRegisters[i] ); + src[i] = translate_src( c, &inst->Src[i] ); switch (opcode) { case TGSI_OPCODE_ABS: @@ -1063,7 +1063,7 @@ static void emit_insn( struct brw_wm_compile *c, case TGSI_OPCODE_TEX: precalc_tex(c, dst, - inst->InstructionExtTexture.Texture, + inst->Texture.Texture, src[1].index, /* use sampler unit for tex idx */ src[0], /* coord */ src[1]); /* sampler */ @@ -1071,7 +1071,7 @@ static void emit_insn( struct brw_wm_compile *c, case TGSI_OPCODE_TXP: precalc_txp(c, dst, - inst->InstructionExtTexture.Texture, + inst->Texture.Texture, src[1].index, /* use sampler unit for tex idx */ src[0], /* coord */ src[1]); /* sampler */ @@ -1081,7 +1081,7 @@ static void emit_insn( struct brw_wm_compile *c, /* XXX: TXB not done */ precalc_tex(c, dst, - inst->InstructionExtTexture.Texture, + inst->Texture.Texture, src[1].index, /* use sampler unit for tex idx*/ src[0], src[1]); @@ -1169,14 +1169,14 @@ int brw_wm_pass_fp( struct brw_wm_compile *c ) unsigned first, last, mask; unsigned attrib; - first = 
decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; + first = decl->Range.First; + last = decl->Range.Last; mask = decl->Declaration.UsageMask; for (attrib = first; attrib <= last; attrib++) { emit_interp(c, attrib, - decl->Semantic.SemanticName, + decl->Semantic.Name, decl->Declaration.Interpolate ); } } -- cgit v1.2.3